diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,371033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5704042203127404, + "eval_steps": 500, + "global_step": 530000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.8498192836089445e-05, + "grad_norm": 3.401921510696411, + "learning_rate": 0.0002, + "loss": 10.4016, + "step": 10 + }, + { + "epoch": 9.699638567217889e-05, + "grad_norm": 13.897741317749023, + "learning_rate": 0.0002, + "loss": 5.9957, + "step": 20 + }, + { + "epoch": 0.00014549457850826833, + "grad_norm": 1.9996206760406494, + "learning_rate": 0.0002, + "loss": 2.3837, + "step": 30 + }, + { + "epoch": 0.00019399277134435778, + "grad_norm": 1.4307533502578735, + "learning_rate": 0.0002, + "loss": 1.0019, + "step": 40 + }, + { + "epoch": 0.00024249096418044722, + "grad_norm": 0.1947925090789795, + "learning_rate": 0.0002, + "loss": 0.3145, + "step": 50 + }, + { + "epoch": 0.00029098915701653667, + "grad_norm": 1.2832244634628296, + "learning_rate": 0.0002, + "loss": 0.4639, + "step": 60 + }, + { + "epoch": 0.0003394873498526261, + "grad_norm": 0.14918968081474304, + "learning_rate": 0.0002, + "loss": 0.0649, + "step": 70 + }, + { + "epoch": 0.00038798554268871556, + "grad_norm": 0.05554560199379921, + "learning_rate": 0.0002, + "loss": 0.0145, + "step": 80 + }, + { + "epoch": 0.000436483735524805, + "grad_norm": 0.02133125439286232, + "learning_rate": 0.0002, + "loss": 0.0074, + "step": 90 + }, + { + "epoch": 0.00048498192836089445, + "grad_norm": 0.011311554349958897, + "learning_rate": 0.0002, + "loss": 0.0039, + "step": 100 + }, + { + "epoch": 0.0005334801211969839, + "grad_norm": 0.006487010512501001, + "learning_rate": 0.0002, + "loss": 0.0026, + "step": 110 + }, + { + "epoch": 0.0005819783140330733, + "grad_norm": 0.22169442474842072, + "learning_rate": 0.0002, + "loss": 0.0022, + "step": 120 + }, + { + "epoch": 0.0006304765068691628, + "grad_norm": 0.004462387412786484, + "learning_rate": 0.0002, + "loss": 0.0025, + "step": 130 + }, + { + "epoch": 0.0006789746997052522, + "grad_norm": 0.006146426312625408, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 140 + }, + { + "epoch": 0.0007274728925413417, + "grad_norm": 0.0058703175745904446, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 150 + }, + { + "epoch": 0.0007759710853774311, + "grad_norm": 0.00636435579508543, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 160 + }, + { + "epoch": 0.0008244692782135206, + "grad_norm": 0.002858775435015559, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 170 + }, + { + "epoch": 0.00087296747104961, + "grad_norm": 0.0026936496142297983, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 180 + }, + { + "epoch": 0.0009214656638856995, + "grad_norm": 0.0034936692100018263, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 190 + }, + { + "epoch": 0.0009699638567217889, + "grad_norm": 0.0026432229205965996, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 200 + }, + { + "epoch": 0.0010184620495578783, + "grad_norm": 0.002656930824741721, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 210 + }, + { + "epoch": 0.0010669602423939678, + "grad_norm": 0.0018161912448704243, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.0011154584352300572, + "grad_norm": 0.0020376199390739202, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 230 + }, + { + "epoch": 0.0011639566280661467, + "grad_norm": 0.0025872995611280203, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 240 + }, + { + "epoch": 0.0012124548209022361, + "grad_norm": 0.0019854113925248384, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 250 + }, + { + "epoch": 0.0012609530137383256, + "grad_norm": 0.01487038191407919, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 260 + }, + { + "epoch": 0.001309451206574415, + "grad_norm": 0.16450995206832886, + "learning_rate": 0.0002, + "loss": 0.0016, + "step": 270 + }, + { + "epoch": 0.0013579493994105045, + "grad_norm": 0.001389127573929727, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 280 + }, + { + "epoch": 0.001406447592246594, + "grad_norm": 0.00198512920178473, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 290 + }, + { + "epoch": 0.0014549457850826833, + "grad_norm": 0.0016489489935338497, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 300 + }, + { + "epoch": 0.0015034439779187728, + "grad_norm": 0.006001756060868502, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 310 + }, + { + "epoch": 0.0015519421707548622, + "grad_norm": 0.0009794628713279963, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 320 + }, + { + "epoch": 0.0016004403635909517, + "grad_norm": 0.00257917121052742, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 330 + }, + { + "epoch": 0.0016489385564270411, + "grad_norm": 0.0017814363818615675, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 340 + }, + { + "epoch": 0.0016974367492631306, + "grad_norm": 0.0012392844073474407, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 350 + }, + { + "epoch": 0.00174593494209922, + "grad_norm": 0.0012720872182399035, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 360 + }, + { + "epoch": 0.0017944331349353095, + "grad_norm": 0.0009001717553474009, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.001842931327771399, + "grad_norm": 0.0007893209112808108, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 380 + }, + { + "epoch": 0.0018914295206074884, + "grad_norm": 0.001039596158079803, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 390 + }, + { + "epoch": 0.0019399277134435778, + "grad_norm": 0.0010425036307424307, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 400 + }, + { + "epoch": 0.0019884259062796675, + "grad_norm": 0.0007197722443379462, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 410 + }, + { + "epoch": 0.0020369240991157567, + "grad_norm": 0.00068820541491732, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 420 + }, + { + "epoch": 0.0020854222919518463, + "grad_norm": 0.0006834401283413172, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 430 + }, + { + "epoch": 0.0021339204847879356, + "grad_norm": 0.0007635322399437428, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 440 + }, + { + "epoch": 0.0021824186776240252, + "grad_norm": 0.0007121615344658494, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 450 + }, + { + "epoch": 0.0022309168704601145, + "grad_norm": 0.000578841776587069, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 460 + }, + { + "epoch": 0.002279415063296204, + "grad_norm": 0.0005565525498241186, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.0023279132561322934, + "grad_norm": 0.0007787013310007751, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 480 + }, + { + "epoch": 0.002376411448968383, + "grad_norm": 0.0007087746053002775, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 490 + }, + { + "epoch": 0.0024249096418044722, + "grad_norm": 0.000640068668872118, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.002473407834640562, + "grad_norm": 0.013301050290465355, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 510 + }, + { + "epoch": 0.002521906027476651, + "grad_norm": 0.0007127533899620175, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 520 + }, + { + "epoch": 0.002570404220312741, + "grad_norm": 0.0007592527545057237, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 530 + }, + { + "epoch": 0.00261890241314883, + "grad_norm": 0.0007010172121226788, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 540 + }, + { + "epoch": 0.0026674006059849197, + "grad_norm": 0.0007431154372170568, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 550 + }, + { + "epoch": 0.002715898798821009, + "grad_norm": 0.0004927972913719714, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 560 + }, + { + "epoch": 0.0027643969916570986, + "grad_norm": 0.0005598691641353071, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 570 + }, + { + "epoch": 0.002812895184493188, + "grad_norm": 0.0005180771113373339, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 580 + }, + { + "epoch": 0.0028613933773292775, + "grad_norm": 0.0007024877122603357, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 590 + }, + { + "epoch": 0.0029098915701653667, + "grad_norm": 0.0008381381630897522, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 600 + }, + { + "epoch": 0.0029583897630014564, + "grad_norm": 0.0007985506090335548, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 610 + }, + { + "epoch": 0.0030068879558375456, + "grad_norm": 0.0007280935533344746, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 620 + }, + { + "epoch": 0.0030553861486736352, + "grad_norm": 0.013212242163717747, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 630 + }, + { + "epoch": 0.0031038843415097245, + "grad_norm": 0.0006937393918633461, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 640 + }, + { + "epoch": 0.003152382534345814, + "grad_norm": 0.000510547251906246, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 650 + }, + { + "epoch": 0.0032008807271819034, + "grad_norm": 0.0005374035681597888, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 660 + }, + { + "epoch": 0.003249378920017993, + "grad_norm": 0.0008110536728054285, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 670 + }, + { + "epoch": 0.0032978771128540823, + "grad_norm": 0.000788794772233814, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 680 + }, + { + "epoch": 0.003346375305690172, + "grad_norm": 0.000516055035404861, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 690 + }, + { + "epoch": 0.003394873498526261, + "grad_norm": 0.00039570246008224785, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 700 + }, + { + "epoch": 0.003443371691362351, + "grad_norm": 0.0004178276867605746, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 710 + }, + { + "epoch": 0.00349186988419844, + "grad_norm": 0.0005905093275941908, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 720 + }, + { + "epoch": 0.0035403680770345297, + "grad_norm": 0.00031047806260176003, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 730 + }, + { + "epoch": 0.003588866269870619, + "grad_norm": 0.0003917264111805707, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 740 + }, + { + "epoch": 0.0036373644627067086, + "grad_norm": 0.00040266895666718483, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 750 + }, + { + "epoch": 0.003685862655542798, + "grad_norm": 0.001953916624188423, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 760 + }, + { + "epoch": 0.0037343608483788875, + "grad_norm": 0.00027910369681194425, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 770 + }, + { + "epoch": 0.0037828590412149767, + "grad_norm": 0.00028836799901910126, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 780 + }, + { + "epoch": 0.0038313572340510664, + "grad_norm": 0.0004685298481490463, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 790 + }, + { + "epoch": 0.0038798554268871556, + "grad_norm": 0.00034783914452418685, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 800 + }, + { + "epoch": 0.003928353619723245, + "grad_norm": 0.00026378428447060287, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 810 + }, + { + "epoch": 0.003976851812559335, + "grad_norm": 0.00024760147789493203, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 820 + }, + { + "epoch": 0.004025350005395424, + "grad_norm": 0.0002506239979993552, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.004073848198231513, + "grad_norm": 0.00028521669446490705, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.004122346391067603, + "grad_norm": 0.00028912577545270324, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 850 + }, + { + "epoch": 0.004170844583903693, + "grad_norm": 0.00040300696855410933, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 860 + }, + { + "epoch": 0.004219342776739782, + "grad_norm": 0.0004937839112244546, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 870 + }, + { + "epoch": 0.004267840969575871, + "grad_norm": 0.0003239746729377657, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 880 + }, + { + "epoch": 0.00431633916241196, + "grad_norm": 0.0005633537657558918, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 890 + }, + { + "epoch": 0.0043648373552480505, + "grad_norm": 0.0003012517699971795, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 900 + }, + { + "epoch": 0.00441333554808414, + "grad_norm": 0.00041446267277933657, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 910 + }, + { + "epoch": 0.004461833740920229, + "grad_norm": 0.03768712282180786, + "learning_rate": 0.0002, + "loss": 0.0027, + "step": 920 + }, + { + "epoch": 0.004510331933756318, + "grad_norm": 0.00550413690507412, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 930 + }, + { + "epoch": 0.004558830126592408, + "grad_norm": 0.0008840215741656721, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 940 + }, + { + "epoch": 0.0046073283194284975, + "grad_norm": 0.0011060723336413503, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 950 + }, + { + "epoch": 0.004655826512264587, + "grad_norm": 0.0026776755694299936, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 960 + }, + { + "epoch": 0.004704324705100676, + "grad_norm": 0.04273254796862602, + "learning_rate": 0.0002, + "loss": 0.0058, + "step": 970 + }, + { + "epoch": 0.004752822897936766, + "grad_norm": 0.0015692897140979767, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 980 + }, + { + "epoch": 0.004801321090772855, + "grad_norm": 0.01573827862739563, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 990 + }, + { + "epoch": 0.0048498192836089445, + "grad_norm": 0.014643815346062183, + "learning_rate": 0.0002, + "loss": 0.0022, + "step": 1000 + }, + { + "epoch": 0.004898317476445034, + "grad_norm": 0.0009391502244397998, + "learning_rate": 0.0002, + "loss": 0.0062, + "step": 1010 + }, + { + "epoch": 0.004946815669281124, + "grad_norm": 0.001330905593931675, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1020 + }, + { + "epoch": 0.004995313862117213, + "grad_norm": 0.0009897787822410464, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 1030 + }, + { + "epoch": 0.005043812054953302, + "grad_norm": 0.0006987621309235692, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1040 + }, + { + "epoch": 0.0050923102477893915, + "grad_norm": 0.0005251926486380398, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1050 + }, + { + "epoch": 0.005140808440625482, + "grad_norm": 0.0005181785672903061, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1060 + }, + { + "epoch": 0.005189306633461571, + "grad_norm": 0.0005012532928958535, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1070 + }, + { + "epoch": 0.00523780482629766, + "grad_norm": 0.00036683777580037713, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1080 + }, + { + "epoch": 0.005286303019133749, + "grad_norm": 0.00036020923289470375, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1090 + }, + { + "epoch": 0.005334801211969839, + "grad_norm": 0.09687065333127975, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1100 + }, + { + "epoch": 0.005383299404805929, + "grad_norm": 0.0009012875379994512, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1110 + }, + { + "epoch": 0.005431797597642018, + "grad_norm": 0.0002848742878995836, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 1120 + }, + { + "epoch": 0.005480295790478107, + "grad_norm": 0.0004620971158146858, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1130 + }, + { + "epoch": 0.005528793983314197, + "grad_norm": 0.0012130578979849815, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 1140 + }, + { + "epoch": 0.005577292176150286, + "grad_norm": 0.0006764543359167874, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1150 + }, + { + "epoch": 0.005625790368986376, + "grad_norm": 0.0005768231931142509, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 1160 + }, + { + "epoch": 0.005674288561822465, + "grad_norm": 0.0005872192559763789, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1170 + }, + { + "epoch": 0.005722786754658555, + "grad_norm": 0.0004242525901645422, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1180 + }, + { + "epoch": 0.005771284947494644, + "grad_norm": 0.000369954330381006, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1190 + }, + { + "epoch": 0.005819783140330733, + "grad_norm": 0.0003509187663439661, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1200 + }, + { + "epoch": 0.005868281333166823, + "grad_norm": 0.00027737728669308126, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1210 + }, + { + "epoch": 0.005916779526002913, + "grad_norm": 0.00025289514451287687, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1220 + }, + { + "epoch": 0.005965277718839002, + "grad_norm": 0.0003254996845498681, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1230 + }, + { + "epoch": 0.006013775911675091, + "grad_norm": 0.0002572360390331596, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1240 + }, + { + "epoch": 0.00606227410451118, + "grad_norm": 0.0017714647110551596, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1250 + }, + { + "epoch": 0.0061107722973472705, + "grad_norm": 0.00018860500131268054, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1260 + }, + { + "epoch": 0.00615927049018336, + "grad_norm": 0.0002288929099449888, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 1270 + }, + { + "epoch": 0.006207768683019449, + "grad_norm": 0.0003170353302266449, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1280 + }, + { + "epoch": 0.006256266875855538, + "grad_norm": 0.00021989369997754693, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1290 + }, + { + "epoch": 0.006304765068691628, + "grad_norm": 0.00021534231200348586, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 0.0063532632615277175, + "grad_norm": 0.00018051544611807913, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1310 + }, + { + "epoch": 0.006401761454363807, + "grad_norm": 0.0007750970544293523, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 1320 + }, + { + "epoch": 0.006450259647199896, + "grad_norm": 0.001144366804510355, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1330 + }, + { + "epoch": 0.006498757840035986, + "grad_norm": 0.0016096309991553426, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1340 + }, + { + "epoch": 0.006547256032872075, + "grad_norm": 0.002395368181169033, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1350 + }, + { + "epoch": 0.0065957542257081645, + "grad_norm": 0.0005462820990942419, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 0.006644252418544254, + "grad_norm": 0.00045515570673160255, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1370 + }, + { + "epoch": 0.006692750611380344, + "grad_norm": 0.0003860605356749147, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1380 + }, + { + "epoch": 0.006741248804216433, + "grad_norm": 0.0005727302632294595, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1390 + }, + { + "epoch": 0.006789746997052522, + "grad_norm": 0.0008291719714179635, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1400 + }, + { + "epoch": 0.0068382451898886115, + "grad_norm": 0.00048769699060358107, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1410 + }, + { + "epoch": 0.006886743382724702, + "grad_norm": 0.0003173357981722802, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1420 + }, + { + "epoch": 0.006935241575560791, + "grad_norm": 0.00020130285702180117, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1430 + }, + { + "epoch": 0.00698373976839688, + "grad_norm": 0.00022847567743156105, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1440 + }, + { + "epoch": 0.007032237961232969, + "grad_norm": 0.00020484585547819734, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 0.007080736154069059, + "grad_norm": 0.0038814437575638294, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1460 + }, + { + "epoch": 0.007129234346905149, + "grad_norm": 0.00018287332204636186, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1470 + }, + { + "epoch": 0.007177732539741238, + "grad_norm": 0.00014427177666220814, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1480 + }, + { + "epoch": 0.007226230732577327, + "grad_norm": 0.00015218337648548186, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 0.007274728925413417, + "grad_norm": 0.00013113676686771214, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1500 + }, + { + "epoch": 0.007323227118249506, + "grad_norm": 0.00013037346070632339, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1510 + }, + { + "epoch": 0.007371725311085596, + "grad_norm": 0.00018233424634672701, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1520 + }, + { + "epoch": 0.007420223503921685, + "grad_norm": 0.0010977855417877436, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1530 + }, + { + "epoch": 0.007468721696757775, + "grad_norm": 0.00027828459860756993, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1540 + }, + { + "epoch": 0.007517219889593864, + "grad_norm": 0.00022721156710758805, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1550 + }, + { + "epoch": 0.007565718082429953, + "grad_norm": 0.00016667507588863373, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1560 + }, + { + "epoch": 0.007614216275266043, + "grad_norm": 0.00037867180071771145, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1570 + }, + { + "epoch": 0.007662714468102133, + "grad_norm": 0.0003820607962552458, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1580 + }, + { + "epoch": 0.007711212660938222, + "grad_norm": 0.0005051225307397544, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 1590 + }, + { + "epoch": 0.007759710853774311, + "grad_norm": 0.0007472014985978603, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1600 + }, + { + "epoch": 0.0078082090466104, + "grad_norm": 0.00038025015965104103, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1610 + }, + { + "epoch": 0.00785670723944649, + "grad_norm": 0.00025320579879917204, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1620 + }, + { + "epoch": 0.00790520543228258, + "grad_norm": 0.00016546444385312498, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1630 + }, + { + "epoch": 0.00795370362511867, + "grad_norm": 0.00021247833501547575, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1640 + }, + { + "epoch": 0.008002201817954758, + "grad_norm": 0.0002372404414927587, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 1650 + }, + { + "epoch": 0.008050700010790848, + "grad_norm": 0.00042528100311756134, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1660 + }, + { + "epoch": 0.008099198203626937, + "grad_norm": 0.004243955016136169, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 1670 + }, + { + "epoch": 0.008147696396463027, + "grad_norm": 0.021519022062420845, + "learning_rate": 0.0002, + "loss": 0.0016, + "step": 1680 + }, + { + "epoch": 0.008196194589299117, + "grad_norm": 0.000375902745872736, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 1690 + }, + { + "epoch": 0.008244692782135205, + "grad_norm": 0.02532416582107544, + "learning_rate": 0.0002, + "loss": 0.0033, + "step": 1700 + }, + { + "epoch": 0.008293190974971295, + "grad_norm": 0.0005332357832230628, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1710 + }, + { + "epoch": 0.008341689167807385, + "grad_norm": 0.0004275449609849602, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1720 + }, + { + "epoch": 0.008390187360643474, + "grad_norm": 0.0004501264775171876, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1730 + }, + { + "epoch": 0.008438685553479564, + "grad_norm": 0.00023955205688253045, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1740 + }, + { + "epoch": 0.008487183746315652, + "grad_norm": 0.00022734090453013778, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1750 + }, + { + "epoch": 0.008535681939151742, + "grad_norm": 0.03831092640757561, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1760 + }, + { + "epoch": 0.008584180131987832, + "grad_norm": 0.0004711935471277684, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 1770 + }, + { + "epoch": 0.00863267832482392, + "grad_norm": 0.001324107637628913, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1780 + }, + { + "epoch": 0.00868117651766001, + "grad_norm": 0.0007930688443593681, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1790 + }, + { + "epoch": 0.008729674710496101, + "grad_norm": 0.00031223424593918025, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1800 + }, + { + "epoch": 0.00877817290333219, + "grad_norm": 0.00016809871885925531, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1810 + }, + { + "epoch": 0.00882667109616828, + "grad_norm": 0.00013141403906047344, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1820 + }, + { + "epoch": 0.008875169289004368, + "grad_norm": 0.00013855239376425743, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1830 + }, + { + "epoch": 0.008923667481840458, + "grad_norm": 0.03184623271226883, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 1840 + }, + { + "epoch": 0.008972165674676548, + "grad_norm": 0.00018033194646704942, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1850 + }, + { + "epoch": 0.009020663867512636, + "grad_norm": 0.0001644649455556646, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1860 + }, + { + "epoch": 0.009069162060348726, + "grad_norm": 0.00014652893878519535, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1870 + }, + { + "epoch": 0.009117660253184817, + "grad_norm": 0.00038745460915379226, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1880 + }, + { + "epoch": 0.009166158446020905, + "grad_norm": 0.002037727041170001, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1890 + }, + { + "epoch": 0.009214656638856995, + "grad_norm": 0.0011448945151641965, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 1900 + }, + { + "epoch": 0.009263154831693083, + "grad_norm": 0.0005339317722246051, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1910 + }, + { + "epoch": 0.009311653024529173, + "grad_norm": 0.00031241695978678763, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1920 + }, + { + "epoch": 0.009360151217365264, + "grad_norm": 0.0004136281495448202, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1930 + }, + { + "epoch": 0.009408649410201352, + "grad_norm": 0.0002452209300827235, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 1940 + }, + { + "epoch": 0.009457147603037442, + "grad_norm": 0.0002588129136711359, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1950 + }, + { + "epoch": 0.009505645795873532, + "grad_norm": 0.00028753941296599805, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1960 + }, + { + "epoch": 0.00955414398870962, + "grad_norm": 0.0002383045939495787, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1970 + }, + { + "epoch": 0.00960264218154571, + "grad_norm": 0.00022786481713410467, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 1980 + }, + { + "epoch": 0.009651140374381799, + "grad_norm": 0.00017714434943627566, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 1990 + }, + { + "epoch": 0.009699638567217889, + "grad_norm": 0.00012434124073479325, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2000 + }, + { + "epoch": 0.009748136760053979, + "grad_norm": 0.00012143594358349219, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2010 + }, + { + "epoch": 0.009796634952890067, + "grad_norm": 0.0001250120549229905, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2020 + }, + { + "epoch": 0.009845133145726158, + "grad_norm": 0.00010851406841538846, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2030 + }, + { + "epoch": 0.009893631338562248, + "grad_norm": 8.474250353174284e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 2040 + }, + { + "epoch": 0.009942129531398336, + "grad_norm": 0.00013999867951497436, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2050 + }, + { + "epoch": 0.009990627724234426, + "grad_norm": 0.0030420986004173756, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2060 + }, + { + "epoch": 0.010039125917070514, + "grad_norm": 0.00012416052049957216, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 2070 + }, + { + "epoch": 0.010087624109906605, + "grad_norm": 0.0002773732994683087, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2080 + }, + { + "epoch": 0.010136122302742695, + "grad_norm": 0.0002473875065334141, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2090 + }, + { + "epoch": 0.010184620495578783, + "grad_norm": 0.00019013883138541132, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2100 + }, + { + "epoch": 0.010233118688414873, + "grad_norm": 0.00011932319466723129, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2110 + }, + { + "epoch": 0.010281616881250963, + "grad_norm": 9.397786925546825e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2120 + }, + { + "epoch": 0.010330115074087052, + "grad_norm": 9.93499270407483e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2130 + }, + { + "epoch": 0.010378613266923142, + "grad_norm": 0.0005599190481007099, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2140 + }, + { + "epoch": 0.01042711145975923, + "grad_norm": 8.05821327958256e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2150 + }, + { + "epoch": 0.01047560965259532, + "grad_norm": 7.690718484809622e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2160 + }, + { + "epoch": 0.01052410784543141, + "grad_norm": 0.0001311394589720294, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2170 + }, + { + "epoch": 0.010572606038267499, + "grad_norm": 7.0554917328991e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2180 + }, + { + "epoch": 0.010621104231103589, + "grad_norm": 6.566940282937139e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2190 + }, + { + "epoch": 0.010669602423939679, + "grad_norm": 7.170523167587817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2200 + }, + { + "epoch": 0.010718100616775767, + "grad_norm": 7.413407729472965e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2210 + }, + { + "epoch": 0.010766598809611857, + "grad_norm": 6.512560503324494e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2220 + }, + { + "epoch": 0.010815097002447946, + "grad_norm": 6.419439159799367e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2230 + }, + { + "epoch": 0.010863595195284036, + "grad_norm": 5.435882121673785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2240 + }, + { + "epoch": 0.010912093388120126, + "grad_norm": 5.7789988204604015e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2250 + }, + { + "epoch": 0.010960591580956214, + "grad_norm": 0.00012735588825307786, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2260 + }, + { + "epoch": 0.011009089773792304, + "grad_norm": 7.192805787781253e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2270 + }, + { + "epoch": 0.011057587966628394, + "grad_norm": 6.439461139962077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2280 + }, + { + "epoch": 0.011106086159464483, + "grad_norm": 6.443232268793508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2290 + }, + { + "epoch": 0.011154584352300573, + "grad_norm": 5.329927444108762e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2300 + }, + { + "epoch": 0.011203082545136661, + "grad_norm": 5.3703432058682665e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2310 + }, + { + "epoch": 0.011251580737972751, + "grad_norm": 9.636081085773185e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2320 + }, + { + "epoch": 0.011300078930808841, + "grad_norm": 5.2529750973917544e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2330 + }, + { + "epoch": 0.01134857712364493, + "grad_norm": 5.52529381820932e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2340 + }, + { + "epoch": 0.01139707531648102, + "grad_norm": 5.330303974915296e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2350 + }, + { + "epoch": 0.01144557350931711, + "grad_norm": 6.210993160493672e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 2360 + }, + { + "epoch": 0.011494071702153198, + "grad_norm": 5.533001967705786e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2370 + }, + { + "epoch": 0.011542569894989288, + "grad_norm": 6.0176880651852116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2380 + }, + { + "epoch": 0.011591068087825377, + "grad_norm": 4.627074667951092e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2390 + }, + { + "epoch": 0.011639566280661467, + "grad_norm": 4.806919241673313e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2400 + }, + { + "epoch": 0.011688064473497557, + "grad_norm": 5.363941818359308e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2410 + }, + { + "epoch": 0.011736562666333645, + "grad_norm": 5.238647645455785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2420 + }, + { + "epoch": 0.011785060859169735, + "grad_norm": 5.766215326730162e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2430 + }, + { + "epoch": 0.011833559052005825, + "grad_norm": 4.784628617926501e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2440 + }, + { + "epoch": 0.011882057244841914, + "grad_norm": 4.507412450038828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2450 + }, + { + "epoch": 0.011930555437678004, + "grad_norm": 5.009095912100747e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2460 + }, + { + "epoch": 0.011979053630514092, + "grad_norm": 9.547016088617966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2470 + }, + { + "epoch": 0.012027551823350182, + "grad_norm": 4.956463089911267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2480 + }, + { + "epoch": 0.012076050016186272, + "grad_norm": 3.8098092772997916e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 2490 + }, + { + "epoch": 0.01212454820902236, + "grad_norm": 5.3010197007097304e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2500 + }, + { + "epoch": 0.012173046401858451, + "grad_norm": 6.385769665939733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2510 + }, + { + "epoch": 0.012221544594694541, + "grad_norm": 6.0451315221143886e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2520 + }, + { + "epoch": 0.01227004278753063, + "grad_norm": 6.026815754012205e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2530 + }, + { + "epoch": 0.01231854098036672, + "grad_norm": 4.749012441607192e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2540 + }, + { + "epoch": 0.012367039173202808, + "grad_norm": 4.3753931095125154e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2550 + }, + { + "epoch": 0.012415537366038898, + "grad_norm": 4.421301491674967e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2560 + }, + { + "epoch": 0.012464035558874988, + "grad_norm": 4.9000231229001656e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2570 + }, + { + "epoch": 0.012512533751711076, + "grad_norm": 4.536732376436703e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2580 + }, + { + "epoch": 0.012561031944547166, + "grad_norm": 3.858628042507917e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2590 + }, + { + "epoch": 0.012609530137383257, + "grad_norm": 3.9656504668528214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2600 + }, + { + "epoch": 0.012658028330219345, + "grad_norm": 4.04647144023329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2610 + }, + { + "epoch": 0.012706526523055435, + "grad_norm": 3.872746674460359e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2620 + }, + { + "epoch": 0.012755024715891523, + "grad_norm": 3.813809234998189e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2630 + }, + { + "epoch": 0.012803522908727613, + "grad_norm": 3.569950058590621e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2640 + }, + { + "epoch": 0.012852021101563704, + "grad_norm": 3.4690314350882545e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2650 + }, + { + "epoch": 0.012900519294399792, + "grad_norm": 3.553960777935572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2660 + }, + { + "epoch": 0.012949017487235882, + "grad_norm": 4.0256083593703806e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2670 + }, + { + "epoch": 0.012997515680071972, + "grad_norm": 3.963953349739313e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2680 + }, + { + "epoch": 0.01304601387290806, + "grad_norm": 3.474903496680781e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2690 + }, + { + "epoch": 0.01309451206574415, + "grad_norm": 4.418441676534712e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2700 + }, + { + "epoch": 0.013143010258580239, + "grad_norm": 4.4784770580008626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2710 + }, + { + "epoch": 0.013191508451416329, + "grad_norm": 3.307613587821834e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2720 + }, + { + "epoch": 0.013240006644252419, + "grad_norm": 3.112412741756998e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2730 + }, + { + "epoch": 0.013288504837088507, + "grad_norm": 3.0193372367648408e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2740 + }, + { + "epoch": 0.013337003029924598, + "grad_norm": 3.3176034776261076e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 2750 + }, + { + "epoch": 0.013385501222760688, + "grad_norm": 3.879675205098465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2760 + }, + { + "epoch": 0.013433999415596776, + "grad_norm": 3.438025669311173e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2770 + }, + { + "epoch": 0.013482497608432866, + "grad_norm": 0.00030628612148575485, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2780 + }, + { + "epoch": 0.013530995801268954, + "grad_norm": 3.189587368979119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2790 + }, + { + "epoch": 0.013579493994105045, + "grad_norm": 3.3002674172166735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2800 + }, + { + "epoch": 0.013627992186941135, + "grad_norm": 3.2518863008590415e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2810 + }, + { + "epoch": 0.013676490379777223, + "grad_norm": 3.289935193606652e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2820 + }, + { + "epoch": 0.013724988572613313, + "grad_norm": 3.195609315298498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2830 + }, + { + "epoch": 0.013773486765449403, + "grad_norm": 3.363032374181785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2840 + }, + { + "epoch": 0.013821984958285492, + "grad_norm": 3.2508956792298704e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2850 + }, + { + "epoch": 0.013870483151121582, + "grad_norm": 2.807633427437395e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2860 + }, + { + "epoch": 0.01391898134395767, + "grad_norm": 3.4021421015495434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2870 + }, + { + "epoch": 0.01396747953679376, + "grad_norm": 3.490302333375439e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2880 + }, + { + "epoch": 0.01401597772962985, + "grad_norm": 2.934489748440683e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2890 + }, + { + "epoch": 0.014064475922465939, + "grad_norm": 3.0453318686340936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2900 + }, + { + "epoch": 0.014112974115302029, + "grad_norm": 2.6859714125748724e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2910 + }, + { + "epoch": 0.014161472308138119, + "grad_norm": 2.7808655431726947e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2920 + }, + { + "epoch": 0.014209970500974207, + "grad_norm": 2.972401853185147e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2930 + }, + { + "epoch": 0.014258468693810297, + "grad_norm": 3.193219527020119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2940 + }, + { + "epoch": 0.014306966886646386, + "grad_norm": 2.5547258701408282e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2950 + }, + { + "epoch": 0.014355465079482476, + "grad_norm": 2.809025681926869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2960 + }, + { + "epoch": 0.014403963272318566, + "grad_norm": 6.532006955239922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2970 + }, + { + "epoch": 0.014452461465154654, + "grad_norm": 2.6718509616330266e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2980 + }, + { + "epoch": 0.014500959657990744, + "grad_norm": 2.597825186967384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 2990 + }, + { + "epoch": 0.014549457850826834, + "grad_norm": 2.544716880947817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3000 + }, + { + "epoch": 0.014597956043662923, + "grad_norm": 8.716906449990347e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3010 + }, + { + "epoch": 0.014646454236499013, + "grad_norm": 3.658728383015841e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3020 + }, + { + "epoch": 0.014694952429335101, + "grad_norm": 2.9040293156867847e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3030 + }, + { + "epoch": 0.014743450622171191, + "grad_norm": 2.7226524252910167e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3040 + }, + { + "epoch": 0.014791948815007281, + "grad_norm": 2.5078217731788754e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3050 + }, + { + "epoch": 0.01484044700784337, + "grad_norm": 2.5239569367840886e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3060 + }, + { + "epoch": 0.01488894520067946, + "grad_norm": 2.591350130387582e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3070 + }, + { + "epoch": 0.01493744339351555, + "grad_norm": 2.3746091756038368e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3080 + }, + { + "epoch": 0.014985941586351638, + "grad_norm": 2.6277071810909547e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3090 + }, + { + "epoch": 0.015034439779187728, + "grad_norm": 2.5831610400928184e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3100 + }, + { + "epoch": 0.015082937972023817, + "grad_norm": 2.1711408408009447e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3110 + }, + { + "epoch": 0.015131436164859907, + "grad_norm": 4.807241202797741e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3120 + }, + { + "epoch": 0.015179934357695997, + "grad_norm": 2.1975380150252022e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3130 + }, + { + "epoch": 0.015228432550532085, + "grad_norm": 2.4351558749913238e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3140 + }, + { + "epoch": 0.015276930743368175, + "grad_norm": 2.085001142404508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3150 + }, + { + "epoch": 0.015325428936204265, + "grad_norm": 2.5563431336195208e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3160 + }, + { + "epoch": 0.015373927129040354, + "grad_norm": 2.669816603884101e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3170 + }, + { + "epoch": 0.015422425321876444, + "grad_norm": 2.8344389647827484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3180 + }, + { + "epoch": 0.015470923514712532, + "grad_norm": 3.239440775359981e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3190 + }, + { + "epoch": 0.015519421707548622, + "grad_norm": 3.216868208255619e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3200 + }, + { + "epoch": 0.015567919900384712, + "grad_norm": 2.523766306694597e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3210 + }, + { + "epoch": 0.0156164180932208, + "grad_norm": 2.5410614398424514e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3220 + }, + { + "epoch": 0.01566491628605689, + "grad_norm": 6.142240454209968e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3230 + }, + { + "epoch": 0.01571341447889298, + "grad_norm": 2.9455424737534486e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3240 + }, + { + "epoch": 0.01576191267172907, + "grad_norm": 3.059877417399548e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3250 + }, + { + "epoch": 0.01581041086456516, + "grad_norm": 2.692589259822853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3260 + }, + { + "epoch": 0.015858909057401248, + "grad_norm": 2.2913105567567982e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3270 + }, + { + "epoch": 0.01590740725023734, + "grad_norm": 1.8952971004182473e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3280 + }, + { + "epoch": 0.015955905443073428, + "grad_norm": 2.6442254238645546e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3290 + }, + { + "epoch": 0.016004403635909516, + "grad_norm": 2.1637737518176436e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3300 + }, + { + "epoch": 0.016052901828745605, + "grad_norm": 2.683934144442901e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3310 + }, + { + "epoch": 0.016101400021581697, + "grad_norm": 2.808283898048103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3320 + }, + { + "epoch": 0.016149898214417785, + "grad_norm": 1.8540775272413157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3330 + }, + { + "epoch": 0.016198396407253873, + "grad_norm": 1.922142291732598e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3340 + }, + { + "epoch": 0.016246894600089965, + "grad_norm": 1.969012555491645e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3350 + }, + { + "epoch": 0.016295392792926053, + "grad_norm": 1.9848514057230204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3360 + }, + { + "epoch": 0.016343890985762142, + "grad_norm": 1.8932407328975387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3370 + }, + { + "epoch": 0.016392389178598234, + "grad_norm": 2.1402060156106018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3380 + }, + { + "epoch": 0.016440887371434322, + "grad_norm": 2.2788977730670013e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3390 + }, + { + "epoch": 0.01648938556427041, + "grad_norm": 1.8156835722038522e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3400 + }, + { + "epoch": 0.016537883757106502, + "grad_norm": 1.8024480596068315e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3410 + }, + { + "epoch": 0.01658638194994259, + "grad_norm": 3.1111074349610135e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3420 + }, + { + "epoch": 0.01663488014277868, + "grad_norm": 1.6724710803828202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3430 + }, + { + "epoch": 0.01668337833561477, + "grad_norm": 1.9928987967432477e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3440 + }, + { + "epoch": 0.01673187652845086, + "grad_norm": 3.9633272535866126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3450 + }, + { + "epoch": 0.016780374721286948, + "grad_norm": 2.359643804084044e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3460 + }, + { + "epoch": 0.016828872914123036, + "grad_norm": 2.2968695702729747e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3470 + }, + { + "epoch": 0.016877371106959128, + "grad_norm": 2.3718172087683342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3480 + }, + { + "epoch": 0.016925869299795216, + "grad_norm": 2.1252251826808788e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3490 + }, + { + "epoch": 0.016974367492631304, + "grad_norm": 2.0427803974598646e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3500 + }, + { + "epoch": 0.017022865685467396, + "grad_norm": 2.0412424419191666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3510 + }, + { + "epoch": 0.017071363878303485, + "grad_norm": 2.7123120162286796e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3520 + }, + { + "epoch": 0.017119862071139573, + "grad_norm": 4.439138137968257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3530 + }, + { + "epoch": 0.017168360263975665, + "grad_norm": 2.0359104382805526e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3540 + }, + { + "epoch": 0.017216858456811753, + "grad_norm": 2.348544330743607e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3550 + }, + { + "epoch": 0.01726535664964784, + "grad_norm": 2.047535963356495e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3560 + }, + { + "epoch": 0.017313854842483933, + "grad_norm": 2.5934654331649654e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3570 + }, + { + "epoch": 0.01736235303532002, + "grad_norm": 1.9089007764705457e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3580 + }, + { + "epoch": 0.01741085122815611, + "grad_norm": 2.4476456019328907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3590 + }, + { + "epoch": 0.017459349420992202, + "grad_norm": 1.862550743680913e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3600 + }, + { + "epoch": 0.01750784761382829, + "grad_norm": 2.2003345293342136e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3610 + }, + { + "epoch": 0.01755634580666438, + "grad_norm": 2.10319285542937e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3620 + }, + { + "epoch": 0.017604843999500467, + "grad_norm": 2.5724679289851338e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3630 + }, + { + "epoch": 0.01765334219233656, + "grad_norm": 1.8561622709967196e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3640 + }, + { + "epoch": 0.017701840385172647, + "grad_norm": 1.929724749061279e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3650 + }, + { + "epoch": 0.017750338578008736, + "grad_norm": 1.599496499693487e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3660 + }, + { + "epoch": 0.017798836770844827, + "grad_norm": 2.225570278824307e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3670 + }, + { + "epoch": 0.017847334963680916, + "grad_norm": 1.609372702660039e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3680 + }, + { + "epoch": 0.017895833156517004, + "grad_norm": 2.1418916730908677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3690 + }, + { + "epoch": 0.017944331349353096, + "grad_norm": 2.1873465811950155e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3700 + }, + { + "epoch": 0.017992829542189184, + "grad_norm": 1.890608291432727e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3710 + }, + { + "epoch": 0.018041327735025273, + "grad_norm": 1.6422887711087242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3720 + }, + { + "epoch": 0.018089825927861364, + "grad_norm": 2.2717893443768844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3730 + }, + { + "epoch": 0.018138324120697453, + "grad_norm": 1.8193206415162422e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3740 + }, + { + "epoch": 0.01818682231353354, + "grad_norm": 9.347966988570988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3750 + }, + { + "epoch": 0.018235320506369633, + "grad_norm": 4.942889790982008e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3760 + }, + { + "epoch": 0.01828381869920572, + "grad_norm": 8.335105667356402e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3770 + }, + { + "epoch": 0.01833231689204181, + "grad_norm": 1.7085816580220126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3780 + }, + { + "epoch": 0.018380815084877898, + "grad_norm": 1.5400844858959317e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3790 + }, + { + "epoch": 0.01842931327771399, + "grad_norm": 1.60175532073481e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3800 + }, + { + "epoch": 0.01847781147055008, + "grad_norm": 1.6502053767908365e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3810 + }, + { + "epoch": 0.018526309663386167, + "grad_norm": 2.095667878165841e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3820 + }, + { + "epoch": 0.01857480785622226, + "grad_norm": 1.4561408534063958e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3830 + }, + { + "epoch": 0.018623306049058347, + "grad_norm": 1.6186600987566635e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3840 + }, + { + "epoch": 0.018671804241894435, + "grad_norm": 1.6133721146616153e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3850 + }, + { + "epoch": 0.018720302434730527, + "grad_norm": 1.4049633136892226e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3860 + }, + { + "epoch": 0.018768800627566615, + "grad_norm": 1.545670602354221e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3870 + }, + { + "epoch": 0.018817298820402704, + "grad_norm": 2.0202322048135102e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3880 + }, + { + "epoch": 0.018865797013238796, + "grad_norm": 1.3844931345374789e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3890 + }, + { + "epoch": 0.018914295206074884, + "grad_norm": 2.6957028239849024e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3900 + }, + { + "epoch": 0.018962793398910972, + "grad_norm": 1.3786462659481913e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3910 + }, + { + "epoch": 0.019011291591747064, + "grad_norm": 1.4718484635523055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3920 + }, + { + "epoch": 0.019059789784583153, + "grad_norm": 1.4985786947363522e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3930 + }, + { + "epoch": 0.01910828797741924, + "grad_norm": 1.47341024785419e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3940 + }, + { + "epoch": 0.01915678617025533, + "grad_norm": 1.2669097486650571e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3950 + }, + { + "epoch": 0.01920528436309142, + "grad_norm": 0.000660232559312135, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3960 + }, + { + "epoch": 0.01925378255592751, + "grad_norm": 1.4272475709731225e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3970 + }, + { + "epoch": 0.019302280748763598, + "grad_norm": 1.2425161912688054e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3980 + }, + { + "epoch": 0.01935077894159969, + "grad_norm": 1.3335371477296576e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 3990 + }, + { + "epoch": 0.019399277134435778, + "grad_norm": 1.8507409549783915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4000 + }, + { + "epoch": 0.019447775327271866, + "grad_norm": 1.3201280125940684e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4010 + }, + { + "epoch": 0.019496273520107958, + "grad_norm": 1.1998431546089705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4020 + }, + { + "epoch": 0.019544771712944047, + "grad_norm": 1.1841683772217948e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4030 + }, + { + "epoch": 0.019593269905780135, + "grad_norm": 1.4229660337150563e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4040 + }, + { + "epoch": 0.019641768098616227, + "grad_norm": 1.7309675968135707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4050 + }, + { + "epoch": 0.019690266291452315, + "grad_norm": 1.386030089634005e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4060 + }, + { + "epoch": 0.019738764484288403, + "grad_norm": 1.1688211998261977e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4070 + }, + { + "epoch": 0.019787262677124495, + "grad_norm": 1.2108700502722058e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4080 + }, + { + "epoch": 0.019835760869960584, + "grad_norm": 1.2539600902528036e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4090 + }, + { + "epoch": 0.019884259062796672, + "grad_norm": 1.2441832950571552e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4100 + }, + { + "epoch": 0.01993275725563276, + "grad_norm": 1.1901170182682108e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4110 + }, + { + "epoch": 0.019981255448468852, + "grad_norm": 1.1525607078510802e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4120 + }, + { + "epoch": 0.02002975364130494, + "grad_norm": 1.1843925676657818e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4130 + }, + { + "epoch": 0.02007825183414103, + "grad_norm": 1.1561856808839366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4140 + }, + { + "epoch": 0.02012675002697712, + "grad_norm": 1.2374130164971575e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4150 + }, + { + "epoch": 0.02017524821981321, + "grad_norm": 1.1701306902978104e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4160 + }, + { + "epoch": 0.020223746412649297, + "grad_norm": 1.0118999853148125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4170 + }, + { + "epoch": 0.02027224460548539, + "grad_norm": 1.1716034350683913e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4180 + }, + { + "epoch": 0.020320742798321478, + "grad_norm": 2.262662019347772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4190 + }, + { + "epoch": 0.020369240991157566, + "grad_norm": 1.1629310392891057e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4200 + }, + { + "epoch": 0.020417739183993658, + "grad_norm": 1.1180899491591845e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4210 + }, + { + "epoch": 0.020466237376829746, + "grad_norm": 1.1558730875549372e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4220 + }, + { + "epoch": 0.020514735569665835, + "grad_norm": 1.0648478564689867e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4230 + }, + { + "epoch": 0.020563233762501926, + "grad_norm": 1.2251565749465954e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4240 + }, + { + "epoch": 0.020611731955338015, + "grad_norm": 1.0718873454607092e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4250 + }, + { + "epoch": 0.020660230148174103, + "grad_norm": 1.0541658411966637e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4260 + }, + { + "epoch": 0.02070872834101019, + "grad_norm": 1.2773655726050492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4270 + }, + { + "epoch": 0.020757226533846283, + "grad_norm": 1.0555306289461441e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4280 + }, + { + "epoch": 0.02080572472668237, + "grad_norm": 1.230425823450787e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4290 + }, + { + "epoch": 0.02085422291951846, + "grad_norm": 1.0438788194733206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4300 + }, + { + "epoch": 0.020902721112354552, + "grad_norm": 1.0556002962403e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4310 + }, + { + "epoch": 0.02095121930519064, + "grad_norm": 9.921159289660864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4320 + }, + { + "epoch": 0.02099971749802673, + "grad_norm": 1.031543615681585e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4330 + }, + { + "epoch": 0.02104821569086282, + "grad_norm": 9.659279385232367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4340 + }, + { + "epoch": 0.02109671388369891, + "grad_norm": 1.099162909667939e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4350 + }, + { + "epoch": 0.021145212076534997, + "grad_norm": 9.704109288577456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4360 + }, + { + "epoch": 0.02119371026937109, + "grad_norm": 1.013235032587545e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4370 + }, + { + "epoch": 0.021242208462207177, + "grad_norm": 9.676203262642957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4380 + }, + { + "epoch": 0.021290706655043266, + "grad_norm": 1.0511443178984337e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4390 + }, + { + "epoch": 0.021339204847879358, + "grad_norm": 9.69452321442077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4400 + }, + { + "epoch": 0.021387703040715446, + "grad_norm": 9.810908522922546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4410 + }, + { + "epoch": 0.021436201233551534, + "grad_norm": 9.145154763245955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4420 + }, + { + "epoch": 0.021484699426387623, + "grad_norm": 1.0036212188424543e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4430 + }, + { + "epoch": 0.021533197619223714, + "grad_norm": 9.212588338414207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4440 + }, + { + "epoch": 0.021581695812059803, + "grad_norm": 1.1454280866018962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4450 + }, + { + "epoch": 0.02163019400489589, + "grad_norm": 9.699712791189086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4460 + }, + { + "epoch": 0.021678692197731983, + "grad_norm": 9.733629667607602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4470 + }, + { + "epoch": 0.02172719039056807, + "grad_norm": 9.03077034308808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4480 + }, + { + "epoch": 0.02177568858340416, + "grad_norm": 9.394411790708546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4490 + }, + { + "epoch": 0.02182418677624025, + "grad_norm": 1.2135337783547584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4500 + }, + { + "epoch": 0.02187268496907634, + "grad_norm": 9.033049536810722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4510 + }, + { + "epoch": 0.021921183161912428, + "grad_norm": 9.008810593513772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4520 + }, + { + "epoch": 0.02196968135474852, + "grad_norm": 9.53160815697629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4530 + }, + { + "epoch": 0.02201817954758461, + "grad_norm": 1.0882147762458771e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4540 + }, + { + "epoch": 0.022066677740420697, + "grad_norm": 8.365943358512595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4550 + }, + { + "epoch": 0.02211517593325679, + "grad_norm": 9.030636647366919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4560 + }, + { + "epoch": 0.022163674126092877, + "grad_norm": 8.701438673597295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4570 + }, + { + "epoch": 0.022212172318928965, + "grad_norm": 1.11085446405923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4580 + }, + { + "epoch": 0.022260670511765054, + "grad_norm": 8.333625373779796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4590 + }, + { + "epoch": 0.022309168704601146, + "grad_norm": 8.141844773490448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4600 + }, + { + "epoch": 0.022357666897437234, + "grad_norm": 7.731326149951201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4610 + }, + { + "epoch": 0.022406165090273322, + "grad_norm": 8.796716429060325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4620 + }, + { + "epoch": 0.022454663283109414, + "grad_norm": 8.866407370078377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4630 + }, + { + "epoch": 0.022503161475945502, + "grad_norm": 1.1834132237709127e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4640 + }, + { + "epoch": 0.02255165966878159, + "grad_norm": 8.573326340410858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4650 + }, + { + "epoch": 0.022600157861617683, + "grad_norm": 8.440535566478502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4660 + }, + { + "epoch": 0.02264865605445377, + "grad_norm": 8.069107025221456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4670 + }, + { + "epoch": 0.02269715424728986, + "grad_norm": 9.369609870191198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4680 + }, + { + "epoch": 0.02274565244012595, + "grad_norm": 7.557323442597408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4690 + }, + { + "epoch": 0.02279415063296204, + "grad_norm": 8.471086403005756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4700 + }, + { + "epoch": 0.022842648825798128, + "grad_norm": 7.855770491005387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4710 + }, + { + "epoch": 0.02289114701863422, + "grad_norm": 7.562311111541931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4720 + }, + { + "epoch": 0.022939645211470308, + "grad_norm": 8.593046004534699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4730 + }, + { + "epoch": 0.022988143404306396, + "grad_norm": 7.766670023556799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4740 + }, + { + "epoch": 0.023036641597142485, + "grad_norm": 8.479108146275394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4750 + }, + { + "epoch": 0.023085139789978577, + "grad_norm": 8.438463737547863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4760 + }, + { + "epoch": 0.023133637982814665, + "grad_norm": 9.30433270696085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4770 + }, + { + "epoch": 0.023182136175650753, + "grad_norm": 7.591228495584801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4780 + }, + { + "epoch": 0.023230634368486845, + "grad_norm": 7.335223926929757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4790 + }, + { + "epoch": 0.023279132561322934, + "grad_norm": 7.786124115227722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4800 + }, + { + "epoch": 0.023327630754159022, + "grad_norm": 7.275707957887789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4810 + }, + { + "epoch": 0.023376128946995114, + "grad_norm": 7.5862112680624705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4820 + }, + { + "epoch": 0.023424627139831202, + "grad_norm": 8.274499123217538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4830 + }, + { + "epoch": 0.02347312533266729, + "grad_norm": 7.40771611162927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4840 + }, + { + "epoch": 0.023521623525503382, + "grad_norm": 8.432433787675109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4850 + }, + { + "epoch": 0.02357012171833947, + "grad_norm": 9.894341928884387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4860 + }, + { + "epoch": 0.02361861991117556, + "grad_norm": 8.106089808279648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4870 + }, + { + "epoch": 0.02366711810401165, + "grad_norm": 8.99591850611614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4880 + }, + { + "epoch": 0.02371561629684774, + "grad_norm": 7.667134013900068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4890 + }, + { + "epoch": 0.023764114489683828, + "grad_norm": 7.970151273184456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4900 + }, + { + "epoch": 0.023812612682519916, + "grad_norm": 7.418645509460475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4910 + }, + { + "epoch": 0.023861110875356008, + "grad_norm": 0.00018695919425226748, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4920 + }, + { + "epoch": 0.023909609068192096, + "grad_norm": 7.167537660279777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4930 + }, + { + "epoch": 0.023958107261028184, + "grad_norm": 8.739086297282483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4940 + }, + { + "epoch": 0.024006605453864276, + "grad_norm": 7.372039817710174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4950 + }, + { + "epoch": 0.024055103646700365, + "grad_norm": 8.081947271421086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4960 + }, + { + "epoch": 0.024103601839536453, + "grad_norm": 1.1621184057730716e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4970 + }, + { + "epoch": 0.024152100032372545, + "grad_norm": 0.0028724365402013063, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 4980 + }, + { + "epoch": 0.024200598225208633, + "grad_norm": 0.0049903118051588535, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 4990 + }, + { + "epoch": 0.02424909641804472, + "grad_norm": 4.0494833228876814e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5000 + }, + { + "epoch": 0.024297594610880813, + "grad_norm": 0.00011626134801190346, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5010 + }, + { + "epoch": 0.024346092803716902, + "grad_norm": 2.6259533115080558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5020 + }, + { + "epoch": 0.02439459099655299, + "grad_norm": 2.3327102098846808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5030 + }, + { + "epoch": 0.024443089189389082, + "grad_norm": 3.105128780589439e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5040 + }, + { + "epoch": 0.02449158738222517, + "grad_norm": 2.858679181372281e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5050 + }, + { + "epoch": 0.02454008557506126, + "grad_norm": 1.7966585801332258e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5060 + }, + { + "epoch": 0.024588583767897347, + "grad_norm": 1.6864065401023254e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5070 + }, + { + "epoch": 0.02463708196073344, + "grad_norm": 1.8220314814243466e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5080 + }, + { + "epoch": 0.024685580153569527, + "grad_norm": 5.9352325479267165e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5090 + }, + { + "epoch": 0.024734078346405616, + "grad_norm": 2.498740104783792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5100 + }, + { + "epoch": 0.024782576539241707, + "grad_norm": 1.4341011592478026e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5110 + }, + { + "epoch": 0.024831074732077796, + "grad_norm": 1.3966350707050879e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5120 + }, + { + "epoch": 0.024879572924913884, + "grad_norm": 1.5163705029408447e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5130 + }, + { + "epoch": 0.024928071117749976, + "grad_norm": 1.3937609764980152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5140 + }, + { + "epoch": 0.024976569310586064, + "grad_norm": 1.3512964869732969e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5150 + }, + { + "epoch": 0.025025067503422153, + "grad_norm": 1.122905177908251e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5160 + }, + { + "epoch": 0.025073565696258245, + "grad_norm": 1.1734232430171687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5170 + }, + { + "epoch": 0.025122063889094333, + "grad_norm": 1.0516900147194974e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5180 + }, + { + "epoch": 0.02517056208193042, + "grad_norm": 1.1899182027264033e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5190 + }, + { + "epoch": 0.025219060274766513, + "grad_norm": 1.2756092473864555e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 5200 + }, + { + "epoch": 0.0252675584676026, + "grad_norm": 0.0011436078930273652, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5210 + }, + { + "epoch": 0.02531605666043869, + "grad_norm": 0.00014834047760814428, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5220 + }, + { + "epoch": 0.025364554853274778, + "grad_norm": 3.994107464677654e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5230 + }, + { + "epoch": 0.02541305304611087, + "grad_norm": 6.844012386864051e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5240 + }, + { + "epoch": 0.02546155123894696, + "grad_norm": 0.0005335849709808826, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 5250 + }, + { + "epoch": 0.025510049431783047, + "grad_norm": 0.023523710668087006, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5260 + }, + { + "epoch": 0.02555854762461914, + "grad_norm": 0.00014132307842373848, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5270 + }, + { + "epoch": 0.025607045817455227, + "grad_norm": 0.00011207117495359853, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5280 + }, + { + "epoch": 0.025655544010291315, + "grad_norm": 0.033627431839704514, + "learning_rate": 0.0002, + "loss": 0.0603, + "step": 5290 + }, + { + "epoch": 0.025704042203127407, + "grad_norm": 0.0018406114540994167, + "learning_rate": 0.0002, + "loss": 0.0024, + "step": 5300 + }, + { + "epoch": 0.025752540395963495, + "grad_norm": 0.012991310097277164, + "learning_rate": 0.0002, + "loss": 0.0171, + "step": 5310 + }, + { + "epoch": 0.025801038588799584, + "grad_norm": 0.0018365787109360099, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 5320 + }, + { + "epoch": 0.025849536781635676, + "grad_norm": 0.0006698972429148853, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 5330 + }, + { + "epoch": 0.025898034974471764, + "grad_norm": 0.01420981902629137, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 5340 + }, + { + "epoch": 0.025946533167307852, + "grad_norm": 0.012310982681810856, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 5350 + }, + { + "epoch": 0.025995031360143944, + "grad_norm": 1.1538974046707153, + "learning_rate": 0.0002, + "loss": 0.0361, + "step": 5360 + }, + { + "epoch": 0.026043529552980033, + "grad_norm": 0.026477281004190445, + "learning_rate": 0.0002, + "loss": 0.0109, + "step": 5370 + }, + { + "epoch": 0.02609202774581612, + "grad_norm": 0.0007327749044634402, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 5380 + }, + { + "epoch": 0.02614052593865221, + "grad_norm": 0.0005067326710559428, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5390 + }, + { + "epoch": 0.0261890241314883, + "grad_norm": 0.00030109204817563295, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 5400 + }, + { + "epoch": 0.02623752232432439, + "grad_norm": 0.0005644927732646465, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5410 + }, + { + "epoch": 0.026286020517160478, + "grad_norm": 0.00019512473954819143, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5420 + }, + { + "epoch": 0.02633451870999657, + "grad_norm": 0.00022160787193570286, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 5430 + }, + { + "epoch": 0.026383016902832658, + "grad_norm": 0.00030178893939591944, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 5440 + }, + { + "epoch": 0.026431515095668746, + "grad_norm": 0.0002512048522476107, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 5450 + }, + { + "epoch": 0.026480013288504838, + "grad_norm": 0.000211679536732845, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 5460 + }, + { + "epoch": 0.026528511481340927, + "grad_norm": 0.0002661389298737049, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5470 + }, + { + "epoch": 0.026577009674177015, + "grad_norm": 0.00024135725107043982, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5480 + }, + { + "epoch": 0.026625507867013107, + "grad_norm": 0.00023873276950325817, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5490 + }, + { + "epoch": 0.026674006059849195, + "grad_norm": 0.00018590458785183728, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5500 + }, + { + "epoch": 0.026722504252685284, + "grad_norm": 0.00012239671195857227, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5510 + }, + { + "epoch": 0.026771002445521375, + "grad_norm": 9.728335135150701e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5520 + }, + { + "epoch": 0.026819500638357464, + "grad_norm": 0.00025746942264959216, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5530 + }, + { + "epoch": 0.026867998831193552, + "grad_norm": 0.0001057315239449963, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5540 + }, + { + "epoch": 0.02691649702402964, + "grad_norm": 0.00012231498840264976, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 5550 + }, + { + "epoch": 0.026964995216865732, + "grad_norm": 9.459725697524846e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5560 + }, + { + "epoch": 0.02701349340970182, + "grad_norm": 8.842835086397827e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5570 + }, + { + "epoch": 0.02706199160253791, + "grad_norm": 6.921276508364826e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5580 + }, + { + "epoch": 0.027110489795374, + "grad_norm": 0.0001127079376601614, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5590 + }, + { + "epoch": 0.02715898798821009, + "grad_norm": 8.340023487107828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5600 + }, + { + "epoch": 0.027207486181046178, + "grad_norm": 5.476119622471742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5610 + }, + { + "epoch": 0.02725598437388227, + "grad_norm": 6.622980436077341e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5620 + }, + { + "epoch": 0.027304482566718358, + "grad_norm": 5.600484291790053e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5630 + }, + { + "epoch": 0.027352980759554446, + "grad_norm": 6.922688044141978e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5640 + }, + { + "epoch": 0.027401478952390538, + "grad_norm": 6.364896398736164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5650 + }, + { + "epoch": 0.027449977145226626, + "grad_norm": 5.0746300985338166e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5660 + }, + { + "epoch": 0.027498475338062715, + "grad_norm": 4.630871626432054e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5670 + }, + { + "epoch": 0.027546973530898806, + "grad_norm": 5.315276939654723e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 5680 + }, + { + "epoch": 0.027595471723734895, + "grad_norm": 0.00023823593801353127, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 5690 + }, + { + "epoch": 0.027643969916570983, + "grad_norm": 0.0013333763927221298, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5700 + }, + { + "epoch": 0.02769246810940707, + "grad_norm": 0.00044539780355989933, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5710 + }, + { + "epoch": 0.027740966302243163, + "grad_norm": 0.00020216338452883065, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5720 + }, + { + "epoch": 0.02778946449507925, + "grad_norm": 0.00019061286002397537, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5730 + }, + { + "epoch": 0.02783796268791534, + "grad_norm": 0.00014615882537327707, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5740 + }, + { + "epoch": 0.027886460880751432, + "grad_norm": 0.00075985596049577, + "learning_rate": 0.0002, + "loss": 0.002, + "step": 5750 + }, + { + "epoch": 0.02793495907358752, + "grad_norm": 0.022426636889576912, + "learning_rate": 0.0002, + "loss": 0.0034, + "step": 5760 + }, + { + "epoch": 0.02798345726642361, + "grad_norm": 0.32654815912246704, + "learning_rate": 0.0002, + "loss": 0.0114, + "step": 5770 + }, + { + "epoch": 0.0280319554592597, + "grad_norm": 0.003718369407579303, + "learning_rate": 0.0002, + "loss": 0.0175, + "step": 5780 + }, + { + "epoch": 0.02808045365209579, + "grad_norm": 0.0005128814373165369, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 5790 + }, + { + "epoch": 0.028128951844931877, + "grad_norm": 0.009583787061274052, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 5800 + }, + { + "epoch": 0.02817745003776797, + "grad_norm": 0.0003343712887726724, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 5810 + }, + { + "epoch": 0.028225948230604057, + "grad_norm": 0.0005959090194664896, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5820 + }, + { + "epoch": 0.028274446423440146, + "grad_norm": 0.0004703009908553213, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5830 + }, + { + "epoch": 0.028322944616276238, + "grad_norm": 0.00041350742685608566, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 5840 + }, + { + "epoch": 0.028371442809112326, + "grad_norm": 0.00023075319768395275, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5850 + }, + { + "epoch": 0.028419941001948414, + "grad_norm": 0.0002336896868655458, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 5860 + }, + { + "epoch": 0.028468439194784503, + "grad_norm": 0.000371629255823791, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5870 + }, + { + "epoch": 0.028516937387620594, + "grad_norm": 0.00025413150433450937, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5880 + }, + { + "epoch": 0.028565435580456683, + "grad_norm": 0.016120560467243195, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 5890 + }, + { + "epoch": 0.02861393377329277, + "grad_norm": 0.00016619780217297375, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5900 + }, + { + "epoch": 0.028662431966128863, + "grad_norm": 0.000173722772160545, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5910 + }, + { + "epoch": 0.02871093015896495, + "grad_norm": 0.000268951611360535, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5920 + }, + { + "epoch": 0.02875942835180104, + "grad_norm": 0.0001602552511030808, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5930 + }, + { + "epoch": 0.02880792654463713, + "grad_norm": 0.0001315105619141832, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5940 + }, + { + "epoch": 0.02885642473747322, + "grad_norm": 0.0001068285055225715, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5950 + }, + { + "epoch": 0.02890492293030931, + "grad_norm": 0.00011226639617234468, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5960 + }, + { + "epoch": 0.0289534211231454, + "grad_norm": 0.00010572014434728771, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5970 + }, + { + "epoch": 0.02900191931598149, + "grad_norm": 9.318481897935271e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5980 + }, + { + "epoch": 0.029050417508817577, + "grad_norm": 0.00010216530063189566, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 5990 + }, + { + "epoch": 0.02909891570165367, + "grad_norm": 8.584584429627284e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6000 + }, + { + "epoch": 0.029147413894489757, + "grad_norm": 8.218123548431322e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6010 + }, + { + "epoch": 0.029195912087325845, + "grad_norm": 7.70106926211156e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6020 + }, + { + "epoch": 0.029244410280161934, + "grad_norm": 7.329580694204196e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6030 + }, + { + "epoch": 0.029292908472998026, + "grad_norm": 0.00017599698912817985, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6040 + }, + { + "epoch": 0.029341406665834114, + "grad_norm": 6.348195893224329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6050 + }, + { + "epoch": 0.029389904858670202, + "grad_norm": 0.00020357522589620203, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6060 + }, + { + "epoch": 0.029438403051506294, + "grad_norm": 6.033327372279018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6070 + }, + { + "epoch": 0.029486901244342383, + "grad_norm": 6.862440932309255e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6080 + }, + { + "epoch": 0.02953539943717847, + "grad_norm": 5.633971522911452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6090 + }, + { + "epoch": 0.029583897630014563, + "grad_norm": 5.7009474403457716e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6100 + }, + { + "epoch": 0.02963239582285065, + "grad_norm": 5.2002145821461454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6110 + }, + { + "epoch": 0.02968089401568674, + "grad_norm": 5.366498589864932e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6120 + }, + { + "epoch": 0.02972939220852283, + "grad_norm": 5.047260128776543e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6130 + }, + { + "epoch": 0.02977789040135892, + "grad_norm": 4.848846947425045e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6140 + }, + { + "epoch": 0.029826388594195008, + "grad_norm": 4.416262163431384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6150 + }, + { + "epoch": 0.0298748867870311, + "grad_norm": 4.80538365081884e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6160 + }, + { + "epoch": 0.029923384979867188, + "grad_norm": 4.49155195383355e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6170 + }, + { + "epoch": 0.029971883172703277, + "grad_norm": 4.672894283430651e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6180 + }, + { + "epoch": 0.030020381365539365, + "grad_norm": 4.16544389736373e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6190 + }, + { + "epoch": 0.030068879558375457, + "grad_norm": 5.254022471490316e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6200 + }, + { + "epoch": 0.030117377751211545, + "grad_norm": 4.1400158806936815e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6210 + }, + { + "epoch": 0.030165875944047633, + "grad_norm": 3.979404937126674e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6220 + }, + { + "epoch": 0.030214374136883725, + "grad_norm": 4.252413782523945e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6230 + }, + { + "epoch": 0.030262872329719814, + "grad_norm": 3.9314232708420604e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6240 + }, + { + "epoch": 0.030311370522555902, + "grad_norm": 3.528226079652086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6250 + }, + { + "epoch": 0.030359868715391994, + "grad_norm": 3.709212614921853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6260 + }, + { + "epoch": 0.030408366908228082, + "grad_norm": 4.059882485307753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6270 + }, + { + "epoch": 0.03045686510106417, + "grad_norm": 3.330764957354404e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 6280 + }, + { + "epoch": 0.030505363293900262, + "grad_norm": 0.00016012066043913364, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 6290 + }, + { + "epoch": 0.03055386148673635, + "grad_norm": 0.0010215889196842909, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 6300 + }, + { + "epoch": 0.03060235967957244, + "grad_norm": 0.00019147679267916828, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 6310 + }, + { + "epoch": 0.03065085787240853, + "grad_norm": 0.00030747128766961396, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6320 + }, + { + "epoch": 0.03069935606524462, + "grad_norm": 0.004503184929490089, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6330 + }, + { + "epoch": 0.030747854258080708, + "grad_norm": 0.00012332572077866644, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6340 + }, + { + "epoch": 0.030796352450916796, + "grad_norm": 0.00010717248369473964, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6350 + }, + { + "epoch": 0.030844850643752888, + "grad_norm": 8.24575763544999e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6360 + }, + { + "epoch": 0.030893348836588976, + "grad_norm": 7.35415960662067e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6370 + }, + { + "epoch": 0.030941847029425065, + "grad_norm": 6.757079972885549e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6380 + }, + { + "epoch": 0.030990345222261156, + "grad_norm": 6.832870712969452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6390 + }, + { + "epoch": 0.031038843415097245, + "grad_norm": 6.246102566365153e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6400 + }, + { + "epoch": 0.031087341607933333, + "grad_norm": 5.599322685156949e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6410 + }, + { + "epoch": 0.031135839800769425, + "grad_norm": 7.056232425384223e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6420 + }, + { + "epoch": 0.031184337993605513, + "grad_norm": 5.349808998289518e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6430 + }, + { + "epoch": 0.0312328361864416, + "grad_norm": 5.035354843130335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6440 + }, + { + "epoch": 0.031281334379277694, + "grad_norm": 6.151409615995362e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6450 + }, + { + "epoch": 0.03132983257211378, + "grad_norm": 5.068643076810986e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6460 + }, + { + "epoch": 0.03137833076494987, + "grad_norm": 3.89632441510912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6470 + }, + { + "epoch": 0.03142682895778596, + "grad_norm": 6.251622107811272e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6480 + }, + { + "epoch": 0.03147532715062205, + "grad_norm": 6.493285036412999e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6490 + }, + { + "epoch": 0.03152382534345814, + "grad_norm": 7.028348773019388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6500 + }, + { + "epoch": 0.03157232353629423, + "grad_norm": 9.685532859293744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6510 + }, + { + "epoch": 0.03162082172913032, + "grad_norm": 6.394583760993555e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6520 + }, + { + "epoch": 0.03166931992196641, + "grad_norm": 3.619084236561321e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6530 + }, + { + "epoch": 0.031717818114802496, + "grad_norm": 4.126901694689877e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 6540 + }, + { + "epoch": 0.031766316307638584, + "grad_norm": 5.681557013303973e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6550 + }, + { + "epoch": 0.03181481450047468, + "grad_norm": 5.574057649937458e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 6560 + }, + { + "epoch": 0.03186331269331077, + "grad_norm": 4.644974978873506e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6570 + }, + { + "epoch": 0.031911810886146856, + "grad_norm": 4.0566021198173985e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6580 + }, + { + "epoch": 0.031960309078982944, + "grad_norm": 4.18580457335338e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6590 + }, + { + "epoch": 0.03200880727181903, + "grad_norm": 3.9231395930983126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6600 + }, + { + "epoch": 0.03205730546465512, + "grad_norm": 3.7463836633833125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6610 + }, + { + "epoch": 0.03210580365749121, + "grad_norm": 3.4700497053563595e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6620 + }, + { + "epoch": 0.032154301850327305, + "grad_norm": 4.265785537427291e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6630 + }, + { + "epoch": 0.03220280004316339, + "grad_norm": 3.37315141223371e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6640 + }, + { + "epoch": 0.03225129823599948, + "grad_norm": 4.249701669323258e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6650 + }, + { + "epoch": 0.03229979642883557, + "grad_norm": 2.9406081011984497e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6660 + }, + { + "epoch": 0.03234829462167166, + "grad_norm": 2.754655542958062e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6670 + }, + { + "epoch": 0.03239679281450775, + "grad_norm": 2.6863608582061715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6680 + }, + { + "epoch": 0.03244529100734384, + "grad_norm": 2.8564863896463066e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6690 + }, + { + "epoch": 0.03249378920017993, + "grad_norm": 2.9179249395383522e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6700 + }, + { + "epoch": 0.03254228739301602, + "grad_norm": 3.061136158066802e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6710 + }, + { + "epoch": 0.03259078558585211, + "grad_norm": 2.572800985944923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6720 + }, + { + "epoch": 0.032639283778688195, + "grad_norm": 2.545036841183901e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6730 + }, + { + "epoch": 0.032687781971524284, + "grad_norm": 2.923136344179511e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6740 + }, + { + "epoch": 0.03273628016436037, + "grad_norm": 2.9834181987098418e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6750 + }, + { + "epoch": 0.03278477835719647, + "grad_norm": 2.386139749432914e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6760 + }, + { + "epoch": 0.032833276550032556, + "grad_norm": 4.0860159060684964e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 6770 + }, + { + "epoch": 0.032881774742868644, + "grad_norm": 0.0009740607347339392, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 6780 + }, + { + "epoch": 0.03293027293570473, + "grad_norm": 0.0006452045636251569, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6790 + }, + { + "epoch": 0.03297877112854082, + "grad_norm": 0.00015450162754859775, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6800 + }, + { + "epoch": 0.03302726932137691, + "grad_norm": 0.00014091415505390614, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6810 + }, + { + "epoch": 0.033075767514213004, + "grad_norm": 0.00011902609548997134, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 6820 + }, + { + "epoch": 0.03312426570704909, + "grad_norm": 0.00012973914272151887, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6830 + }, + { + "epoch": 0.03317276389988518, + "grad_norm": 9.264184336643666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6840 + }, + { + "epoch": 0.03322126209272127, + "grad_norm": 7.418024324579164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6850 + }, + { + "epoch": 0.03326976028555736, + "grad_norm": 0.00013189170567784458, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6860 + }, + { + "epoch": 0.033318258478393446, + "grad_norm": 0.00013378524454310536, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6870 + }, + { + "epoch": 0.03336675667122954, + "grad_norm": 8.649336086818948e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6880 + }, + { + "epoch": 0.03341525486406563, + "grad_norm": 8.085824083536863e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6890 + }, + { + "epoch": 0.03346375305690172, + "grad_norm": 9.461311128688976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6900 + }, + { + "epoch": 0.03351225124973781, + "grad_norm": 5.3922729421174154e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6910 + }, + { + "epoch": 0.033560749442573895, + "grad_norm": 4.633833304978907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6920 + }, + { + "epoch": 0.03360924763540998, + "grad_norm": 8.72918390086852e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6930 + }, + { + "epoch": 0.03365774582824607, + "grad_norm": 4.094720134162344e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6940 + }, + { + "epoch": 0.03370624402108217, + "grad_norm": 4.65324628748931e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6950 + }, + { + "epoch": 0.033754742213918255, + "grad_norm": 3.5644978197524324e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6960 + }, + { + "epoch": 0.033803240406754344, + "grad_norm": 3.959277455578558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6970 + }, + { + "epoch": 0.03385173859959043, + "grad_norm": 3.4370183129794896e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6980 + }, + { + "epoch": 0.03390023679242652, + "grad_norm": 3.475501580396667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 6990 + }, + { + "epoch": 0.03394873498526261, + "grad_norm": 2.8866948923678137e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7000 + }, + { + "epoch": 0.033997233178098704, + "grad_norm": 5.2667062846012414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7010 + }, + { + "epoch": 0.03404573137093479, + "grad_norm": 2.5307859687018208e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7020 + }, + { + "epoch": 0.03409422956377088, + "grad_norm": 2.6366536985733546e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7030 + }, + { + "epoch": 0.03414272775660697, + "grad_norm": 2.5480603653704748e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7040 + }, + { + "epoch": 0.03419122594944306, + "grad_norm": 2.3381016944767907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7050 + }, + { + "epoch": 0.034239724142279146, + "grad_norm": 2.2302765501081012e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7060 + }, + { + "epoch": 0.034288222335115234, + "grad_norm": 2.0732544726342894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7070 + }, + { + "epoch": 0.03433672052795133, + "grad_norm": 2.1273443053360097e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7080 + }, + { + "epoch": 0.03438521872078742, + "grad_norm": 2.3315260477829725e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7090 + }, + { + "epoch": 0.034433716913623506, + "grad_norm": 1.8049669961328618e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7100 + }, + { + "epoch": 0.034482215106459595, + "grad_norm": 2.1147596271475777e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7110 + }, + { + "epoch": 0.03453071329929568, + "grad_norm": 1.9963423255831003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7120 + }, + { + "epoch": 0.03457921149213177, + "grad_norm": 2.0149993360973895e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 7130 + }, + { + "epoch": 0.03462770968496787, + "grad_norm": 1.4615362488257233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7140 + }, + { + "epoch": 0.034676207877803955, + "grad_norm": 1.3594539268524386e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7150 + }, + { + "epoch": 0.03472470607064004, + "grad_norm": 1.607606463949196e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7160 + }, + { + "epoch": 0.03477320426347613, + "grad_norm": 1.685834831732791e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7170 + }, + { + "epoch": 0.03482170245631222, + "grad_norm": 1.676543070061598e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7180 + }, + { + "epoch": 0.03487020064914831, + "grad_norm": 1.6695035810698755e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7190 + }, + { + "epoch": 0.034918698841984404, + "grad_norm": 1.3310322174220346e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7200 + }, + { + "epoch": 0.03496719703482049, + "grad_norm": 1.6586849596933462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7210 + }, + { + "epoch": 0.03501569522765658, + "grad_norm": 1.5634706869604997e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7220 + }, + { + "epoch": 0.03506419342049267, + "grad_norm": 1.4626783013227396e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7230 + }, + { + "epoch": 0.03511269161332876, + "grad_norm": 1.735226032906212e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7240 + }, + { + "epoch": 0.035161189806164846, + "grad_norm": 1.4507915693684481e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7250 + }, + { + "epoch": 0.035209687999000934, + "grad_norm": 2.4786706489976496e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7260 + }, + { + "epoch": 0.03525818619183703, + "grad_norm": 1.3548815331887454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7270 + }, + { + "epoch": 0.03530668438467312, + "grad_norm": 1.3481848327501211e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7280 + }, + { + "epoch": 0.035355182577509206, + "grad_norm": 1.1676605936372653e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7290 + }, + { + "epoch": 0.035403680770345294, + "grad_norm": 1.2255586625542492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7300 + }, + { + "epoch": 0.03545217896318138, + "grad_norm": 1.321055788139347e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7310 + }, + { + "epoch": 0.03550067715601747, + "grad_norm": 1.3498844964487944e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7320 + }, + { + "epoch": 0.035549175348853566, + "grad_norm": 1.1850464943563566e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7330 + }, + { + "epoch": 0.035597673541689655, + "grad_norm": 1.1006603017449379e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7340 + }, + { + "epoch": 0.03564617173452574, + "grad_norm": 1.356872780888807e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7350 + }, + { + "epoch": 0.03569466992736183, + "grad_norm": 1.498899200669257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7360 + }, + { + "epoch": 0.03574316812019792, + "grad_norm": 1.2133556992921513e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7370 + }, + { + "epoch": 0.03579166631303401, + "grad_norm": 1.1138084119011182e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7380 + }, + { + "epoch": 0.0358401645058701, + "grad_norm": 1.0768419087980874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7390 + }, + { + "epoch": 0.03588866269870619, + "grad_norm": 1.1791733413701877e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7400 + }, + { + "epoch": 0.03593716089154228, + "grad_norm": 1.2984414752281737e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7410 + }, + { + "epoch": 0.03598565908437837, + "grad_norm": 1.1892873772012535e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7420 + }, + { + "epoch": 0.03603415727721446, + "grad_norm": 1.3373332876653876e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7430 + }, + { + "epoch": 0.036082655470050545, + "grad_norm": 1.0653851859387942e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7440 + }, + { + "epoch": 0.036131153662886634, + "grad_norm": 1.3871661394659895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7450 + }, + { + "epoch": 0.03617965185572273, + "grad_norm": 1.1864987754961476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7460 + }, + { + "epoch": 0.03622815004855882, + "grad_norm": 1.1221237400604878e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7470 + }, + { + "epoch": 0.036276648241394906, + "grad_norm": 1.1653353794827126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7480 + }, + { + "epoch": 0.036325146434230994, + "grad_norm": 9.207640687236562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7490 + }, + { + "epoch": 0.03637364462706708, + "grad_norm": 9.52445225266274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7500 + }, + { + "epoch": 0.03642214281990317, + "grad_norm": 1.0892112186411396e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7510 + }, + { + "epoch": 0.036470641012739266, + "grad_norm": 1.003123452392174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7520 + }, + { + "epoch": 0.036519139205575354, + "grad_norm": 1.2204027370898984e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7530 + }, + { + "epoch": 0.03656763739841144, + "grad_norm": 9.9934586614836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7540 + }, + { + "epoch": 0.03661613559124753, + "grad_norm": 1.127293762692716e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7550 + }, + { + "epoch": 0.03666463378408362, + "grad_norm": 0.00019464858633000404, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7560 + }, + { + "epoch": 0.03671313197691971, + "grad_norm": 9.607069841877092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7570 + }, + { + "epoch": 0.036761630169755796, + "grad_norm": 9.794433935894631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7580 + }, + { + "epoch": 0.03681012836259189, + "grad_norm": 9.61999103310518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7590 + }, + { + "epoch": 0.03685862655542798, + "grad_norm": 9.604537808627356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7600 + }, + { + "epoch": 0.03690712474826407, + "grad_norm": 9.23015431908425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7610 + }, + { + "epoch": 0.03695562294110016, + "grad_norm": 9.15753571462119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7620 + }, + { + "epoch": 0.037004121133936245, + "grad_norm": 9.529883755021729e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7630 + }, + { + "epoch": 0.03705261932677233, + "grad_norm": 1.7418660718249157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7640 + }, + { + "epoch": 0.03710111751960843, + "grad_norm": 1.602948941581417e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7650 + }, + { + "epoch": 0.03714961571244452, + "grad_norm": 9.629467967897654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7660 + }, + { + "epoch": 0.037198113905280605, + "grad_norm": 8.805271136225201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7670 + }, + { + "epoch": 0.037246612098116694, + "grad_norm": 9.588402463123202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7680 + }, + { + "epoch": 0.03729511029095278, + "grad_norm": 9.68908625509357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7690 + }, + { + "epoch": 0.03734360848378887, + "grad_norm": 8.403506399190519e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7700 + }, + { + "epoch": 0.03739210667662496, + "grad_norm": 9.373730790684931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7710 + }, + { + "epoch": 0.037440604869461054, + "grad_norm": 8.660495041112881e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7720 + }, + { + "epoch": 0.03748910306229714, + "grad_norm": 1.7680342352832668e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7730 + }, + { + "epoch": 0.03753760125513323, + "grad_norm": 8.47630508360453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7740 + }, + { + "epoch": 0.03758609944796932, + "grad_norm": 8.002424692676868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7750 + }, + { + "epoch": 0.03763459764080541, + "grad_norm": 1.0365323760197498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7760 + }, + { + "epoch": 0.037683095833641496, + "grad_norm": 8.60414729686454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7770 + }, + { + "epoch": 0.03773159402647759, + "grad_norm": 8.460020580969285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7780 + }, + { + "epoch": 0.03778009221931368, + "grad_norm": 7.801734682288952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7790 + }, + { + "epoch": 0.03782859041214977, + "grad_norm": 8.558849003748037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7800 + }, + { + "epoch": 0.037877088604985856, + "grad_norm": 8.114438060147222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7810 + }, + { + "epoch": 0.037925586797821945, + "grad_norm": 8.061261723923963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7820 + }, + { + "epoch": 0.03797408499065803, + "grad_norm": 8.215903108066414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7830 + }, + { + "epoch": 0.03802258318349413, + "grad_norm": 7.691370228712913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7840 + }, + { + "epoch": 0.03807108137633022, + "grad_norm": 8.175340553862043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7850 + }, + { + "epoch": 0.038119579569166305, + "grad_norm": 7.62939453125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7860 + }, + { + "epoch": 0.03816807776200239, + "grad_norm": 7.94168772699777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7870 + }, + { + "epoch": 0.03821657595483848, + "grad_norm": 7.973182619025465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7880 + }, + { + "epoch": 0.03826507414767457, + "grad_norm": 8.520039955328684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7890 + }, + { + "epoch": 0.03831357234051066, + "grad_norm": 7.715416359133087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7900 + }, + { + "epoch": 0.038362070533346754, + "grad_norm": 8.61604894453194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7910 + }, + { + "epoch": 0.03841056872618284, + "grad_norm": 7.3998394327645656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7920 + }, + { + "epoch": 0.03845906691901893, + "grad_norm": 7.358732091233833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7930 + }, + { + "epoch": 0.03850756511185502, + "grad_norm": 7.1262561505136546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7940 + }, + { + "epoch": 0.03855606330469111, + "grad_norm": 7.373179414571496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7950 + }, + { + "epoch": 0.038604561497527196, + "grad_norm": 7.249415375554236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7960 + }, + { + "epoch": 0.03865305969036329, + "grad_norm": 7.8543407653342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7970 + }, + { + "epoch": 0.03870155788319938, + "grad_norm": 7.156049832701683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7980 + }, + { + "epoch": 0.03875005607603547, + "grad_norm": 6.7908631535829045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 7990 + }, + { + "epoch": 0.038798554268871556, + "grad_norm": 9.396271707373671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8000 + }, + { + "epoch": 0.038847052461707644, + "grad_norm": 7.4043778113264125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8010 + }, + { + "epoch": 0.03889555065454373, + "grad_norm": 7.1312551881419495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8020 + }, + { + "epoch": 0.03894404884737983, + "grad_norm": 6.958905487408629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8030 + }, + { + "epoch": 0.038992547040215916, + "grad_norm": 6.286035386438016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8040 + }, + { + "epoch": 0.039041045233052005, + "grad_norm": 6.8456756707746536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8050 + }, + { + "epoch": 0.03908954342588809, + "grad_norm": 6.953360298211919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8060 + }, + { + "epoch": 0.03913804161872418, + "grad_norm": 6.942633717699209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8070 + }, + { + "epoch": 0.03918653981156027, + "grad_norm": 1.1710857506841421e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8080 + }, + { + "epoch": 0.03923503800439636, + "grad_norm": 6.94835898684687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8090 + }, + { + "epoch": 0.03928353619723245, + "grad_norm": 6.491548901976785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8100 + }, + { + "epoch": 0.03933203439006854, + "grad_norm": 7.135988653317327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8110 + }, + { + "epoch": 0.03938053258290463, + "grad_norm": 6.800640676374314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8120 + }, + { + "epoch": 0.03942903077574072, + "grad_norm": 6.910163392603863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8130 + }, + { + "epoch": 0.03947752896857681, + "grad_norm": 6.20204036749783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8140 + }, + { + "epoch": 0.039526027161412895, + "grad_norm": 6.11617087997729e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8150 + }, + { + "epoch": 0.03957452535424899, + "grad_norm": 6.2370158957492094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8160 + }, + { + "epoch": 0.03962302354708508, + "grad_norm": 6.189494797581574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8170 + }, + { + "epoch": 0.03967152173992117, + "grad_norm": 6.652674983342877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8180 + }, + { + "epoch": 0.039720019932757256, + "grad_norm": 5.842954124091193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8190 + }, + { + "epoch": 0.039768518125593344, + "grad_norm": 5.5860750762803946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8200 + }, + { + "epoch": 0.03981701631842943, + "grad_norm": 6.679565103695495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8210 + }, + { + "epoch": 0.03986551451126552, + "grad_norm": 7.043557616270846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8220 + }, + { + "epoch": 0.039914012704101616, + "grad_norm": 6.108452453190694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8230 + }, + { + "epoch": 0.039962510896937704, + "grad_norm": 7.73970259615453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8240 + }, + { + "epoch": 0.04001100908977379, + "grad_norm": 5.871240773558384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8250 + }, + { + "epoch": 0.04005950728260988, + "grad_norm": 6.063462933525443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8260 + }, + { + "epoch": 0.04010800547544597, + "grad_norm": 6.005141585774254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8270 + }, + { + "epoch": 0.04015650366828206, + "grad_norm": 9.003595550893806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8280 + }, + { + "epoch": 0.04020500186111815, + "grad_norm": 5.00376745549147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8290 + }, + { + "epoch": 0.04025350005395424, + "grad_norm": 6.766965270799119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8300 + }, + { + "epoch": 0.04030199824679033, + "grad_norm": 5.534913725568913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8310 + }, + { + "epoch": 0.04035049643962642, + "grad_norm": 5.988365046505351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8320 + }, + { + "epoch": 0.04039899463246251, + "grad_norm": 5.917344424233306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8330 + }, + { + "epoch": 0.040447492825298595, + "grad_norm": 4.9708546612237114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8340 + }, + { + "epoch": 0.04049599101813469, + "grad_norm": 7.737412488495465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8350 + }, + { + "epoch": 0.04054448921097078, + "grad_norm": 8.057596460275818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8360 + }, + { + "epoch": 0.04059298740380687, + "grad_norm": 5.571344445343129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8370 + }, + { + "epoch": 0.040641485596642955, + "grad_norm": 6.570079676748719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8380 + }, + { + "epoch": 0.040689983789479044, + "grad_norm": 7.2816546889953315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8390 + }, + { + "epoch": 0.04073848198231513, + "grad_norm": 8.104065273073502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8400 + }, + { + "epoch": 0.04078698017515122, + "grad_norm": 1.2829202205466572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8410 + }, + { + "epoch": 0.040835478367987316, + "grad_norm": 1.2363118003122509e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8420 + }, + { + "epoch": 0.040883976560823404, + "grad_norm": 9.683970347396098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8430 + }, + { + "epoch": 0.04093247475365949, + "grad_norm": 8.620498192613013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8440 + }, + { + "epoch": 0.04098097294649558, + "grad_norm": 9.369910003442783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8450 + }, + { + "epoch": 0.04102947113933167, + "grad_norm": 9.978874913940672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8460 + }, + { + "epoch": 0.04107796933216776, + "grad_norm": 9.417753062734846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8470 + }, + { + "epoch": 0.04112646752500385, + "grad_norm": 9.756689905771054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8480 + }, + { + "epoch": 0.04117496571783994, + "grad_norm": 8.958189027907792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8490 + }, + { + "epoch": 0.04122346391067603, + "grad_norm": 8.90523187990766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8500 + }, + { + "epoch": 0.04127196210351212, + "grad_norm": 8.706406333658379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8510 + }, + { + "epoch": 0.041320460296348206, + "grad_norm": 8.813840395305306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8520 + }, + { + "epoch": 0.041368958489184295, + "grad_norm": 9.074625268112868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8530 + }, + { + "epoch": 0.04141745668202038, + "grad_norm": 9.702470379124861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8540 + }, + { + "epoch": 0.04146595487485648, + "grad_norm": 8.338602128787898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8550 + }, + { + "epoch": 0.04151445306769257, + "grad_norm": 7.964460564835463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8560 + }, + { + "epoch": 0.041562951260528655, + "grad_norm": 9.353669156553224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8570 + }, + { + "epoch": 0.04161144945336474, + "grad_norm": 7.328435003728373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8580 + }, + { + "epoch": 0.04165994764620083, + "grad_norm": 7.896200258983299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8590 + }, + { + "epoch": 0.04170844583903692, + "grad_norm": 7.571693004138069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8600 + }, + { + "epoch": 0.041756944031873015, + "grad_norm": 7.014348284428706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8610 + }, + { + "epoch": 0.041805442224709104, + "grad_norm": 6.396862772817258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8620 + }, + { + "epoch": 0.04185394041754519, + "grad_norm": 6.549541922140634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8630 + }, + { + "epoch": 0.04190243861038128, + "grad_norm": 6.1693258430750575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8640 + }, + { + "epoch": 0.04195093680321737, + "grad_norm": 5.921248884988017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8650 + }, + { + "epoch": 0.04199943499605346, + "grad_norm": 7.138861747080227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8660 + }, + { + "epoch": 0.04204793318888955, + "grad_norm": 6.143480732134776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8670 + }, + { + "epoch": 0.04209643138172564, + "grad_norm": 6.82296877130284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8680 + }, + { + "epoch": 0.04214492957456173, + "grad_norm": 6.029568794474471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8690 + }, + { + "epoch": 0.04219342776739782, + "grad_norm": 6.0145925999677274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8700 + }, + { + "epoch": 0.042241925960233906, + "grad_norm": 7.170782737375703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8710 + }, + { + "epoch": 0.042290424153069994, + "grad_norm": 6.7250880420033354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8720 + }, + { + "epoch": 0.04233892234590608, + "grad_norm": 6.239893082238268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8730 + }, + { + "epoch": 0.04238742053874218, + "grad_norm": 5.800477538286941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8740 + }, + { + "epoch": 0.042435918731578266, + "grad_norm": 5.244101885182317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8750 + }, + { + "epoch": 0.042484416924414355, + "grad_norm": 5.729488748329459e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8760 + }, + { + "epoch": 0.04253291511725044, + "grad_norm": 1.2397580576362088e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8770 + }, + { + "epoch": 0.04258141331008653, + "grad_norm": 5.967334345768904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8780 + }, + { + "epoch": 0.04262991150292262, + "grad_norm": 5.560129920922918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8790 + }, + { + "epoch": 0.042678409695758715, + "grad_norm": 4.845979674428236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8800 + }, + { + "epoch": 0.0427269078885948, + "grad_norm": 5.857080850546481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8810 + }, + { + "epoch": 0.04277540608143089, + "grad_norm": 5.683415565727046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8820 + }, + { + "epoch": 0.04282390427426698, + "grad_norm": 6.477517672465183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8830 + }, + { + "epoch": 0.04287240246710307, + "grad_norm": 1.0212819688604213e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8840 + }, + { + "epoch": 0.04292090065993916, + "grad_norm": 4.800426268047886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8850 + }, + { + "epoch": 0.042969398852775245, + "grad_norm": 5.954295829724288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8860 + }, + { + "epoch": 0.04301789704561134, + "grad_norm": 7.678132533328608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8870 + }, + { + "epoch": 0.04306639523844743, + "grad_norm": 5.937595233262982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8880 + }, + { + "epoch": 0.04311489343128352, + "grad_norm": 4.832709237234667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8890 + }, + { + "epoch": 0.043163391624119606, + "grad_norm": 4.593664016283583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8900 + }, + { + "epoch": 0.043211889816955694, + "grad_norm": 4.578779225994367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8910 + }, + { + "epoch": 0.04326038800979178, + "grad_norm": 4.94439882459119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8920 + }, + { + "epoch": 0.04330888620262788, + "grad_norm": 4.988438831787789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8930 + }, + { + "epoch": 0.043357384395463966, + "grad_norm": 4.3825612010550685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8940 + }, + { + "epoch": 0.043405882588300054, + "grad_norm": 4.242381237418158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8950 + }, + { + "epoch": 0.04345438078113614, + "grad_norm": 4.6682084757776465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8960 + }, + { + "epoch": 0.04350287897397223, + "grad_norm": 4.3480795284267515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8970 + }, + { + "epoch": 0.04355137716680832, + "grad_norm": 4.554407951218309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8980 + }, + { + "epoch": 0.043599875359644415, + "grad_norm": 6.564529030583799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 8990 + }, + { + "epoch": 0.0436483735524805, + "grad_norm": 4.638356131181354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9000 + }, + { + "epoch": 0.04369687174531659, + "grad_norm": 6.724419563397532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9010 + }, + { + "epoch": 0.04374536993815268, + "grad_norm": 4.9059240154747386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9020 + }, + { + "epoch": 0.04379386813098877, + "grad_norm": 4.876832008449128e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9030 + }, + { + "epoch": 0.043842366323824856, + "grad_norm": 4.054657438246068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9040 + }, + { + "epoch": 0.043890864516660945, + "grad_norm": 3.953309260396054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9050 + }, + { + "epoch": 0.04393936270949704, + "grad_norm": 4.572541001834907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9060 + }, + { + "epoch": 0.04398786090233313, + "grad_norm": 4.338396593084326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9070 + }, + { + "epoch": 0.04403635909516922, + "grad_norm": 4.930914656142704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9080 + }, + { + "epoch": 0.044084857288005305, + "grad_norm": 4.453223937161965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9090 + }, + { + "epoch": 0.044133355480841394, + "grad_norm": 3.6371984606375918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9100 + }, + { + "epoch": 0.04418185367367748, + "grad_norm": 3.9579977055836935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9110 + }, + { + "epoch": 0.04423035186651358, + "grad_norm": 4.016284037788864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9120 + }, + { + "epoch": 0.044278850059349666, + "grad_norm": 8.54978588904487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9130 + }, + { + "epoch": 0.044327348252185754, + "grad_norm": 3.6749827359017218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9140 + }, + { + "epoch": 0.04437584644502184, + "grad_norm": 3.812217528320616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9150 + }, + { + "epoch": 0.04442434463785793, + "grad_norm": 3.7490513022930827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9160 + }, + { + "epoch": 0.04447284283069402, + "grad_norm": 3.981920599471778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9170 + }, + { + "epoch": 0.04452134102353011, + "grad_norm": 3.843380454782164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9180 + }, + { + "epoch": 0.0445698392163662, + "grad_norm": 3.704663868120406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9190 + }, + { + "epoch": 0.04461833740920229, + "grad_norm": 3.7123857055121334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9200 + }, + { + "epoch": 0.04466683560203838, + "grad_norm": 4.193291260889964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9210 + }, + { + "epoch": 0.04471533379487447, + "grad_norm": 4.256860393070383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9220 + }, + { + "epoch": 0.044763831987710556, + "grad_norm": 4.051199084642576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9230 + }, + { + "epoch": 0.044812330180546645, + "grad_norm": 3.3698354400257813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9240 + }, + { + "epoch": 0.04486082837338274, + "grad_norm": 3.448422830842901e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9250 + }, + { + "epoch": 0.04490932656621883, + "grad_norm": 3.776534867938608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9260 + }, + { + "epoch": 0.04495782475905492, + "grad_norm": 3.688107199195656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9270 + }, + { + "epoch": 0.045006322951891005, + "grad_norm": 4.067668669449631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9280 + }, + { + "epoch": 0.04505482114472709, + "grad_norm": 3.7274623991834233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9290 + }, + { + "epoch": 0.04510331933756318, + "grad_norm": 3.496364342936431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9300 + }, + { + "epoch": 0.04515181753039928, + "grad_norm": 3.848217602353543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9310 + }, + { + "epoch": 0.045200315723235365, + "grad_norm": 3.617825086621451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9320 + }, + { + "epoch": 0.045248813916071454, + "grad_norm": 3.808725523413159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9330 + }, + { + "epoch": 0.04529731210890754, + "grad_norm": 3.2963744160952047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9340 + }, + { + "epoch": 0.04534581030174363, + "grad_norm": 3.163283508911263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9350 + }, + { + "epoch": 0.04539430849457972, + "grad_norm": 3.910799478035187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9360 + }, + { + "epoch": 0.04544280668741581, + "grad_norm": 3.381737087693182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9370 + }, + { + "epoch": 0.0454913048802519, + "grad_norm": 3.926811587007251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9380 + }, + { + "epoch": 0.04553980307308799, + "grad_norm": 3.139818772979197e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9390 + }, + { + "epoch": 0.04558830126592408, + "grad_norm": 4.12154759033001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9400 + }, + { + "epoch": 0.04563679945876017, + "grad_norm": 0.0018025252502411604, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 9410 + }, + { + "epoch": 0.045685297651596256, + "grad_norm": 0.00022556497424375266, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 9420 + }, + { + "epoch": 0.045733795844432344, + "grad_norm": 0.000531529716681689, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9430 + }, + { + "epoch": 0.04578229403726844, + "grad_norm": 0.00042388326255604625, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 9440 + }, + { + "epoch": 0.04583079223010453, + "grad_norm": 6.017644045641646e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9450 + }, + { + "epoch": 0.045879290422940616, + "grad_norm": 0.0043733734637498856, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 9460 + }, + { + "epoch": 0.045927788615776705, + "grad_norm": 0.00016566610429435968, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9470 + }, + { + "epoch": 0.04597628680861279, + "grad_norm": 2.7741512894863263e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9480 + }, + { + "epoch": 0.04602478500144888, + "grad_norm": 2.0665369447669946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9490 + }, + { + "epoch": 0.04607328319428497, + "grad_norm": 1.8071650629281066e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9500 + }, + { + "epoch": 0.046121781387121065, + "grad_norm": 2.006798968068324e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9510 + }, + { + "epoch": 0.04617027957995715, + "grad_norm": 1.7820555513026193e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9520 + }, + { + "epoch": 0.04621877777279324, + "grad_norm": 1.63431996043073e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9530 + }, + { + "epoch": 0.04626727596562933, + "grad_norm": 1.4089758224145044e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9540 + }, + { + "epoch": 0.04631577415846542, + "grad_norm": 1.275456634175498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9550 + }, + { + "epoch": 0.04636427235130151, + "grad_norm": 1.5379822798422538e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9560 + }, + { + "epoch": 0.0464127705441376, + "grad_norm": 1.872659231594298e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9570 + }, + { + "epoch": 0.04646126873697369, + "grad_norm": 1.3439524991554208e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9580 + }, + { + "epoch": 0.04650976692980978, + "grad_norm": 1.10854089143686e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9590 + }, + { + "epoch": 0.04655826512264587, + "grad_norm": 1.2029768186039291e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9600 + }, + { + "epoch": 0.046606763315481956, + "grad_norm": 1.2474602954171132e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9610 + }, + { + "epoch": 0.046655261508318044, + "grad_norm": 1.1225523849134333e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9620 + }, + { + "epoch": 0.04670375970115414, + "grad_norm": 1.1144649761263281e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9630 + }, + { + "epoch": 0.04675225789399023, + "grad_norm": 1.0229542567685712e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9640 + }, + { + "epoch": 0.046800756086826316, + "grad_norm": 9.595409210305661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9650 + }, + { + "epoch": 0.046849254279662404, + "grad_norm": 1.0723064406192861e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9660 + }, + { + "epoch": 0.04689775247249849, + "grad_norm": 1.047573096002452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9670 + }, + { + "epoch": 0.04694625066533458, + "grad_norm": 1.0103827662533149e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9680 + }, + { + "epoch": 0.04699474885817067, + "grad_norm": 8.881133908289485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9690 + }, + { + "epoch": 0.047043247051006765, + "grad_norm": 8.562998118577525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9700 + }, + { + "epoch": 0.04709174524384285, + "grad_norm": 9.210010830429383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9710 + }, + { + "epoch": 0.04714024343667894, + "grad_norm": 1.602974225534126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9720 + }, + { + "epoch": 0.04718874162951503, + "grad_norm": 9.007105290947948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9730 + }, + { + "epoch": 0.04723723982235112, + "grad_norm": 7.429635843436699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9740 + }, + { + "epoch": 0.047285738015187206, + "grad_norm": 7.342392564169131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9750 + }, + { + "epoch": 0.0473342362080233, + "grad_norm": 8.212986358557828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9760 + }, + { + "epoch": 0.04738273440085939, + "grad_norm": 7.391919098154176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9770 + }, + { + "epoch": 0.04743123259369548, + "grad_norm": 7.734554856142495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9780 + }, + { + "epoch": 0.04747973078653157, + "grad_norm": 6.9122161221457645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9790 + }, + { + "epoch": 0.047528228979367655, + "grad_norm": 6.591081273654709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9800 + }, + { + "epoch": 0.047576727172203744, + "grad_norm": 6.712503363814903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9810 + }, + { + "epoch": 0.04762522536503983, + "grad_norm": 7.2000502768787555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9820 + }, + { + "epoch": 0.04767372355787593, + "grad_norm": 1.7214842955581844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9830 + }, + { + "epoch": 0.047722221750712016, + "grad_norm": 6.199142717377981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9840 + }, + { + "epoch": 0.047770719943548104, + "grad_norm": 6.638089416810544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9850 + }, + { + "epoch": 0.04781921813638419, + "grad_norm": 6.409263278328581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9860 + }, + { + "epoch": 0.04786771632922028, + "grad_norm": 8.01106943981722e-06, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 9870 + }, + { + "epoch": 0.04791621452205637, + "grad_norm": 2.6064493795274757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9880 + }, + { + "epoch": 0.047964712714892464, + "grad_norm": 1.0608064258121885e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9890 + }, + { + "epoch": 0.04801321090772855, + "grad_norm": 1.5660256394767202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9900 + }, + { + "epoch": 0.04806170910056464, + "grad_norm": 0.0013411894906312227, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 9910 + }, + { + "epoch": 0.04811020729340073, + "grad_norm": 0.00035520290839485824, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 9920 + }, + { + "epoch": 0.04815870548623682, + "grad_norm": 0.0005328473052941263, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9930 + }, + { + "epoch": 0.048207203679072906, + "grad_norm": 0.0001534269395051524, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9940 + }, + { + "epoch": 0.048255701871909, + "grad_norm": 7.305444160010666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9950 + }, + { + "epoch": 0.04830420006474509, + "grad_norm": 2.4335531634278595e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9960 + }, + { + "epoch": 0.04835269825758118, + "grad_norm": 2.3453203539247625e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9970 + }, + { + "epoch": 0.048401196450417266, + "grad_norm": 2.856938590412028e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9980 + }, + { + "epoch": 0.048449694643253355, + "grad_norm": 0.009913315996527672, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 9990 + }, + { + "epoch": 0.04849819283608944, + "grad_norm": 1.2470693945942912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10000 + }, + { + "epoch": 0.04854669102892553, + "grad_norm": 2.1523612304008566e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10010 + }, + { + "epoch": 0.04859518922176163, + "grad_norm": 1.4192952221492305e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 10020 + }, + { + "epoch": 0.048643687414597715, + "grad_norm": 7.170121534727514e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10030 + }, + { + "epoch": 0.048692185607433804, + "grad_norm": 3.352250496391207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10040 + }, + { + "epoch": 0.04874068380026989, + "grad_norm": 2.0908521037199534e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10050 + }, + { + "epoch": 0.04878918199310598, + "grad_norm": 3.319197639939375e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10060 + }, + { + "epoch": 0.04883768018594207, + "grad_norm": 2.1190389816183597e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10070 + }, + { + "epoch": 0.048886178378778164, + "grad_norm": 1.4210853805707302e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10080 + }, + { + "epoch": 0.04893467657161425, + "grad_norm": 1.9098726625088602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10090 + }, + { + "epoch": 0.04898317476445034, + "grad_norm": 2.315339406777639e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10100 + }, + { + "epoch": 0.04903167295728643, + "grad_norm": 8.661232641316019e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10110 + }, + { + "epoch": 0.04908017115012252, + "grad_norm": 8.359824278159067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10120 + }, + { + "epoch": 0.049128669342958606, + "grad_norm": 8.215866728278343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10130 + }, + { + "epoch": 0.049177167535794694, + "grad_norm": 7.237728823383804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10140 + }, + { + "epoch": 0.04922566572863079, + "grad_norm": 7.95389678387437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10150 + }, + { + "epoch": 0.04927416392146688, + "grad_norm": 8.943589818954933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10160 + }, + { + "epoch": 0.049322662114302966, + "grad_norm": 6.547537850565277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10170 + }, + { + "epoch": 0.049371160307139055, + "grad_norm": 6.014427526679356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10180 + }, + { + "epoch": 0.04941965849997514, + "grad_norm": 7.641005140612833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10190 + }, + { + "epoch": 0.04946815669281123, + "grad_norm": 5.074150976724923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10200 + }, + { + "epoch": 0.04951665488564733, + "grad_norm": 6.5817212089314125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10210 + }, + { + "epoch": 0.049565153078483415, + "grad_norm": 2.763122392934747e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 10220 + }, + { + "epoch": 0.0496136512713195, + "grad_norm": 0.0020755541045218706, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10230 + }, + { + "epoch": 0.04966214946415559, + "grad_norm": 6.769236642867327e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 10240 + }, + { + "epoch": 0.04971064765699168, + "grad_norm": 0.00046118462341837585, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10250 + }, + { + "epoch": 0.04975914584982777, + "grad_norm": 6.268373545026407e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10260 + }, + { + "epoch": 0.049807644042663864, + "grad_norm": 0.00010861671034945175, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10270 + }, + { + "epoch": 0.04985614223549995, + "grad_norm": 3.608133920351975e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10280 + }, + { + "epoch": 0.04990464042833604, + "grad_norm": 4.773667023982853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10290 + }, + { + "epoch": 0.04995313862117213, + "grad_norm": 3.3673968573566526e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10300 + }, + { + "epoch": 0.05000163681400822, + "grad_norm": 2.075587872241158e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10310 + }, + { + "epoch": 0.050050135006844305, + "grad_norm": 2.138114177796524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10320 + }, + { + "epoch": 0.050098633199680394, + "grad_norm": 1.6004529243218713e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10330 + }, + { + "epoch": 0.05014713139251649, + "grad_norm": 1.6046747987275012e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10340 + }, + { + "epoch": 0.05019562958535258, + "grad_norm": 1.7824264432420023e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10350 + }, + { + "epoch": 0.050244127778188666, + "grad_norm": 1.3925768143963069e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10360 + }, + { + "epoch": 0.050292625971024754, + "grad_norm": 1.5319830708904192e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 10370 + }, + { + "epoch": 0.05034112416386084, + "grad_norm": 0.13771001994609833, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 10380 + }, + { + "epoch": 0.05038962235669693, + "grad_norm": 0.0026809119153767824, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 10390 + }, + { + "epoch": 0.050438120549533026, + "grad_norm": 0.00020957487868145108, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 10400 + }, + { + "epoch": 0.050486618742369115, + "grad_norm": 0.00011703507334459573, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 10410 + }, + { + "epoch": 0.0505351169352052, + "grad_norm": 0.03168646618723869, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 10420 + }, + { + "epoch": 0.05058361512804129, + "grad_norm": 0.0001316604611929506, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10430 + }, + { + "epoch": 0.05063211332087738, + "grad_norm": 0.00020453822799026966, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 10440 + }, + { + "epoch": 0.05068061151371347, + "grad_norm": 0.00020978794782422483, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10450 + }, + { + "epoch": 0.050729109706549556, + "grad_norm": 0.0001208620160468854, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10460 + }, + { + "epoch": 0.05077760789938565, + "grad_norm": 0.0004944897373206913, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 10470 + }, + { + "epoch": 0.05082610609222174, + "grad_norm": 0.0001827398664318025, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10480 + }, + { + "epoch": 0.05087460428505783, + "grad_norm": 7.083562377374619e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10490 + }, + { + "epoch": 0.05092310247789392, + "grad_norm": 4.185294164926745e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10500 + }, + { + "epoch": 0.050971600670730005, + "grad_norm": 4.843409624299966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10510 + }, + { + "epoch": 0.051020098863566093, + "grad_norm": 0.02163783088326454, + "learning_rate": 0.0002, + "loss": 0.0032, + "step": 10520 + }, + { + "epoch": 0.05106859705640219, + "grad_norm": 0.01121087372303009, + "learning_rate": 0.0002, + "loss": 0.0094, + "step": 10530 + }, + { + "epoch": 0.05111709524923828, + "grad_norm": 0.004404266364872456, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 10540 + }, + { + "epoch": 0.051165593442074365, + "grad_norm": 0.00022970781719777733, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 10550 + }, + { + "epoch": 0.051214091634910454, + "grad_norm": 0.006822929717600346, + "learning_rate": 0.0002, + "loss": 0.1078, + "step": 10560 + }, + { + "epoch": 0.05126258982774654, + "grad_norm": 0.07804814726114273, + "learning_rate": 0.0002, + "loss": 0.0023, + "step": 10570 + }, + { + "epoch": 0.05131108802058263, + "grad_norm": 0.00026928444276563823, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 10580 + }, + { + "epoch": 0.051359586213418726, + "grad_norm": 0.00015052717935759574, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10590 + }, + { + "epoch": 0.051408084406254814, + "grad_norm": 0.00010209638276137412, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10600 + }, + { + "epoch": 0.0514565825990909, + "grad_norm": 9.865434549283236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10610 + }, + { + "epoch": 0.05150508079192699, + "grad_norm": 7.08606603438966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10620 + }, + { + "epoch": 0.05155357898476308, + "grad_norm": 6.403225415851921e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10630 + }, + { + "epoch": 0.05160207717759917, + "grad_norm": 4.556926796794869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10640 + }, + { + "epoch": 0.051650575370435256, + "grad_norm": 3.60831108991988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10650 + }, + { + "epoch": 0.05169907356327135, + "grad_norm": 4.117544449400157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10660 + }, + { + "epoch": 0.05174757175610744, + "grad_norm": 4.260629793861881e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10670 + }, + { + "epoch": 0.05179606994894353, + "grad_norm": 4.126084968447685e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10680 + }, + { + "epoch": 0.051844568141779616, + "grad_norm": 4.470795829547569e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10690 + }, + { + "epoch": 0.051893066334615705, + "grad_norm": 2.800757829390932e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10700 + }, + { + "epoch": 0.05194156452745179, + "grad_norm": 3.264260885771364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10710 + }, + { + "epoch": 0.05199006272028789, + "grad_norm": 3.3157259167637676e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10720 + }, + { + "epoch": 0.05203856091312398, + "grad_norm": 2.9349825126701035e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10730 + }, + { + "epoch": 0.052087059105960065, + "grad_norm": 2.289807343913708e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10740 + }, + { + "epoch": 0.052135557298796154, + "grad_norm": 2.563581983849872e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10750 + }, + { + "epoch": 0.05218405549163224, + "grad_norm": 2.7204823709325865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10760 + }, + { + "epoch": 0.05223255368446833, + "grad_norm": 3.425751856411807e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 10770 + }, + { + "epoch": 0.05228105187730442, + "grad_norm": 6.516183202620596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10780 + }, + { + "epoch": 0.052329550070140514, + "grad_norm": 5.1227223593741655e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10790 + }, + { + "epoch": 0.0523780482629766, + "grad_norm": 0.0002602529712021351, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10800 + }, + { + "epoch": 0.05242654645581269, + "grad_norm": 9.30533351493068e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10810 + }, + { + "epoch": 0.05247504464864878, + "grad_norm": 4.300653381505981e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10820 + }, + { + "epoch": 0.05252354284148487, + "grad_norm": 6.307217699941248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10830 + }, + { + "epoch": 0.052572041034320956, + "grad_norm": 2.209892772953026e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10840 + }, + { + "epoch": 0.05262053922715705, + "grad_norm": 1.9500850612530485e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10850 + }, + { + "epoch": 0.05266903741999314, + "grad_norm": 3.228145214961842e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10860 + }, + { + "epoch": 0.05271753561282923, + "grad_norm": 2.9730708774877712e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10870 + }, + { + "epoch": 0.052766033805665316, + "grad_norm": 3.313919296488166e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10880 + }, + { + "epoch": 0.052814531998501404, + "grad_norm": 1.4078094864089508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10890 + }, + { + "epoch": 0.05286303019133749, + "grad_norm": 1.7005711924866773e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10900 + }, + { + "epoch": 0.05291152838417359, + "grad_norm": 2.722844874369912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10910 + }, + { + "epoch": 0.052960026577009676, + "grad_norm": 3.2384654332417995e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 10920 + }, + { + "epoch": 0.053008524769845765, + "grad_norm": 4.2662155465222895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10930 + }, + { + "epoch": 0.05305702296268185, + "grad_norm": 1.702200825093314e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10940 + }, + { + "epoch": 0.05310552115551794, + "grad_norm": 2.0079563910258003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10950 + }, + { + "epoch": 0.05315401934835403, + "grad_norm": 3.322585325804539e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10960 + }, + { + "epoch": 0.05320251754119012, + "grad_norm": 4.0509741666028276e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10970 + }, + { + "epoch": 0.053251015734026214, + "grad_norm": 5.072906060377136e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10980 + }, + { + "epoch": 0.0532995139268623, + "grad_norm": 1.5648967746528797e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 10990 + }, + { + "epoch": 0.05334801211969839, + "grad_norm": 2.1187639504205436e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11000 + }, + { + "epoch": 0.05339651031253448, + "grad_norm": 4.234746302245185e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11010 + }, + { + "epoch": 0.05344500850537057, + "grad_norm": 2.5650919269537553e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11020 + }, + { + "epoch": 0.053493506698206655, + "grad_norm": 2.2488999093184248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11030 + }, + { + "epoch": 0.05354200489104275, + "grad_norm": 1.242850976268528e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11040 + }, + { + "epoch": 0.05359050308387884, + "grad_norm": 1.850503213063348e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11050 + }, + { + "epoch": 0.05363900127671493, + "grad_norm": 2.242276787001174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11060 + }, + { + "epoch": 0.053687499469551016, + "grad_norm": 2.3697488359175622e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11070 + }, + { + "epoch": 0.053735997662387104, + "grad_norm": 2.2779015125706792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11080 + }, + { + "epoch": 0.05378449585522319, + "grad_norm": 1.2034762221446726e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11090 + }, + { + "epoch": 0.05383299404805928, + "grad_norm": 1.0664701221685391e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11100 + }, + { + "epoch": 0.053881492240895376, + "grad_norm": 1.515158419351792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11110 + }, + { + "epoch": 0.053929990433731465, + "grad_norm": 4.1175648220814764e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11120 + }, + { + "epoch": 0.05397848862656755, + "grad_norm": 2.077060162264388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11130 + }, + { + "epoch": 0.05402698681940364, + "grad_norm": 1.0967433809128124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11140 + }, + { + "epoch": 0.05407548501223973, + "grad_norm": 1.0497791663510725e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11150 + }, + { + "epoch": 0.05412398320507582, + "grad_norm": 1.5407073078677058e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11160 + }, + { + "epoch": 0.05417248139791191, + "grad_norm": 1.2788970707333647e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11170 + }, + { + "epoch": 0.054220979590748, + "grad_norm": 1.4676373211841565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11180 + }, + { + "epoch": 0.05426947778358409, + "grad_norm": 8.6364971139119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11190 + }, + { + "epoch": 0.05431797597642018, + "grad_norm": 9.02056763152359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11200 + }, + { + "epoch": 0.05436647416925627, + "grad_norm": 1.4194788491295185e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11210 + }, + { + "epoch": 0.054414972362092355, + "grad_norm": 1.1208980140509084e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11220 + }, + { + "epoch": 0.05446347055492845, + "grad_norm": 1.3012527233513538e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11230 + }, + { + "epoch": 0.05451196874776454, + "grad_norm": 8.277707820525393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11240 + }, + { + "epoch": 0.05456046694060063, + "grad_norm": 7.554419880761998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11250 + }, + { + "epoch": 0.054608965133436715, + "grad_norm": 1.0980612387356814e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11260 + }, + { + "epoch": 0.054657463326272804, + "grad_norm": 1.0259945156576578e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11270 + }, + { + "epoch": 0.05470596151910889, + "grad_norm": 1.0266744538967032e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11280 + }, + { + "epoch": 0.05475445971194498, + "grad_norm": 1.568038533150684e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 11290 + }, + { + "epoch": 0.054802957904781076, + "grad_norm": 2.732918619585689e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11300 + }, + { + "epoch": 0.054851456097617164, + "grad_norm": 0.04310668632388115, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 11310 + }, + { + "epoch": 0.05489995429045325, + "grad_norm": 0.027207383885979652, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 11320 + }, + { + "epoch": 0.05494845248328934, + "grad_norm": 3.866890983772464e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 11330 + }, + { + "epoch": 0.05499695067612543, + "grad_norm": 0.3306594491004944, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 11340 + }, + { + "epoch": 0.05504544886896152, + "grad_norm": 0.0014261690666899085, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 11350 + }, + { + "epoch": 0.05509394706179761, + "grad_norm": 0.013675367459654808, + "learning_rate": 0.0002, + "loss": 0.0049, + "step": 11360 + }, + { + "epoch": 0.0551424452546337, + "grad_norm": 0.00046469023800455034, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 11370 + }, + { + "epoch": 0.05519094344746979, + "grad_norm": 7.429649122059345e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11380 + }, + { + "epoch": 0.05523944164030588, + "grad_norm": 8.091307245194912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11390 + }, + { + "epoch": 0.055287939833141966, + "grad_norm": 4.890705531579442e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11400 + }, + { + "epoch": 0.055336438025978055, + "grad_norm": 5.9566067648120224e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11410 + }, + { + "epoch": 0.05538493621881414, + "grad_norm": 0.00010811805259436369, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11420 + }, + { + "epoch": 0.05543343441165024, + "grad_norm": 3.084894342464395e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11430 + }, + { + "epoch": 0.05548193260448633, + "grad_norm": 2.6687617719289847e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11440 + }, + { + "epoch": 0.055530430797322415, + "grad_norm": 2.961488462460693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11450 + }, + { + "epoch": 0.0555789289901585, + "grad_norm": 8.404957770835608e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11460 + }, + { + "epoch": 0.05562742718299459, + "grad_norm": 0.003164599882438779, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11470 + }, + { + "epoch": 0.05567592537583068, + "grad_norm": 2.507628596504219e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11480 + }, + { + "epoch": 0.055724423568666775, + "grad_norm": 2.1095383999636397e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11490 + }, + { + "epoch": 0.055772921761502864, + "grad_norm": 2.0506455257418565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11500 + }, + { + "epoch": 0.05582141995433895, + "grad_norm": 2.08102665055776e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11510 + }, + { + "epoch": 0.05586991814717504, + "grad_norm": 2.9322089176275767e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11520 + }, + { + "epoch": 0.05591841634001113, + "grad_norm": 1.7837386621977203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11530 + }, + { + "epoch": 0.05596691453284722, + "grad_norm": 1.6148571376106702e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11540 + }, + { + "epoch": 0.05601541272568331, + "grad_norm": 1.9342953237355687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11550 + }, + { + "epoch": 0.0560639109185194, + "grad_norm": 1.6058625988080166e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11560 + }, + { + "epoch": 0.05611240911135549, + "grad_norm": 1.5650817658752203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11570 + }, + { + "epoch": 0.05616090730419158, + "grad_norm": 1.528153552499134e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11580 + }, + { + "epoch": 0.056209405497027666, + "grad_norm": 1.2588055142259691e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11590 + }, + { + "epoch": 0.056257903689863754, + "grad_norm": 1.1849772818095516e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11600 + }, + { + "epoch": 0.05630640188269984, + "grad_norm": 1.475080080126645e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11610 + }, + { + "epoch": 0.05635490007553594, + "grad_norm": 1.4556434507539961e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11620 + }, + { + "epoch": 0.056403398268372026, + "grad_norm": 1.4328184988698922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11630 + }, + { + "epoch": 0.056451896461208115, + "grad_norm": 1.055444408848416e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11640 + }, + { + "epoch": 0.0565003946540442, + "grad_norm": 1.0305478099326137e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11650 + }, + { + "epoch": 0.05654889284688029, + "grad_norm": 0.00014374739839695394, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11660 + }, + { + "epoch": 0.05659739103971638, + "grad_norm": 1.4891128557792399e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11670 + }, + { + "epoch": 0.056645889232552475, + "grad_norm": 1.2957269973412622e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11680 + }, + { + "epoch": 0.056694387425388564, + "grad_norm": 1.012346547213383e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11690 + }, + { + "epoch": 0.05674288561822465, + "grad_norm": 8.446214451396372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11700 + }, + { + "epoch": 0.05679138381106074, + "grad_norm": 1.2167749446234666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11710 + }, + { + "epoch": 0.05683988200389683, + "grad_norm": 1.397819596604677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11720 + }, + { + "epoch": 0.05688838019673292, + "grad_norm": 1.2254068678885233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11730 + }, + { + "epoch": 0.056936878389569005, + "grad_norm": 8.535973393009044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11740 + }, + { + "epoch": 0.0569853765824051, + "grad_norm": 8.091907147900201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11750 + }, + { + "epoch": 0.05703387477524119, + "grad_norm": 0.00014916791405994445, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11760 + }, + { + "epoch": 0.05708237296807728, + "grad_norm": 1.0985717381117865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11770 + }, + { + "epoch": 0.057130871160913366, + "grad_norm": 9.897871677821968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11780 + }, + { + "epoch": 0.057179369353749454, + "grad_norm": 8.586201147409156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11790 + }, + { + "epoch": 0.05722786754658554, + "grad_norm": 8.351827091246378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11800 + }, + { + "epoch": 0.05727636573942164, + "grad_norm": 1.0156704774999525e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11810 + }, + { + "epoch": 0.057324863932257726, + "grad_norm": 8.948689355747774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11820 + }, + { + "epoch": 0.057373362125093814, + "grad_norm": 8.85082954482641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11830 + }, + { + "epoch": 0.0574218603179299, + "grad_norm": 8.842878742143512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11840 + }, + { + "epoch": 0.05747035851076599, + "grad_norm": 8.623343092040159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11850 + }, + { + "epoch": 0.05751885670360208, + "grad_norm": 8.431291462329682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11860 + }, + { + "epoch": 0.057567354896438175, + "grad_norm": 7.926279067760333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11870 + }, + { + "epoch": 0.05761585308927426, + "grad_norm": 8.121510290948208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11880 + }, + { + "epoch": 0.05766435128211035, + "grad_norm": 7.262471171998186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11890 + }, + { + "epoch": 0.05771284947494644, + "grad_norm": 6.119023964856751e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11900 + }, + { + "epoch": 0.05776134766778253, + "grad_norm": 9.644483725423925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11910 + }, + { + "epoch": 0.05780984586061862, + "grad_norm": 7.752562851237599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11920 + }, + { + "epoch": 0.057858344053454705, + "grad_norm": 7.4203708209097385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11930 + }, + { + "epoch": 0.0579068422462908, + "grad_norm": 6.020677574269939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11940 + }, + { + "epoch": 0.05795534043912689, + "grad_norm": 5.580329343501944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11950 + }, + { + "epoch": 0.05800383863196298, + "grad_norm": 8.641599379188847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11960 + }, + { + "epoch": 0.058052336824799065, + "grad_norm": 8.233764674514532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 11970 + }, + { + "epoch": 0.058100835017635154, + "grad_norm": 1.6703263099770993e-05, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 11980 + }, + { + "epoch": 0.05814933321047124, + "grad_norm": 4.5719294575974345e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 11990 + }, + { + "epoch": 0.05819783140330734, + "grad_norm": 9.848875924944878e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12000 + }, + { + "epoch": 0.058246329596143426, + "grad_norm": 5.054990833741613e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 12010 + }, + { + "epoch": 0.058294827788979514, + "grad_norm": 3.479682345641777e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12020 + }, + { + "epoch": 0.0583433259818156, + "grad_norm": 3.0263840017141774e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12030 + }, + { + "epoch": 0.05839182417465169, + "grad_norm": 1.2773120033671148e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12040 + }, + { + "epoch": 0.05844032236748778, + "grad_norm": 1.3094450878270436e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12050 + }, + { + "epoch": 0.05848882056032387, + "grad_norm": 2.693448004720267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12060 + }, + { + "epoch": 0.05853731875315996, + "grad_norm": 1.978775253519416e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12070 + }, + { + "epoch": 0.05858581694599605, + "grad_norm": 1.8218026525573805e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12080 + }, + { + "epoch": 0.05863431513883214, + "grad_norm": 1.0681519597710576e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12090 + }, + { + "epoch": 0.05868281333166823, + "grad_norm": 0.00021944277978036553, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12100 + }, + { + "epoch": 0.058731311524504316, + "grad_norm": 1.432143835700117e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12110 + }, + { + "epoch": 0.058779809717340405, + "grad_norm": 1.598077687958721e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12120 + }, + { + "epoch": 0.0588283079101765, + "grad_norm": 1.2905218682135455e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12130 + }, + { + "epoch": 0.05887680610301259, + "grad_norm": 8.17573891254142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12140 + }, + { + "epoch": 0.05892530429584868, + "grad_norm": 7.961803021316882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12150 + }, + { + "epoch": 0.058973802488684765, + "grad_norm": 1.1486662515380885e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12160 + }, + { + "epoch": 0.05902230068152085, + "grad_norm": 1.0461857527843677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12170 + }, + { + "epoch": 0.05907079887435694, + "grad_norm": 9.885652616503648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12180 + }, + { + "epoch": 0.05911929706719304, + "grad_norm": 6.9895668275421485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12190 + }, + { + "epoch": 0.059167795260029125, + "grad_norm": 7.763629582768772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12200 + }, + { + "epoch": 0.059216293452865214, + "grad_norm": 9.349068932351656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12210 + }, + { + "epoch": 0.0592647916457013, + "grad_norm": 8.73904900799971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12220 + }, + { + "epoch": 0.05931328983853739, + "grad_norm": 9.262197636417113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12230 + }, + { + "epoch": 0.05936178803137348, + "grad_norm": 6.853078957647085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12240 + }, + { + "epoch": 0.05941028622420957, + "grad_norm": 6.750432930857642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12250 + }, + { + "epoch": 0.05945878441704566, + "grad_norm": 8.289699508168269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12260 + }, + { + "epoch": 0.05950728260988175, + "grad_norm": 8.918625098885968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12270 + }, + { + "epoch": 0.05955578080271784, + "grad_norm": 7.747496965748724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12280 + }, + { + "epoch": 0.05960427899555393, + "grad_norm": 6.132477210485376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12290 + }, + { + "epoch": 0.059652777188390016, + "grad_norm": 5.617330316454172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12300 + }, + { + "epoch": 0.059701275381226104, + "grad_norm": 7.782743523421232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12310 + }, + { + "epoch": 0.0597497735740622, + "grad_norm": 7.530819857493043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12320 + }, + { + "epoch": 0.05979827176689829, + "grad_norm": 7.74822910898365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12330 + }, + { + "epoch": 0.059846769959734376, + "grad_norm": 5.418045930127846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12340 + }, + { + "epoch": 0.059895268152570465, + "grad_norm": 5.6031221902230754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12350 + }, + { + "epoch": 0.05994376634540655, + "grad_norm": 7.207685030152788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12360 + }, + { + "epoch": 0.05999226453824264, + "grad_norm": 6.7209584813099355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12370 + }, + { + "epoch": 0.06004076273107873, + "grad_norm": 1.2230938409629744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12380 + }, + { + "epoch": 0.060089260923914825, + "grad_norm": 5.012604106013896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12390 + }, + { + "epoch": 0.06013775911675091, + "grad_norm": 4.646986326406477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12400 + }, + { + "epoch": 0.060186257309587, + "grad_norm": 6.332207703962922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12410 + }, + { + "epoch": 0.06023475550242309, + "grad_norm": 6.571663561771857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12420 + }, + { + "epoch": 0.06028325369525918, + "grad_norm": 6.362512976920698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12430 + }, + { + "epoch": 0.06033175188809527, + "grad_norm": 4.856052782997722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12440 + }, + { + "epoch": 0.06038025008093136, + "grad_norm": 2.1144580387044698e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12450 + }, + { + "epoch": 0.06042874827376745, + "grad_norm": 5.687448265234707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12460 + }, + { + "epoch": 0.06047724646660354, + "grad_norm": 6.067655249353265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12470 + }, + { + "epoch": 0.06052574465943963, + "grad_norm": 5.263765160634648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12480 + }, + { + "epoch": 0.060574242852275716, + "grad_norm": 5.279014658299275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12490 + }, + { + "epoch": 0.060622741045111804, + "grad_norm": 1.4500376892101485e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12500 + }, + { + "epoch": 0.0606712392379479, + "grad_norm": 5.445530405268073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12510 + }, + { + "epoch": 0.06071973743078399, + "grad_norm": 5.47821127838688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12520 + }, + { + "epoch": 0.060768235623620076, + "grad_norm": 5.712945039704209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12530 + }, + { + "epoch": 0.060816733816456164, + "grad_norm": 5.815319127577823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12540 + }, + { + "epoch": 0.06086523200929225, + "grad_norm": 3.8882171793375164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12550 + }, + { + "epoch": 0.06091373020212834, + "grad_norm": 5.192013304622378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12560 + }, + { + "epoch": 0.06096222839496443, + "grad_norm": 5.821735612698831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12570 + }, + { + "epoch": 0.061010726587800525, + "grad_norm": 4.595379323291127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12580 + }, + { + "epoch": 0.06105922478063661, + "grad_norm": 5.275592229736503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12590 + }, + { + "epoch": 0.0611077229734727, + "grad_norm": 4.1279995457443874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12600 + }, + { + "epoch": 0.06115622116630879, + "grad_norm": 5.036626134824473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12610 + }, + { + "epoch": 0.06120471935914488, + "grad_norm": 9.1088068074896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12620 + }, + { + "epoch": 0.06125321755198097, + "grad_norm": 6.03677563049132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12630 + }, + { + "epoch": 0.06130171574481706, + "grad_norm": 4.209737198834773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12640 + }, + { + "epoch": 0.06135021393765315, + "grad_norm": 4.1048442653845996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12650 + }, + { + "epoch": 0.06139871213048924, + "grad_norm": 4.802159764949465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12660 + }, + { + "epoch": 0.06144721032332533, + "grad_norm": 4.619710125552956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12670 + }, + { + "epoch": 0.061495708516161415, + "grad_norm": 4.586518116411753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12680 + }, + { + "epoch": 0.061544206708997504, + "grad_norm": 3.6193400774209294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12690 + }, + { + "epoch": 0.06159270490183359, + "grad_norm": 3.445836910032085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12700 + }, + { + "epoch": 0.06164120309466969, + "grad_norm": 4.44789657194633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12710 + }, + { + "epoch": 0.061689701287505776, + "grad_norm": 4.484507826418849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12720 + }, + { + "epoch": 0.061738199480341864, + "grad_norm": 4.293099209462525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12730 + }, + { + "epoch": 0.06178669767317795, + "grad_norm": 3.2998930237226887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12740 + }, + { + "epoch": 0.06183519586601404, + "grad_norm": 3.5016503261431353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12750 + }, + { + "epoch": 0.06188369405885013, + "grad_norm": 4.449444531928748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12760 + }, + { + "epoch": 0.061932192251686224, + "grad_norm": 4.196528152533574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12770 + }, + { + "epoch": 0.06198069044452231, + "grad_norm": 4.566743427858455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12780 + }, + { + "epoch": 0.0620291886373584, + "grad_norm": 3.632077323345584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12790 + }, + { + "epoch": 0.06207768683019449, + "grad_norm": 3.784907448789454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12800 + }, + { + "epoch": 0.06212618502303058, + "grad_norm": 2.8310429115663283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12810 + }, + { + "epoch": 0.062174683215866666, + "grad_norm": 3.5813920931104803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12820 + }, + { + "epoch": 0.06222318140870276, + "grad_norm": 4.03835065299063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12830 + }, + { + "epoch": 0.06227167960153885, + "grad_norm": 3.291489974799333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12840 + }, + { + "epoch": 0.06232017779437494, + "grad_norm": 3.1047729862621054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12850 + }, + { + "epoch": 0.06236867598721103, + "grad_norm": 4.367784640635364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12860 + }, + { + "epoch": 0.062417174180047115, + "grad_norm": 3.479125780359027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12870 + }, + { + "epoch": 0.0624656723728832, + "grad_norm": 3.6175913464830955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12880 + }, + { + "epoch": 0.0625141705657193, + "grad_norm": 3.0519140636897646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12890 + }, + { + "epoch": 0.06256266875855539, + "grad_norm": 2.992675263158162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12900 + }, + { + "epoch": 0.06261116695139148, + "grad_norm": 3.2281395760946907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12910 + }, + { + "epoch": 0.06265966514422756, + "grad_norm": 3.1872830277279718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12920 + }, + { + "epoch": 0.06270816333706365, + "grad_norm": 3.5389150525588775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12930 + }, + { + "epoch": 0.06275666152989974, + "grad_norm": 3.0822009193798294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12940 + }, + { + "epoch": 0.06280515972273583, + "grad_norm": 3.417502284719376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12950 + }, + { + "epoch": 0.06285365791557192, + "grad_norm": 3.277168843851541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12960 + }, + { + "epoch": 0.062902156108408, + "grad_norm": 6.692755960102659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12970 + }, + { + "epoch": 0.0629506543012441, + "grad_norm": 3.3125941172329476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12980 + }, + { + "epoch": 0.0629991524940802, + "grad_norm": 2.5372551135660615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 12990 + }, + { + "epoch": 0.06304765068691628, + "grad_norm": 2.700746790651465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13000 + }, + { + "epoch": 0.06309614887975237, + "grad_norm": 3.4535069062258117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13010 + }, + { + "epoch": 0.06314464707258846, + "grad_norm": 3.0785367926000617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13020 + }, + { + "epoch": 0.06319314526542455, + "grad_norm": 3.003855226779706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13030 + }, + { + "epoch": 0.06324164345826064, + "grad_norm": 2.6186646664427826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13040 + }, + { + "epoch": 0.06329014165109673, + "grad_norm": 2.4714067876629997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13050 + }, + { + "epoch": 0.06333863984393281, + "grad_norm": 3.1026893339003436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13060 + }, + { + "epoch": 0.0633871380367689, + "grad_norm": 3.698335149238119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13070 + }, + { + "epoch": 0.06343563622960499, + "grad_norm": 5.3716385082225315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13080 + }, + { + "epoch": 0.06348413442244108, + "grad_norm": 2.9327243282750715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13090 + }, + { + "epoch": 0.06353263261527717, + "grad_norm": 3.3346304917358793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13100 + }, + { + "epoch": 0.06358113080811326, + "grad_norm": 3.399180968699511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13110 + }, + { + "epoch": 0.06362962900094936, + "grad_norm": 3.823103725153487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13120 + }, + { + "epoch": 0.06367812719378545, + "grad_norm": 2.9367447496042587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13130 + }, + { + "epoch": 0.06372662538662154, + "grad_norm": 2.586617256383761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13140 + }, + { + "epoch": 0.06377512357945762, + "grad_norm": 2.4253936317109037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13150 + }, + { + "epoch": 0.06382362177229371, + "grad_norm": 2.747536200331524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13160 + }, + { + "epoch": 0.0638721199651298, + "grad_norm": 3.134259031867259e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13170 + }, + { + "epoch": 0.06392061815796589, + "grad_norm": 2.849435531970812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13180 + }, + { + "epoch": 0.06396911635080198, + "grad_norm": 2.4750593183853198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13190 + }, + { + "epoch": 0.06401761454363807, + "grad_norm": 2.5767353690753225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13200 + }, + { + "epoch": 0.06406611273647415, + "grad_norm": 2.5065207864827244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13210 + }, + { + "epoch": 0.06411461092931024, + "grad_norm": 2.9308946523087798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13220 + }, + { + "epoch": 0.06416310912214633, + "grad_norm": 2.603370148790418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13230 + }, + { + "epoch": 0.06421160731498242, + "grad_norm": 2.346236215089448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13240 + }, + { + "epoch": 0.06426010550781852, + "grad_norm": 0.00013334464165382087, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13250 + }, + { + "epoch": 0.06430860370065461, + "grad_norm": 2.501570861568325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13260 + }, + { + "epoch": 0.0643571018934907, + "grad_norm": 3.2868824746401515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13270 + }, + { + "epoch": 0.06440560008632679, + "grad_norm": 2.614981667647953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13280 + }, + { + "epoch": 0.06445409827916287, + "grad_norm": 2.238275556010194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13290 + }, + { + "epoch": 0.06450259647199896, + "grad_norm": 2.0994004898966523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13300 + }, + { + "epoch": 0.06455109466483505, + "grad_norm": 2.489714006514987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13310 + }, + { + "epoch": 0.06459959285767114, + "grad_norm": 2.4683949959580787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13320 + }, + { + "epoch": 0.06464809105050723, + "grad_norm": 2.466598061801051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13330 + }, + { + "epoch": 0.06469658924334332, + "grad_norm": 2.4946270968939643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13340 + }, + { + "epoch": 0.0647450874361794, + "grad_norm": 2.448509121677489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13350 + }, + { + "epoch": 0.0647935856290155, + "grad_norm": 2.666118007255136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13360 + }, + { + "epoch": 0.06484208382185158, + "grad_norm": 2.622376314320718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13370 + }, + { + "epoch": 0.06489058201468768, + "grad_norm": 2.3823167794034816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13380 + }, + { + "epoch": 0.06493908020752377, + "grad_norm": 2.2194681150722317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13390 + }, + { + "epoch": 0.06498757840035986, + "grad_norm": 2.0344621134427143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13400 + }, + { + "epoch": 0.06503607659319595, + "grad_norm": 3.500310640447424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13410 + }, + { + "epoch": 0.06508457478603204, + "grad_norm": 2.237379248981597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13420 + }, + { + "epoch": 0.06513307297886813, + "grad_norm": 2.279409500260954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13430 + }, + { + "epoch": 0.06518157117170421, + "grad_norm": 2.0476009012782015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13440 + }, + { + "epoch": 0.0652300693645403, + "grad_norm": 2.1458263290696777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13450 + }, + { + "epoch": 0.06527856755737639, + "grad_norm": 2.3237557797983754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13460 + }, + { + "epoch": 0.06532706575021248, + "grad_norm": 2.5816241304710275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13470 + }, + { + "epoch": 0.06537556394304857, + "grad_norm": 2.256505695186206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13480 + }, + { + "epoch": 0.06542406213588466, + "grad_norm": 2.1003183974244166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13490 + }, + { + "epoch": 0.06547256032872074, + "grad_norm": 2.0010356820421293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13500 + }, + { + "epoch": 0.06552105852155685, + "grad_norm": 2.108904936903855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13510 + }, + { + "epoch": 0.06556955671439293, + "grad_norm": 1.993560317714582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13520 + }, + { + "epoch": 0.06561805490722902, + "grad_norm": 2.1418218238977715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13530 + }, + { + "epoch": 0.06566655310006511, + "grad_norm": 2.0157797280262457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13540 + }, + { + "epoch": 0.0657150512929012, + "grad_norm": 2.1667699456884293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13550 + }, + { + "epoch": 0.06576354948573729, + "grad_norm": 2.1583045963780023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13560 + }, + { + "epoch": 0.06581204767857338, + "grad_norm": 2.0786615095857996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13570 + }, + { + "epoch": 0.06586054587140946, + "grad_norm": 2.2351403003995074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13580 + }, + { + "epoch": 0.06590904406424555, + "grad_norm": 2.4840585410856875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13590 + }, + { + "epoch": 0.06595754225708164, + "grad_norm": 1.8637406355992425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13600 + }, + { + "epoch": 0.06600604044991773, + "grad_norm": 2.034388671745546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13610 + }, + { + "epoch": 0.06605453864275382, + "grad_norm": 2.187343170589884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13620 + }, + { + "epoch": 0.06610303683558992, + "grad_norm": 1.941758000612026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13630 + }, + { + "epoch": 0.06615153502842601, + "grad_norm": 1.7175769926325302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13640 + }, + { + "epoch": 0.0662000332212621, + "grad_norm": 1.897720608212694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13650 + }, + { + "epoch": 0.06624853141409819, + "grad_norm": 1.9044734926865203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13660 + }, + { + "epoch": 0.06629702960693427, + "grad_norm": 2.087850816678838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13670 + }, + { + "epoch": 0.06634552779977036, + "grad_norm": 2.467493914082297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13680 + }, + { + "epoch": 0.06639402599260645, + "grad_norm": 1.6783282035248703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13690 + }, + { + "epoch": 0.06644252418544254, + "grad_norm": 1.7614210037208977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13700 + }, + { + "epoch": 0.06649102237827863, + "grad_norm": 1.8673931663215626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13710 + }, + { + "epoch": 0.06653952057111472, + "grad_norm": 1.8665707557374844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13720 + }, + { + "epoch": 0.0665880187639508, + "grad_norm": 2.208944351878017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13730 + }, + { + "epoch": 0.06663651695678689, + "grad_norm": 1.7670726037977147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13740 + }, + { + "epoch": 0.06668501514962298, + "grad_norm": 2.036681735262391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13750 + }, + { + "epoch": 0.06673351334245908, + "grad_norm": 2.003885583690135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13760 + }, + { + "epoch": 0.06678201153529517, + "grad_norm": 1.8850427068173303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13770 + }, + { + "epoch": 0.06683050972813126, + "grad_norm": 1.9932253962906543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13780 + }, + { + "epoch": 0.06687900792096735, + "grad_norm": 1.6959642152869492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13790 + }, + { + "epoch": 0.06692750611380344, + "grad_norm": 1.7095475186579279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13800 + }, + { + "epoch": 0.06697600430663952, + "grad_norm": 1.847149974310014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13810 + }, + { + "epoch": 0.06702450249947561, + "grad_norm": 2.0266538740543183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13820 + }, + { + "epoch": 0.0670730006923117, + "grad_norm": 1.8539087704994017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13830 + }, + { + "epoch": 0.06712149888514779, + "grad_norm": 1.8415679505778826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13840 + }, + { + "epoch": 0.06716999707798388, + "grad_norm": 2.6767611416289583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13850 + }, + { + "epoch": 0.06721849527081997, + "grad_norm": 2.032205884461291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13860 + }, + { + "epoch": 0.06726699346365606, + "grad_norm": 1.7877781601782772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13870 + }, + { + "epoch": 0.06731549165649214, + "grad_norm": 1.6155203184098355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13880 + }, + { + "epoch": 0.06736398984932825, + "grad_norm": 9.518791921436787e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13890 + }, + { + "epoch": 0.06741248804216433, + "grad_norm": 1.6013846106943674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13900 + }, + { + "epoch": 0.06746098623500042, + "grad_norm": 1.751143599904026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13910 + }, + { + "epoch": 0.06750948442783651, + "grad_norm": 2.1366636246966664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13920 + }, + { + "epoch": 0.0675579826206726, + "grad_norm": 2.5033168640220538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13930 + }, + { + "epoch": 0.06760648081350869, + "grad_norm": 1.5587481811962789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13940 + }, + { + "epoch": 0.06765497900634478, + "grad_norm": 1.6965342410912854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13950 + }, + { + "epoch": 0.06770347719918086, + "grad_norm": 1.7114045931521105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13960 + }, + { + "epoch": 0.06775197539201695, + "grad_norm": 1.7291364429183886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13970 + }, + { + "epoch": 0.06780047358485304, + "grad_norm": 2.5720823941810522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13980 + }, + { + "epoch": 0.06784897177768913, + "grad_norm": 1.580026037117932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 13990 + }, + { + "epoch": 0.06789746997052522, + "grad_norm": 1.5648167845938588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14000 + }, + { + "epoch": 0.0679459681633613, + "grad_norm": 1.70693874679273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14010 + }, + { + "epoch": 0.06799446635619741, + "grad_norm": 1.5326412494687247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14020 + }, + { + "epoch": 0.0680429645490335, + "grad_norm": 1.8204598291049479e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14030 + }, + { + "epoch": 0.06809146274186959, + "grad_norm": 1.5172010989772389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14040 + }, + { + "epoch": 0.06813996093470567, + "grad_norm": 1.4603865565732121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14050 + }, + { + "epoch": 0.06818845912754176, + "grad_norm": 1.5276224303306662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14060 + }, + { + "epoch": 0.06823695732037785, + "grad_norm": 1.558177586957754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14070 + }, + { + "epoch": 0.06828545551321394, + "grad_norm": 1.5507199577768915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14080 + }, + { + "epoch": 0.06833395370605003, + "grad_norm": 1.3857419389751158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14090 + }, + { + "epoch": 0.06838245189888612, + "grad_norm": 1.36949643092521e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14100 + }, + { + "epoch": 0.0684309500917222, + "grad_norm": 1.4566559229933773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14110 + }, + { + "epoch": 0.06847944828455829, + "grad_norm": 1.703991870272148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14120 + }, + { + "epoch": 0.06852794647739438, + "grad_norm": 1.5231692032102728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14130 + }, + { + "epoch": 0.06857644467023047, + "grad_norm": 1.4920201465429273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14140 + }, + { + "epoch": 0.06862494286306657, + "grad_norm": 1.3858048077963758e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14150 + }, + { + "epoch": 0.06867344105590266, + "grad_norm": 1.5550656371488003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14160 + }, + { + "epoch": 0.06872193924873875, + "grad_norm": 1.5132751514101983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14170 + }, + { + "epoch": 0.06877043744157484, + "grad_norm": 1.4179094023347716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14180 + }, + { + "epoch": 0.06881893563441092, + "grad_norm": 1.5397545212181285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14190 + }, + { + "epoch": 0.06886743382724701, + "grad_norm": 1.417627458977222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14200 + }, + { + "epoch": 0.0689159320200831, + "grad_norm": 1.5505354440392694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14210 + }, + { + "epoch": 0.06896443021291919, + "grad_norm": 1.4502568319585407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14220 + }, + { + "epoch": 0.06901292840575528, + "grad_norm": 1.406866203979007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14230 + }, + { + "epoch": 0.06906142659859137, + "grad_norm": 1.3280152870720485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14240 + }, + { + "epoch": 0.06910992479142745, + "grad_norm": 1.378165165988321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14250 + }, + { + "epoch": 0.06915842298426354, + "grad_norm": 2.542063612054335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14260 + }, + { + "epoch": 0.06920692117709965, + "grad_norm": 1.3960972182758269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14270 + }, + { + "epoch": 0.06925541936993573, + "grad_norm": 1.560479631734779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14280 + }, + { + "epoch": 0.06930391756277182, + "grad_norm": 1.4118721765044029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14290 + }, + { + "epoch": 0.06935241575560791, + "grad_norm": 1.3680444226338295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14300 + }, + { + "epoch": 0.069400913948444, + "grad_norm": 2.255107801829581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14310 + }, + { + "epoch": 0.06944941214128009, + "grad_norm": 1.5711057130829431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14320 + }, + { + "epoch": 0.06949791033411618, + "grad_norm": 1.47883645240654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14330 + }, + { + "epoch": 0.06954640852695226, + "grad_norm": 1.2376226550259162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14340 + }, + { + "epoch": 0.06959490671978835, + "grad_norm": 1.2553906572065898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14350 + }, + { + "epoch": 0.06964340491262444, + "grad_norm": 1.3210553788667312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14360 + }, + { + "epoch": 0.06969190310546053, + "grad_norm": 1.4628914186687325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14370 + }, + { + "epoch": 0.06974040129829662, + "grad_norm": 1.3094892210574471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14380 + }, + { + "epoch": 0.0697888994911327, + "grad_norm": 1.2633568076125812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14390 + }, + { + "epoch": 0.06983739768396881, + "grad_norm": 1.1251852356508607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14400 + }, + { + "epoch": 0.0698858958768049, + "grad_norm": 1.4007255231263116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14410 + }, + { + "epoch": 0.06993439406964098, + "grad_norm": 1.2868320027337177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14420 + }, + { + "epoch": 0.06998289226247707, + "grad_norm": 1.31331626107567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14430 + }, + { + "epoch": 0.07003139045531316, + "grad_norm": 1.2651690894927015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14440 + }, + { + "epoch": 0.07007988864814925, + "grad_norm": 1.1573768006201135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14450 + }, + { + "epoch": 0.07012838684098534, + "grad_norm": 1.3757207852904685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14460 + }, + { + "epoch": 0.07017688503382143, + "grad_norm": 1.243987753696274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14470 + }, + { + "epoch": 0.07022538322665751, + "grad_norm": 1.4180576499711606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14480 + }, + { + "epoch": 0.0702738814194936, + "grad_norm": 1.1298509434709558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14490 + }, + { + "epoch": 0.07032237961232969, + "grad_norm": 1.112172412831569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14500 + }, + { + "epoch": 0.07037087780516578, + "grad_norm": 1.1126981007691938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14510 + }, + { + "epoch": 0.07041937599800187, + "grad_norm": 1.7069673958758358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14520 + }, + { + "epoch": 0.07046787419083797, + "grad_norm": 1.1700207096509985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14530 + }, + { + "epoch": 0.07051637238367406, + "grad_norm": 1.0965135288643069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14540 + }, + { + "epoch": 0.07056487057651015, + "grad_norm": 1.0319600960428943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14550 + }, + { + "epoch": 0.07061336876934624, + "grad_norm": 1.1868340834553237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14560 + }, + { + "epoch": 0.07066186696218232, + "grad_norm": 1.1948303608733113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14570 + }, + { + "epoch": 0.07071036515501841, + "grad_norm": 1.1271348512309487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14580 + }, + { + "epoch": 0.0707588633478545, + "grad_norm": 1.0438648132549133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14590 + }, + { + "epoch": 0.07080736154069059, + "grad_norm": 1.0615585779305547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14600 + }, + { + "epoch": 0.07085585973352668, + "grad_norm": 1.0601021358525031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14610 + }, + { + "epoch": 0.07090435792636277, + "grad_norm": 1.2139616956119426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14620 + }, + { + "epoch": 0.07095285611919885, + "grad_norm": 1.2757232070725877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14630 + }, + { + "epoch": 0.07100135431203494, + "grad_norm": 1.1661926464512362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14640 + }, + { + "epoch": 0.07104985250487103, + "grad_norm": 1.0134161811947706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14650 + }, + { + "epoch": 0.07109835069770713, + "grad_norm": 1.1328850177960703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14660 + }, + { + "epoch": 0.07114684889054322, + "grad_norm": 1.2184648312540958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14670 + }, + { + "epoch": 0.07119534708337931, + "grad_norm": 1.0630785709508928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14680 + }, + { + "epoch": 0.0712438452762154, + "grad_norm": 9.97615757114545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14690 + }, + { + "epoch": 0.07129234346905149, + "grad_norm": 1.2217320772833773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14700 + }, + { + "epoch": 0.07134084166188757, + "grad_norm": 1.002724161480728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14710 + }, + { + "epoch": 0.07138933985472366, + "grad_norm": 1.268230107598356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14720 + }, + { + "epoch": 0.07143783804755975, + "grad_norm": 1.216611508425558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14730 + }, + { + "epoch": 0.07148633624039584, + "grad_norm": 1.029638156069268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14740 + }, + { + "epoch": 0.07153483443323193, + "grad_norm": 1.4295798109742464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14750 + }, + { + "epoch": 0.07158333262606802, + "grad_norm": 1.0565473758106236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14760 + }, + { + "epoch": 0.0716318308189041, + "grad_norm": 1.0508902050787583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14770 + }, + { + "epoch": 0.0716803290117402, + "grad_norm": 9.987309113057563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14780 + }, + { + "epoch": 0.0717288272045763, + "grad_norm": 1.0412858273411985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14790 + }, + { + "epoch": 0.07177732539741238, + "grad_norm": 9.016679882734024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14800 + }, + { + "epoch": 0.07182582359024847, + "grad_norm": 1.0163875003854628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14810 + }, + { + "epoch": 0.07187432178308456, + "grad_norm": 9.825588449530187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14820 + }, + { + "epoch": 0.07192281997592065, + "grad_norm": 1.0010489859269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14830 + }, + { + "epoch": 0.07197131816875674, + "grad_norm": 1.0209757874690695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14840 + }, + { + "epoch": 0.07201981636159283, + "grad_norm": 9.37171193982067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14850 + }, + { + "epoch": 0.07206831455442891, + "grad_norm": 1.0301284874003613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14860 + }, + { + "epoch": 0.072116812747265, + "grad_norm": 1.1962756616412662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14870 + }, + { + "epoch": 0.07216531094010109, + "grad_norm": 2.1215000742813572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14880 + }, + { + "epoch": 0.07221380913293718, + "grad_norm": 8.953612109507958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14890 + }, + { + "epoch": 0.07226230732577327, + "grad_norm": 8.603508376836544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14900 + }, + { + "epoch": 0.07231080551860937, + "grad_norm": 9.46169905091665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14910 + }, + { + "epoch": 0.07235930371144546, + "grad_norm": 1.1896822798007634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14920 + }, + { + "epoch": 0.07240780190428155, + "grad_norm": 9.781676908460213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14930 + }, + { + "epoch": 0.07245630009711763, + "grad_norm": 8.860008051669865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14940 + }, + { + "epoch": 0.07250479828995372, + "grad_norm": 8.463362632937788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14950 + }, + { + "epoch": 0.07255329648278981, + "grad_norm": 9.652461585574201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14960 + }, + { + "epoch": 0.0726017946756259, + "grad_norm": 9.247149819202605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14970 + }, + { + "epoch": 0.07265029286846199, + "grad_norm": 8.854989914652833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14980 + }, + { + "epoch": 0.07269879106129808, + "grad_norm": 7.902120273683977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 14990 + }, + { + "epoch": 0.07274728925413416, + "grad_norm": 8.950966616794176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15000 + }, + { + "epoch": 0.07279578744697025, + "grad_norm": 8.685180432621564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15010 + }, + { + "epoch": 0.07284428563980634, + "grad_norm": 9.023098073157598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15020 + }, + { + "epoch": 0.07289278383264243, + "grad_norm": 9.43487236781948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15030 + }, + { + "epoch": 0.07294128202547853, + "grad_norm": 9.239490168511111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15040 + }, + { + "epoch": 0.07298978021831462, + "grad_norm": 8.472296144645952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15050 + }, + { + "epoch": 0.07303827841115071, + "grad_norm": 9.288675641982991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15060 + }, + { + "epoch": 0.0730867766039868, + "grad_norm": 1.0360724900237983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15070 + }, + { + "epoch": 0.07313527479682289, + "grad_norm": 9.330954071629094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15080 + }, + { + "epoch": 0.07318377298965897, + "grad_norm": 8.547822289983742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15090 + }, + { + "epoch": 0.07323227118249506, + "grad_norm": 9.400746989740583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15100 + }, + { + "epoch": 0.07328076937533115, + "grad_norm": 8.993368965093396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15110 + }, + { + "epoch": 0.07332926756816724, + "grad_norm": 8.695123483448697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15120 + }, + { + "epoch": 0.07337776576100333, + "grad_norm": 8.607359518464364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15130 + }, + { + "epoch": 0.07342626395383942, + "grad_norm": 8.468316536891507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15140 + }, + { + "epoch": 0.0734747621466755, + "grad_norm": 8.195223699658527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15150 + }, + { + "epoch": 0.07352326033951159, + "grad_norm": 8.256304795395408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15160 + }, + { + "epoch": 0.0735717585323477, + "grad_norm": 1.043061274685897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15170 + }, + { + "epoch": 0.07362025672518378, + "grad_norm": 8.72146699748555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15180 + }, + { + "epoch": 0.07366875491801987, + "grad_norm": 9.478836204834806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15190 + }, + { + "epoch": 0.07371725311085596, + "grad_norm": 8.551338055440283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15200 + }, + { + "epoch": 0.07376575130369205, + "grad_norm": 8.746563366912596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15210 + }, + { + "epoch": 0.07381424949652814, + "grad_norm": 7.885365675974754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15220 + }, + { + "epoch": 0.07386274768936422, + "grad_norm": 8.21651951810054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15230 + }, + { + "epoch": 0.07391124588220031, + "grad_norm": 1.0288500561728142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15240 + }, + { + "epoch": 0.0739597440750364, + "grad_norm": 7.317037216125755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15250 + }, + { + "epoch": 0.07400824226787249, + "grad_norm": 7.8505894407499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15260 + }, + { + "epoch": 0.07405674046070858, + "grad_norm": 2.192133479184122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15270 + }, + { + "epoch": 0.07410523865354467, + "grad_norm": 8.211367230614997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15280 + }, + { + "epoch": 0.07415373684638076, + "grad_norm": 8.000180287126568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15290 + }, + { + "epoch": 0.07420223503921686, + "grad_norm": 7.304935252250289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15300 + }, + { + "epoch": 0.07425073323205295, + "grad_norm": 1.462397108298319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15310 + }, + { + "epoch": 0.07429923142488903, + "grad_norm": 8.544633942619839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15320 + }, + { + "epoch": 0.07434772961772512, + "grad_norm": 7.839196314307628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15330 + }, + { + "epoch": 0.07439622781056121, + "grad_norm": 7.868988518566766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15340 + }, + { + "epoch": 0.0744447260033973, + "grad_norm": 6.97824191320251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15350 + }, + { + "epoch": 0.07449322419623339, + "grad_norm": 7.920818347884051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15360 + }, + { + "epoch": 0.07454172238906948, + "grad_norm": 7.659593279640831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15370 + }, + { + "epoch": 0.07459022058190556, + "grad_norm": 7.019905865490728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15380 + }, + { + "epoch": 0.07463871877474165, + "grad_norm": 7.620399742336303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15390 + }, + { + "epoch": 0.07468721696757774, + "grad_norm": 6.915515200489608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15400 + }, + { + "epoch": 0.07473571516041383, + "grad_norm": 6.746956273673277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15410 + }, + { + "epoch": 0.07478421335324992, + "grad_norm": 7.13854376499512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15420 + }, + { + "epoch": 0.07483271154608602, + "grad_norm": 7.068309741953271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15430 + }, + { + "epoch": 0.07488120973892211, + "grad_norm": 6.716043685628392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15440 + }, + { + "epoch": 0.0749297079317582, + "grad_norm": 7.949391829242813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15450 + }, + { + "epoch": 0.07497820612459428, + "grad_norm": 7.385065146081615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15460 + }, + { + "epoch": 0.07502670431743037, + "grad_norm": 6.633866291849699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15470 + }, + { + "epoch": 0.07507520251026646, + "grad_norm": 7.511167723350809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15480 + }, + { + "epoch": 0.07512370070310255, + "grad_norm": 7.880656198722136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15490 + }, + { + "epoch": 0.07517219889593864, + "grad_norm": 6.525053777295398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15500 + }, + { + "epoch": 0.07522069708877473, + "grad_norm": 7.113558240234852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15510 + }, + { + "epoch": 0.07526919528161082, + "grad_norm": 6.430093435483286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15520 + }, + { + "epoch": 0.0753176934744469, + "grad_norm": 6.201742053235648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15530 + }, + { + "epoch": 0.07536619166728299, + "grad_norm": 8.01142221007467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15540 + }, + { + "epoch": 0.0754146898601191, + "grad_norm": 6.369775178427517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15550 + }, + { + "epoch": 0.07546318805295518, + "grad_norm": 6.351409638227778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15560 + }, + { + "epoch": 0.07551168624579127, + "grad_norm": 9.237434710485104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15570 + }, + { + "epoch": 0.07556018443862736, + "grad_norm": 6.602361963814474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15580 + }, + { + "epoch": 0.07560868263146345, + "grad_norm": 6.81508936395403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15590 + }, + { + "epoch": 0.07565718082429954, + "grad_norm": 6.160393013487919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15600 + }, + { + "epoch": 0.07570567901713562, + "grad_norm": 6.395779337253771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15610 + }, + { + "epoch": 0.07575417720997171, + "grad_norm": 6.683250148853404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15620 + }, + { + "epoch": 0.0758026754028078, + "grad_norm": 6.868431228213012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15630 + }, + { + "epoch": 0.07585117359564389, + "grad_norm": 6.042076847734279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15640 + }, + { + "epoch": 0.07589967178847998, + "grad_norm": 6.101396365920664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15650 + }, + { + "epoch": 0.07594816998131607, + "grad_norm": 6.944374035811052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15660 + }, + { + "epoch": 0.07599666817415215, + "grad_norm": 6.587387133549782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15670 + }, + { + "epoch": 0.07604516636698826, + "grad_norm": 5.779238563263789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15680 + }, + { + "epoch": 0.07609366455982434, + "grad_norm": 5.479576543621079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15690 + }, + { + "epoch": 0.07614216275266043, + "grad_norm": 6.134503678367764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15700 + }, + { + "epoch": 0.07619066094549652, + "grad_norm": 6.003694466016896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15710 + }, + { + "epoch": 0.07623915913833261, + "grad_norm": 6.323943466668425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15720 + }, + { + "epoch": 0.0762876573311687, + "grad_norm": 7.450718157997471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15730 + }, + { + "epoch": 0.07633615552400479, + "grad_norm": 5.425271751846594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15740 + }, + { + "epoch": 0.07638465371684088, + "grad_norm": 5.703652163902007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15750 + }, + { + "epoch": 0.07643315190967696, + "grad_norm": 6.254143158912484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15760 + }, + { + "epoch": 0.07648165010251305, + "grad_norm": 5.576389412453864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15770 + }, + { + "epoch": 0.07653014829534914, + "grad_norm": 5.760424528489239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15780 + }, + { + "epoch": 0.07657864648818523, + "grad_norm": 6.24362485268648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15790 + }, + { + "epoch": 0.07662714468102132, + "grad_norm": 5.961946953902952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15800 + }, + { + "epoch": 0.07667564287385742, + "grad_norm": 5.767535071754537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15810 + }, + { + "epoch": 0.07672414106669351, + "grad_norm": 5.839909817950684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15820 + }, + { + "epoch": 0.0767726392595296, + "grad_norm": 6.039577442606969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15830 + }, + { + "epoch": 0.07682113745236568, + "grad_norm": 6.497205617961299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15840 + }, + { + "epoch": 0.07686963564520177, + "grad_norm": 5.395131097429839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15850 + }, + { + "epoch": 0.07691813383803786, + "grad_norm": 5.482090728037292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15860 + }, + { + "epoch": 0.07696663203087395, + "grad_norm": 6.432787813537288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15870 + }, + { + "epoch": 0.07701513022371004, + "grad_norm": 6.342145866256033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15880 + }, + { + "epoch": 0.07706362841654613, + "grad_norm": 4.795558083969809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15890 + }, + { + "epoch": 0.07711212660938221, + "grad_norm": 4.817717922378506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15900 + }, + { + "epoch": 0.0771606248022183, + "grad_norm": 9.91262936622661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15910 + }, + { + "epoch": 0.07720912299505439, + "grad_norm": 5.456067810882814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15920 + }, + { + "epoch": 0.07725762118789048, + "grad_norm": 5.036978905081924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15930 + }, + { + "epoch": 0.07730611938072658, + "grad_norm": 5.882864115847042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15940 + }, + { + "epoch": 0.07735461757356267, + "grad_norm": 4.94843504839082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15950 + }, + { + "epoch": 0.07740311576639876, + "grad_norm": 5.263281082079629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15960 + }, + { + "epoch": 0.07745161395923485, + "grad_norm": 5.354800123313908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15970 + }, + { + "epoch": 0.07750011215207094, + "grad_norm": 5.026762437410071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15980 + }, + { + "epoch": 0.07754861034490702, + "grad_norm": 4.815996703655401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 15990 + }, + { + "epoch": 0.07759710853774311, + "grad_norm": 4.910812663183606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16000 + }, + { + "epoch": 0.0776456067305792, + "grad_norm": 5.378694822866237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16010 + }, + { + "epoch": 0.07769410492341529, + "grad_norm": 5.785461212326481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16020 + }, + { + "epoch": 0.07774260311625138, + "grad_norm": 5.313785891303269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16030 + }, + { + "epoch": 0.07779110130908747, + "grad_norm": 4.478299899801641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16040 + }, + { + "epoch": 0.07783959950192355, + "grad_norm": 4.6580925072703394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16050 + }, + { + "epoch": 0.07788809769475966, + "grad_norm": 5.06268804656429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16060 + }, + { + "epoch": 0.07793659588759574, + "grad_norm": 5.028535952078528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16070 + }, + { + "epoch": 0.07798509408043183, + "grad_norm": 5.251447987575375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16080 + }, + { + "epoch": 0.07803359227326792, + "grad_norm": 4.830351372220321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16090 + }, + { + "epoch": 0.07808209046610401, + "grad_norm": 5.191297418605245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16100 + }, + { + "epoch": 0.0781305886589401, + "grad_norm": 4.969712676938798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16110 + }, + { + "epoch": 0.07817908685177619, + "grad_norm": 4.836940661334665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16120 + }, + { + "epoch": 0.07822758504461227, + "grad_norm": 5.446580644274945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16130 + }, + { + "epoch": 0.07827608323744836, + "grad_norm": 4.922603125123715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16140 + }, + { + "epoch": 0.07832458143028445, + "grad_norm": 5.741840709561075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16150 + }, + { + "epoch": 0.07837307962312054, + "grad_norm": 5.161326726010884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16160 + }, + { + "epoch": 0.07842157781595663, + "grad_norm": 5.384886208048556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16170 + }, + { + "epoch": 0.07847007600879272, + "grad_norm": 5.667915274898405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16180 + }, + { + "epoch": 0.07851857420162882, + "grad_norm": 5.096411541671841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16190 + }, + { + "epoch": 0.0785670723944649, + "grad_norm": 4.19185340660988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16200 + }, + { + "epoch": 0.078615570587301, + "grad_norm": 5.000528631171619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16210 + }, + { + "epoch": 0.07866406878013708, + "grad_norm": 5.130786462359538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16220 + }, + { + "epoch": 0.07871256697297317, + "grad_norm": 4.5988065267010825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16230 + }, + { + "epoch": 0.07876106516580926, + "grad_norm": 4.199351621991809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16240 + }, + { + "epoch": 0.07880956335864535, + "grad_norm": 4.2822520640584116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16250 + }, + { + "epoch": 0.07885806155148144, + "grad_norm": 4.785163696396921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16260 + }, + { + "epoch": 0.07890655974431753, + "grad_norm": 6.278499427025963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16270 + }, + { + "epoch": 0.07895505793715361, + "grad_norm": 5.283186510496307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16280 + }, + { + "epoch": 0.0790035561299897, + "grad_norm": 4.0994606820277113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16290 + }, + { + "epoch": 0.07905205432282579, + "grad_norm": 4.7461779217883304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16300 + }, + { + "epoch": 0.07910055251566188, + "grad_norm": 4.5926790903649817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16310 + }, + { + "epoch": 0.07914905070849798, + "grad_norm": 4.910078814646113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16320 + }, + { + "epoch": 0.07919754890133407, + "grad_norm": 4.426626105669129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16330 + }, + { + "epoch": 0.07924604709417016, + "grad_norm": 3.839236057956441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16340 + }, + { + "epoch": 0.07929454528700625, + "grad_norm": 4.310907115723239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16350 + }, + { + "epoch": 0.07934304347984233, + "grad_norm": 4.4812307464781043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16360 + }, + { + "epoch": 0.07939154167267842, + "grad_norm": 4.7618712528674223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16370 + }, + { + "epoch": 0.07944003986551451, + "grad_norm": 4.274687910310604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16380 + }, + { + "epoch": 0.0794885380583506, + "grad_norm": 4.070663806032826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16390 + }, + { + "epoch": 0.07953703625118669, + "grad_norm": 4.428982549598004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16400 + }, + { + "epoch": 0.07958553444402278, + "grad_norm": 4.3717500375350937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16410 + }, + { + "epoch": 0.07963403263685886, + "grad_norm": 5.03524063333316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16420 + }, + { + "epoch": 0.07968253082969495, + "grad_norm": 4.7181890749925515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16430 + }, + { + "epoch": 0.07973102902253104, + "grad_norm": 3.801841330641764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16440 + }, + { + "epoch": 0.07977952721536714, + "grad_norm": 4.0658781585989345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16450 + }, + { + "epoch": 0.07982802540820323, + "grad_norm": 4.4773261720365554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16460 + }, + { + "epoch": 0.07987652360103932, + "grad_norm": 4.0526938960283587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16470 + }, + { + "epoch": 0.07992502179387541, + "grad_norm": 4.1523026084178127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16480 + }, + { + "epoch": 0.0799735199867115, + "grad_norm": 3.823079737230728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16490 + }, + { + "epoch": 0.08002201817954759, + "grad_norm": 4.38768665844691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16500 + }, + { + "epoch": 0.08007051637238367, + "grad_norm": 4.551384620299359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16510 + }, + { + "epoch": 0.08011901456521976, + "grad_norm": 4.042111356739042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16520 + }, + { + "epoch": 0.08016751275805585, + "grad_norm": 3.9366315718325495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16530 + }, + { + "epoch": 0.08021601095089194, + "grad_norm": 4.078041229149676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16540 + }, + { + "epoch": 0.08026450914372803, + "grad_norm": 3.945914386349614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16550 + }, + { + "epoch": 0.08031300733656412, + "grad_norm": 4.0343977047996304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16560 + }, + { + "epoch": 0.0803615055294002, + "grad_norm": 4.1743160750229436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16570 + }, + { + "epoch": 0.0804100037222363, + "grad_norm": 3.988244259289786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16580 + }, + { + "epoch": 0.0804585019150724, + "grad_norm": 3.777764163714892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16590 + }, + { + "epoch": 0.08050700010790848, + "grad_norm": 4.511319673383696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16600 + }, + { + "epoch": 0.08055549830074457, + "grad_norm": 4.150419954385143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16610 + }, + { + "epoch": 0.08060399649358066, + "grad_norm": 3.9848589494795306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16620 + }, + { + "epoch": 0.08065249468641675, + "grad_norm": 4.3138558680766437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16630 + }, + { + "epoch": 0.08070099287925284, + "grad_norm": 4.017923060928297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16640 + }, + { + "epoch": 0.08074949107208892, + "grad_norm": 5.182085374144663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16650 + }, + { + "epoch": 0.08079798926492501, + "grad_norm": 4.3513114178495016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16660 + }, + { + "epoch": 0.0808464874577611, + "grad_norm": 3.8215688391574076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16670 + }, + { + "epoch": 0.08089498565059719, + "grad_norm": 3.8507505450979806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16680 + }, + { + "epoch": 0.08094348384343328, + "grad_norm": 3.8518740552717645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16690 + }, + { + "epoch": 0.08099198203626938, + "grad_norm": 4.3501393065525917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16700 + }, + { + "epoch": 0.08104048022910547, + "grad_norm": 4.5572835460916394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16710 + }, + { + "epoch": 0.08108897842194156, + "grad_norm": 4.5617949240295275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16720 + }, + { + "epoch": 0.08113747661477765, + "grad_norm": 3.6915704981765884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16730 + }, + { + "epoch": 0.08118597480761373, + "grad_norm": 3.471918148534314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16740 + }, + { + "epoch": 0.08123447300044982, + "grad_norm": 3.774735830575082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16750 + }, + { + "epoch": 0.08128297119328591, + "grad_norm": 4.1296249264632934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16760 + }, + { + "epoch": 0.081331469386122, + "grad_norm": 4.0233700815406337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16770 + }, + { + "epoch": 0.08137996757895809, + "grad_norm": 3.922480118490057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16780 + }, + { + "epoch": 0.08142846577179418, + "grad_norm": 3.460341133632028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16790 + }, + { + "epoch": 0.08147696396463026, + "grad_norm": 4.961084982824104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16800 + }, + { + "epoch": 0.08152546215746635, + "grad_norm": 3.705936819642375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16810 + }, + { + "epoch": 0.08157396035030244, + "grad_norm": 3.819380651748361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16820 + }, + { + "epoch": 0.08162245854313854, + "grad_norm": 3.844629645755049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16830 + }, + { + "epoch": 0.08167095673597463, + "grad_norm": 3.235607550777786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16840 + }, + { + "epoch": 0.08171945492881072, + "grad_norm": 3.085303603711509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16850 + }, + { + "epoch": 0.08176795312164681, + "grad_norm": 4.11486325901933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16860 + }, + { + "epoch": 0.0818164513144829, + "grad_norm": 4.0945579371509666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16870 + }, + { + "epoch": 0.08186494950731898, + "grad_norm": 3.2509910852240864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16880 + }, + { + "epoch": 0.08191344770015507, + "grad_norm": 2.905654525875434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16890 + }, + { + "epoch": 0.08196194589299116, + "grad_norm": 3.1462681704397255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16900 + }, + { + "epoch": 0.08201044408582725, + "grad_norm": 4.6195796699066705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16910 + }, + { + "epoch": 0.08205894227866334, + "grad_norm": 3.7828192489541834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16920 + }, + { + "epoch": 0.08210744047149943, + "grad_norm": 3.756718172098772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16930 + }, + { + "epoch": 0.08215593866433551, + "grad_norm": 2.880733802612667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16940 + }, + { + "epoch": 0.0822044368571716, + "grad_norm": 3.006451549936173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16950 + }, + { + "epoch": 0.0822529350500077, + "grad_norm": 3.261743870552891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16960 + }, + { + "epoch": 0.0823014332428438, + "grad_norm": 3.497438854083157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16970 + }, + { + "epoch": 0.08234993143567988, + "grad_norm": 3.480105021935742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16980 + }, + { + "epoch": 0.08239842962851597, + "grad_norm": 2.832281040809903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 16990 + }, + { + "epoch": 0.08244692782135206, + "grad_norm": 3.1878275308372395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17000 + }, + { + "epoch": 0.08249542601418815, + "grad_norm": 3.3246001862607955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17010 + }, + { + "epoch": 0.08254392420702424, + "grad_norm": 3.404538517770561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17020 + }, + { + "epoch": 0.08259242239986032, + "grad_norm": 3.8293995885396725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17030 + }, + { + "epoch": 0.08264092059269641, + "grad_norm": 2.9461662620633433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17040 + }, + { + "epoch": 0.0826894187855325, + "grad_norm": 3.0270555839706503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17050 + }, + { + "epoch": 0.08273791697836859, + "grad_norm": 3.6652548374149774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17060 + }, + { + "epoch": 0.08278641517120468, + "grad_norm": 3.217156461232662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17070 + }, + { + "epoch": 0.08283491336404077, + "grad_norm": 3.517784818996006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17080 + }, + { + "epoch": 0.08288341155687687, + "grad_norm": 2.968903061173478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17090 + }, + { + "epoch": 0.08293190974971296, + "grad_norm": 3.183117485150433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17100 + }, + { + "epoch": 0.08298040794254904, + "grad_norm": 4.101157173863612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17110 + }, + { + "epoch": 0.08302890613538513, + "grad_norm": 3.5441431123217626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17120 + }, + { + "epoch": 0.08307740432822122, + "grad_norm": 3.485011745851807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17130 + }, + { + "epoch": 0.08312590252105731, + "grad_norm": 2.9269440915413725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17140 + }, + { + "epoch": 0.0831744007138934, + "grad_norm": 2.8455789902182005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17150 + }, + { + "epoch": 0.08322289890672949, + "grad_norm": 3.0684984153594996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17160 + }, + { + "epoch": 0.08327139709956558, + "grad_norm": 4.317628281569341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17170 + }, + { + "epoch": 0.08331989529240166, + "grad_norm": 0.03805973008275032, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 17180 + }, + { + "epoch": 0.08336839348523775, + "grad_norm": 1.5737643479951657e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 17190 + }, + { + "epoch": 0.08341689167807384, + "grad_norm": 4.9505433707963675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17200 + }, + { + "epoch": 0.08346538987090993, + "grad_norm": 0.0004129848093725741, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 17210 + }, + { + "epoch": 0.08351388806374603, + "grad_norm": 6.206114630913362e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17220 + }, + { + "epoch": 0.08356238625658212, + "grad_norm": 2.4251537979580462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17230 + }, + { + "epoch": 0.08361088444941821, + "grad_norm": 1.1366977560101077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17240 + }, + { + "epoch": 0.0836593826422543, + "grad_norm": 1.3424332792055793e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17250 + }, + { + "epoch": 0.08370788083509038, + "grad_norm": 1.1298021490802057e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17260 + }, + { + "epoch": 0.08375637902792647, + "grad_norm": 9.104207856580615e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17270 + }, + { + "epoch": 0.08380487722076256, + "grad_norm": 2.0384246454341337e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17280 + }, + { + "epoch": 0.08385337541359865, + "grad_norm": 0.006789239123463631, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 17290 + }, + { + "epoch": 0.08390187360643474, + "grad_norm": 2.3988435714272782e-05, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 17300 + }, + { + "epoch": 0.08395037179927083, + "grad_norm": 0.011647118255496025, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 17310 + }, + { + "epoch": 0.08399886999210691, + "grad_norm": 0.003220898797735572, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17320 + }, + { + "epoch": 0.084047368184943, + "grad_norm": 2.1245366951916367e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17330 + }, + { + "epoch": 0.0840958663777791, + "grad_norm": 1.9130113287246786e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17340 + }, + { + "epoch": 0.0841443645706152, + "grad_norm": 4.606503352988511e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 17350 + }, + { + "epoch": 0.08419286276345128, + "grad_norm": 0.016321582719683647, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 17360 + }, + { + "epoch": 0.08424136095628737, + "grad_norm": 8.75561308930628e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 17370 + }, + { + "epoch": 0.08428985914912346, + "grad_norm": 0.0004900817293673754, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 17380 + }, + { + "epoch": 0.08433835734195955, + "grad_norm": 0.0005103069124743342, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 17390 + }, + { + "epoch": 0.08438685553479564, + "grad_norm": 0.12338358163833618, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 17400 + }, + { + "epoch": 0.08443535372763172, + "grad_norm": 0.026079293340444565, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 17410 + }, + { + "epoch": 0.08448385192046781, + "grad_norm": 3.893829489243217e-05, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 17420 + }, + { + "epoch": 0.0845323501133039, + "grad_norm": 0.00011916671064682305, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 17430 + }, + { + "epoch": 0.08458084830613999, + "grad_norm": 0.000303605425870046, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17440 + }, + { + "epoch": 0.08462934649897608, + "grad_norm": 0.0004896114696748555, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17450 + }, + { + "epoch": 0.08467784469181217, + "grad_norm": 0.00017241637397091836, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17460 + }, + { + "epoch": 0.08472634288464827, + "grad_norm": 0.00026777360471896827, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 17470 + }, + { + "epoch": 0.08477484107748436, + "grad_norm": 0.12591950595378876, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 17480 + }, + { + "epoch": 0.08482333927032044, + "grad_norm": 4.146261926507577e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17490 + }, + { + "epoch": 0.08487183746315653, + "grad_norm": 8.530041668564081e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17500 + }, + { + "epoch": 0.08492033565599262, + "grad_norm": 0.00011867805005749688, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17510 + }, + { + "epoch": 0.08496883384882871, + "grad_norm": 3.0131854146020487e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17520 + }, + { + "epoch": 0.0850173320416648, + "grad_norm": 2.956891512440052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17530 + }, + { + "epoch": 0.08506583023450089, + "grad_norm": 8.369009265152272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17540 + }, + { + "epoch": 0.08511432842733697, + "grad_norm": 8.997598342830315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17550 + }, + { + "epoch": 0.08516282662017306, + "grad_norm": 2.533871156629175e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17560 + }, + { + "epoch": 0.08521132481300915, + "grad_norm": 2.1468948034453206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17570 + }, + { + "epoch": 0.08525982300584524, + "grad_norm": 5.089523256174289e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17580 + }, + { + "epoch": 0.08530832119868133, + "grad_norm": 6.930369181645801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17590 + }, + { + "epoch": 0.08535681939151743, + "grad_norm": 7.0823143687448464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17600 + }, + { + "epoch": 0.08540531758435352, + "grad_norm": 1.6720186977181584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17610 + }, + { + "epoch": 0.0854538157771896, + "grad_norm": 1.66739755513845e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17620 + }, + { + "epoch": 0.0855023139700257, + "grad_norm": 1.6233934729825705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17630 + }, + { + "epoch": 0.08555081216286178, + "grad_norm": 6.6826114561990835e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 17640 + }, + { + "epoch": 0.08559931035569787, + "grad_norm": 5.166599748918088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17650 + }, + { + "epoch": 0.08564780854853396, + "grad_norm": 1.160154897661414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17660 + }, + { + "epoch": 0.08569630674137005, + "grad_norm": 1.0889073564612772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17670 + }, + { + "epoch": 0.08574480493420614, + "grad_norm": 1.1115719644294586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17680 + }, + { + "epoch": 0.08579330312704223, + "grad_norm": 5.012562724004965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17690 + }, + { + "epoch": 0.08584180131987831, + "grad_norm": 5.05612297274638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17700 + }, + { + "epoch": 0.0858902995127144, + "grad_norm": 1.2956343198311515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17710 + }, + { + "epoch": 0.08593879770555049, + "grad_norm": 7.988771358213853e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 17720 + }, + { + "epoch": 0.08598729589838659, + "grad_norm": 1.2702797903330065e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17730 + }, + { + "epoch": 0.08603579409122268, + "grad_norm": 7.0447706093546e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17740 + }, + { + "epoch": 0.08608429228405877, + "grad_norm": 9.883662278298289e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17750 + }, + { + "epoch": 0.08613279047689486, + "grad_norm": 8.693516065250151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17760 + }, + { + "epoch": 0.08618128866973095, + "grad_norm": 8.348989467776846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17770 + }, + { + "epoch": 0.08622978686256703, + "grad_norm": 1.0486638529982883e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17780 + }, + { + "epoch": 0.08627828505540312, + "grad_norm": 1.6707090253476053e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17790 + }, + { + "epoch": 0.08632678324823921, + "grad_norm": 9.582031452737283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17800 + }, + { + "epoch": 0.0863752814410753, + "grad_norm": 7.463240763172507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17810 + }, + { + "epoch": 0.08642377963391139, + "grad_norm": 6.2243429965747055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17820 + }, + { + "epoch": 0.08647227782674748, + "grad_norm": 8.512659405823797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17830 + }, + { + "epoch": 0.08652077601958356, + "grad_norm": 9.684139513410628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17840 + }, + { + "epoch": 0.08656927421241965, + "grad_norm": 6.124693754827604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17850 + }, + { + "epoch": 0.08661777240525576, + "grad_norm": 6.7197688622400165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17860 + }, + { + "epoch": 0.08666627059809184, + "grad_norm": 5.008386779081775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17870 + }, + { + "epoch": 0.08671476879092793, + "grad_norm": 6.4862183535296936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17880 + }, + { + "epoch": 0.08676326698376402, + "grad_norm": 3.705607923620846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17890 + }, + { + "epoch": 0.08681176517660011, + "grad_norm": 3.2630803161737276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17900 + }, + { + "epoch": 0.0868602633694362, + "grad_norm": 7.4676095209724735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17910 + }, + { + "epoch": 0.08690876156227229, + "grad_norm": 4.973624982085312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17920 + }, + { + "epoch": 0.08695725975510837, + "grad_norm": 6.267269327508984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17930 + }, + { + "epoch": 0.08700575794794446, + "grad_norm": 3.8727935134375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17940 + }, + { + "epoch": 0.08705425614078055, + "grad_norm": 3.2044351883087074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17950 + }, + { + "epoch": 0.08710275433361664, + "grad_norm": 5.428662007034291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17960 + }, + { + "epoch": 0.08715125252645273, + "grad_norm": 8.017209438548889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17970 + }, + { + "epoch": 0.08719975071928883, + "grad_norm": 4.934381649945863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17980 + }, + { + "epoch": 0.08724824891212492, + "grad_norm": 4.014420937892282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 17990 + }, + { + "epoch": 0.087296747104961, + "grad_norm": 2.956097887363285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18000 + }, + { + "epoch": 0.0873452452977971, + "grad_norm": 4.700912995758699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18010 + }, + { + "epoch": 0.08739374349063318, + "grad_norm": 4.652796633308753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18020 + }, + { + "epoch": 0.08744224168346927, + "grad_norm": 4.494223503570538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18030 + }, + { + "epoch": 0.08749073987630536, + "grad_norm": 3.0480798614007654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18040 + }, + { + "epoch": 0.08753923806914145, + "grad_norm": 3.333385393489152e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18050 + }, + { + "epoch": 0.08758773626197754, + "grad_norm": 4.0852464735507965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18060 + }, + { + "epoch": 0.08763623445481362, + "grad_norm": 4.561277819448151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18070 + }, + { + "epoch": 0.08768473264764971, + "grad_norm": 6.60963178233942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18080 + }, + { + "epoch": 0.0877332308404858, + "grad_norm": 3.2654575079504866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18090 + }, + { + "epoch": 0.08778172903332189, + "grad_norm": 2.7904802664124873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18100 + }, + { + "epoch": 0.08783022722615799, + "grad_norm": 4.530747901299037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18110 + }, + { + "epoch": 0.08787872541899408, + "grad_norm": 4.441698365553748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18120 + }, + { + "epoch": 0.08792722361183017, + "grad_norm": 4.4292191887507215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18130 + }, + { + "epoch": 0.08797572180466626, + "grad_norm": 2.5332178665848915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18140 + }, + { + "epoch": 0.08802421999750235, + "grad_norm": 2.510339754735469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18150 + }, + { + "epoch": 0.08807271819033843, + "grad_norm": 4.550046924123308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18160 + }, + { + "epoch": 0.08812121638317452, + "grad_norm": 4.077734956808854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18170 + }, + { + "epoch": 0.08816971457601061, + "grad_norm": 5.208767106523737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18180 + }, + { + "epoch": 0.0882182127688467, + "grad_norm": 2.705229917410179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18190 + }, + { + "epoch": 0.08826671096168279, + "grad_norm": 2.625142997203511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18200 + }, + { + "epoch": 0.08831520915451888, + "grad_norm": 3.4868080547312275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18210 + }, + { + "epoch": 0.08836370734735496, + "grad_norm": 3.881154498230899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18220 + }, + { + "epoch": 0.08841220554019105, + "grad_norm": 4.337236077844864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18230 + }, + { + "epoch": 0.08846070373302715, + "grad_norm": 3.0357723517226987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18240 + }, + { + "epoch": 0.08850920192586324, + "grad_norm": 2.451154614391271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18250 + }, + { + "epoch": 0.08855770011869933, + "grad_norm": 3.5767527606367366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18260 + }, + { + "epoch": 0.08860619831153542, + "grad_norm": 3.3330136375298025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18270 + }, + { + "epoch": 0.08865469650437151, + "grad_norm": 3.5590107927419012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18280 + }, + { + "epoch": 0.0887031946972076, + "grad_norm": 2.304652298334986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18290 + }, + { + "epoch": 0.08875169289004368, + "grad_norm": 2.4089470116450684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18300 + }, + { + "epoch": 0.08880019108287977, + "grad_norm": 3.4160800623794785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18310 + }, + { + "epoch": 0.08884868927571586, + "grad_norm": 3.373932486283593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18320 + }, + { + "epoch": 0.08889718746855195, + "grad_norm": 3.2879213449632516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18330 + }, + { + "epoch": 0.08894568566138804, + "grad_norm": 2.4139205834217137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18340 + }, + { + "epoch": 0.08899418385422413, + "grad_norm": 3.647021230790415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18350 + }, + { + "epoch": 0.08904268204706021, + "grad_norm": 3.2556094993196893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18360 + }, + { + "epoch": 0.08909118023989632, + "grad_norm": 4.280511802789988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18370 + }, + { + "epoch": 0.0891396784327324, + "grad_norm": 3.896967882610625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18380 + }, + { + "epoch": 0.0891881766255685, + "grad_norm": 2.3017273633740842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18390 + }, + { + "epoch": 0.08923667481840458, + "grad_norm": 4.53981920145452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18400 + }, + { + "epoch": 0.08928517301124067, + "grad_norm": 3.079824637097772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18410 + }, + { + "epoch": 0.08933367120407676, + "grad_norm": 3.068434580200119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18420 + }, + { + "epoch": 0.08938216939691285, + "grad_norm": 3.2853870379767613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18430 + }, + { + "epoch": 0.08943066758974894, + "grad_norm": 2.691967665668926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18440 + }, + { + "epoch": 0.08947916578258502, + "grad_norm": 2.3286877421924146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18450 + }, + { + "epoch": 0.08952766397542111, + "grad_norm": 2.520085672585992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18460 + }, + { + "epoch": 0.0895761621682572, + "grad_norm": 2.981628540510428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18470 + }, + { + "epoch": 0.08962466036109329, + "grad_norm": 2.656491233210545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18480 + }, + { + "epoch": 0.08967315855392938, + "grad_norm": 1.9213121049688198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18490 + }, + { + "epoch": 0.08972165674676548, + "grad_norm": 0.0006565847434103489, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18500 + }, + { + "epoch": 0.08977015493960157, + "grad_norm": 2.9798159175697947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18510 + }, + { + "epoch": 0.08981865313243766, + "grad_norm": 2.79722235063673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18520 + }, + { + "epoch": 0.08986715132527374, + "grad_norm": 2.8328452117420966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18530 + }, + { + "epoch": 0.08991564951810983, + "grad_norm": 2.2136014194984455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18540 + }, + { + "epoch": 0.08996414771094592, + "grad_norm": 1.882129026853363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18550 + }, + { + "epoch": 0.09001264590378201, + "grad_norm": 2.8562292300193803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18560 + }, + { + "epoch": 0.0900611440966181, + "grad_norm": 2.6975901619152864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18570 + }, + { + "epoch": 0.09010964228945419, + "grad_norm": 2.785514880088158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18580 + }, + { + "epoch": 0.09015814048229027, + "grad_norm": 2.0317643247835804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18590 + }, + { + "epoch": 0.09020663867512636, + "grad_norm": 1.8732771422946826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18600 + }, + { + "epoch": 0.09025513686796245, + "grad_norm": 2.464974613758386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18610 + }, + { + "epoch": 0.09030363506079855, + "grad_norm": 2.769282900771941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18620 + }, + { + "epoch": 0.09035213325363464, + "grad_norm": 2.7337596293364186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18630 + }, + { + "epoch": 0.09040063144647073, + "grad_norm": 1.8516652744438034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18640 + }, + { + "epoch": 0.09044912963930682, + "grad_norm": 1.9051261688218801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18650 + }, + { + "epoch": 0.09049762783214291, + "grad_norm": 2.7883741040568566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18660 + }, + { + "epoch": 0.090546126024979, + "grad_norm": 3.3448227441112977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18670 + }, + { + "epoch": 0.09059462421781508, + "grad_norm": 2.6009813609562116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18680 + }, + { + "epoch": 0.09064312241065117, + "grad_norm": 1.7711673763187719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18690 + }, + { + "epoch": 0.09069162060348726, + "grad_norm": 1.7495981410320383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18700 + }, + { + "epoch": 0.09074011879632335, + "grad_norm": 2.317154894626583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18710 + }, + { + "epoch": 0.09078861698915944, + "grad_norm": 2.7131518436362967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18720 + }, + { + "epoch": 0.09083711518199553, + "grad_norm": 2.268287516926648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18730 + }, + { + "epoch": 0.09088561337483161, + "grad_norm": 1.7303339063801104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18740 + }, + { + "epoch": 0.09093411156766772, + "grad_norm": 1.8798092469296535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18750 + }, + { + "epoch": 0.0909826097605038, + "grad_norm": 2.302700295331306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18760 + }, + { + "epoch": 0.0910311079533399, + "grad_norm": 2.2858073407405755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18770 + }, + { + "epoch": 0.09107960614617598, + "grad_norm": 8.422685823461507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18780 + }, + { + "epoch": 0.09112810433901207, + "grad_norm": 1.7193952999150497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18790 + }, + { + "epoch": 0.09117660253184816, + "grad_norm": 1.7530040850033402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18800 + }, + { + "epoch": 0.09122510072468425, + "grad_norm": 2.6981617793353507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18810 + }, + { + "epoch": 0.09127359891752033, + "grad_norm": 2.0771351501025492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18820 + }, + { + "epoch": 0.09132209711035642, + "grad_norm": 2.2868036921863677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18830 + }, + { + "epoch": 0.09137059530319251, + "grad_norm": 1.6848792938617407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18840 + }, + { + "epoch": 0.0914190934960286, + "grad_norm": 1.6958449577941792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18850 + }, + { + "epoch": 0.09146759168886469, + "grad_norm": 2.4071639472822426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18860 + }, + { + "epoch": 0.09151608988170078, + "grad_norm": 2.051127012236975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18870 + }, + { + "epoch": 0.09156458807453688, + "grad_norm": 2.1278362964949338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18880 + }, + { + "epoch": 0.09161308626737297, + "grad_norm": 1.5493272940148017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18890 + }, + { + "epoch": 0.09166158446020906, + "grad_norm": 1.5608479770889971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18900 + }, + { + "epoch": 0.09171008265304514, + "grad_norm": 2.004170482905465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18910 + }, + { + "epoch": 0.09175858084588123, + "grad_norm": 7.170116077759303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18920 + }, + { + "epoch": 0.09180707903871732, + "grad_norm": 2.3586073893966386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18930 + }, + { + "epoch": 0.09185557723155341, + "grad_norm": 1.44119837841572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18940 + }, + { + "epoch": 0.0919040754243895, + "grad_norm": 2.3170366603153525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18950 + }, + { + "epoch": 0.09195257361722559, + "grad_norm": 0.022134926170110703, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18960 + }, + { + "epoch": 0.09200107181006167, + "grad_norm": 4.6772529458394274e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18970 + }, + { + "epoch": 0.09204957000289776, + "grad_norm": 3.3592734780540923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18980 + }, + { + "epoch": 0.09209806819573385, + "grad_norm": 3.2443690543004777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 18990 + }, + { + "epoch": 0.09214656638856994, + "grad_norm": 2.6896834697254235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19000 + }, + { + "epoch": 0.09219506458140604, + "grad_norm": 3.6098415421292884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19010 + }, + { + "epoch": 0.09224356277424213, + "grad_norm": 3.473329570624628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19020 + }, + { + "epoch": 0.09229206096707822, + "grad_norm": 4.1425746530876495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19030 + }, + { + "epoch": 0.0923405591599143, + "grad_norm": 2.6463685571798123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19040 + }, + { + "epoch": 0.0923890573527504, + "grad_norm": 2.6182856345258188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19050 + }, + { + "epoch": 0.09243755554558648, + "grad_norm": 4.7505218390142545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19060 + }, + { + "epoch": 0.09248605373842257, + "grad_norm": 3.467893293418456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19070 + }, + { + "epoch": 0.09253455193125866, + "grad_norm": 3.2403809200332034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19080 + }, + { + "epoch": 0.09258305012409475, + "grad_norm": 3.218493475287687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19090 + }, + { + "epoch": 0.09263154831693084, + "grad_norm": 2.2497542886412703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19100 + }, + { + "epoch": 0.09268004650976693, + "grad_norm": 3.1354011298390105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19110 + }, + { + "epoch": 0.09272854470260301, + "grad_norm": 3.094201019848697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19120 + }, + { + "epoch": 0.0927770428954391, + "grad_norm": 4.143698788539041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19130 + }, + { + "epoch": 0.0928255410882752, + "grad_norm": 2.583023388069705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19140 + }, + { + "epoch": 0.09287403928111129, + "grad_norm": 2.1665264284820296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19150 + }, + { + "epoch": 0.09292253747394738, + "grad_norm": 2.632480800457415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19160 + }, + { + "epoch": 0.09297103566678347, + "grad_norm": 3.689290679176338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19170 + }, + { + "epoch": 0.09301953385961956, + "grad_norm": 3.3487901873741066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19180 + }, + { + "epoch": 0.09306803205245565, + "grad_norm": 2.4407643195445416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19190 + }, + { + "epoch": 0.09311653024529173, + "grad_norm": 2.048274154731189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19200 + }, + { + "epoch": 0.09316502843812782, + "grad_norm": 3.034179144378868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19210 + }, + { + "epoch": 0.09321352663096391, + "grad_norm": 2.8704241685773013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19220 + }, + { + "epoch": 0.0932620248238, + "grad_norm": 2.3829129531804938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19230 + }, + { + "epoch": 0.09331052301663609, + "grad_norm": 2.2092133349360665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19240 + }, + { + "epoch": 0.09335902120947218, + "grad_norm": 2.009931677093846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19250 + }, + { + "epoch": 0.09340751940230828, + "grad_norm": 2.511697175577865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19260 + }, + { + "epoch": 0.09345601759514437, + "grad_norm": 2.48993092100136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19270 + }, + { + "epoch": 0.09350451578798046, + "grad_norm": 2.547053554735612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19280 + }, + { + "epoch": 0.09355301398081654, + "grad_norm": 1.9879616957041435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19290 + }, + { + "epoch": 0.09360151217365263, + "grad_norm": 1.9924939351767534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19300 + }, + { + "epoch": 0.09365001036648872, + "grad_norm": 2.5183715024468256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19310 + }, + { + "epoch": 0.09369850855932481, + "grad_norm": 2.5333620214951225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19320 + }, + { + "epoch": 0.0937470067521609, + "grad_norm": 2.084472271235427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19330 + }, + { + "epoch": 0.09379550494499699, + "grad_norm": 1.668733034421166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19340 + }, + { + "epoch": 0.09384400313783307, + "grad_norm": 1.8322506321055698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19350 + }, + { + "epoch": 0.09389250133066916, + "grad_norm": 2.2229537535167765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19360 + }, + { + "epoch": 0.09394099952350525, + "grad_norm": 2.065525450234418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19370 + }, + { + "epoch": 0.09398949771634134, + "grad_norm": 2.1317487153282855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19380 + }, + { + "epoch": 0.09403799590917744, + "grad_norm": 1.6743430251153768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19390 + }, + { + "epoch": 0.09408649410201353, + "grad_norm": 1.669042603680282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19400 + }, + { + "epoch": 0.09413499229484962, + "grad_norm": 2.7902358397113858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19410 + }, + { + "epoch": 0.0941834904876857, + "grad_norm": 2.080309968732763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19420 + }, + { + "epoch": 0.0942319886805218, + "grad_norm": 1.922382807606482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19430 + }, + { + "epoch": 0.09428048687335788, + "grad_norm": 1.603944951966696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19440 + }, + { + "epoch": 0.09432898506619397, + "grad_norm": 1.5206254602162517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19450 + }, + { + "epoch": 0.09437748325903006, + "grad_norm": 3.512666125971009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19460 + }, + { + "epoch": 0.09442598145186615, + "grad_norm": 2.1357079731387785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19470 + }, + { + "epoch": 0.09447447964470224, + "grad_norm": 1.936706439664704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19480 + }, + { + "epoch": 0.09452297783753832, + "grad_norm": 1.5103963733054115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19490 + }, + { + "epoch": 0.09457147603037441, + "grad_norm": 1.5786929452588083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19500 + }, + { + "epoch": 0.0946199742232105, + "grad_norm": 2.166707190554007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19510 + }, + { + "epoch": 0.0946684724160466, + "grad_norm": 1.604471322025347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19520 + }, + { + "epoch": 0.09471697060888269, + "grad_norm": 1.94129825104028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19530 + }, + { + "epoch": 0.09476546880171878, + "grad_norm": 1.3118431070324732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19540 + }, + { + "epoch": 0.09481396699455487, + "grad_norm": 1.3516572607841226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19550 + }, + { + "epoch": 0.09486246518739096, + "grad_norm": 1.8054907968689804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19560 + }, + { + "epoch": 0.09491096338022705, + "grad_norm": 1.971890469576465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19570 + }, + { + "epoch": 0.09495946157306313, + "grad_norm": 1.8399214241071604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19580 + }, + { + "epoch": 0.09500795976589922, + "grad_norm": 1.5263840396073647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19590 + }, + { + "epoch": 0.09505645795873531, + "grad_norm": 1.4463972775047296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19600 + }, + { + "epoch": 0.0951049561515714, + "grad_norm": 1.7423593590137898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19610 + }, + { + "epoch": 0.09515345434440749, + "grad_norm": 1.6412990362368873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19620 + }, + { + "epoch": 0.09520195253724358, + "grad_norm": 1.9239641915191896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19630 + }, + { + "epoch": 0.09525045073007966, + "grad_norm": 1.303108319916646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19640 + }, + { + "epoch": 0.09529894892291577, + "grad_norm": 1.3691156937056803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19650 + }, + { + "epoch": 0.09534744711575185, + "grad_norm": 1.5759519556013402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19660 + }, + { + "epoch": 0.09539594530858794, + "grad_norm": 1.6770574120528181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19670 + }, + { + "epoch": 0.09544444350142403, + "grad_norm": 1.6381726481995429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19680 + }, + { + "epoch": 0.09549294169426012, + "grad_norm": 1.2723085092147812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19690 + }, + { + "epoch": 0.09554143988709621, + "grad_norm": 1.4118952549324604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19700 + }, + { + "epoch": 0.0955899380799323, + "grad_norm": 1.4487475255009485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19710 + }, + { + "epoch": 0.09563843627276838, + "grad_norm": 1.5510127013840247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19720 + }, + { + "epoch": 0.09568693446560447, + "grad_norm": 1.4258844203141052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19730 + }, + { + "epoch": 0.09573543265844056, + "grad_norm": 1.2020141184621025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19740 + }, + { + "epoch": 0.09578393085127665, + "grad_norm": 1.1676331723720068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19750 + }, + { + "epoch": 0.09583242904411274, + "grad_norm": 1.721931653264619e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19760 + }, + { + "epoch": 0.09588092723694883, + "grad_norm": 1.3846707815901027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19770 + }, + { + "epoch": 0.09592942542978493, + "grad_norm": 1.4056962527320138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19780 + }, + { + "epoch": 0.09597792362262102, + "grad_norm": 1.1928323147003539e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19790 + }, + { + "epoch": 0.0960264218154571, + "grad_norm": 2.7717987904907204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19800 + }, + { + "epoch": 0.0960749200082932, + "grad_norm": 1.5215857729344862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19810 + }, + { + "epoch": 0.09612341820112928, + "grad_norm": 1.3933533864474157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19820 + }, + { + "epoch": 0.09617191639396537, + "grad_norm": 1.3994864502819837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19830 + }, + { + "epoch": 0.09622041458680146, + "grad_norm": 9.687853435025318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19840 + }, + { + "epoch": 0.09626891277963755, + "grad_norm": 1.0552606681812904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19850 + }, + { + "epoch": 0.09631741097247364, + "grad_norm": 1.203691908813198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19860 + }, + { + "epoch": 0.09636590916530972, + "grad_norm": 1.1726572211046005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19870 + }, + { + "epoch": 0.09641440735814581, + "grad_norm": 1.3539050769395544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19880 + }, + { + "epoch": 0.0964629055509819, + "grad_norm": 1.0921107786998618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19890 + }, + { + "epoch": 0.096511403743818, + "grad_norm": 1.0219922614851384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19900 + }, + { + "epoch": 0.09655990193665409, + "grad_norm": 1.221785282723431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19910 + }, + { + "epoch": 0.09660840012949018, + "grad_norm": 1.319326770499174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19920 + }, + { + "epoch": 0.09665689832232627, + "grad_norm": 1.3655952670887928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19930 + }, + { + "epoch": 0.09670539651516236, + "grad_norm": 9.45780300298793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19940 + }, + { + "epoch": 0.09675389470799844, + "grad_norm": 9.969759275918477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19950 + }, + { + "epoch": 0.09680239290083453, + "grad_norm": 2.3923769276734674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19960 + }, + { + "epoch": 0.09685089109367062, + "grad_norm": 1.2957237913724384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19970 + }, + { + "epoch": 0.09689938928650671, + "grad_norm": 1.1346739938744577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19980 + }, + { + "epoch": 0.0969478874793428, + "grad_norm": 8.641493991490279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 19990 + }, + { + "epoch": 0.09699638567217889, + "grad_norm": 9.899437145577394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20000 + }, + { + "epoch": 0.09704488386501497, + "grad_norm": 1.2385381751300883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20010 + }, + { + "epoch": 0.09709338205785106, + "grad_norm": 1.4503340253213537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20020 + }, + { + "epoch": 0.09714188025068717, + "grad_norm": 1.1447101542216842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20030 + }, + { + "epoch": 0.09719037844352325, + "grad_norm": 9.743746431922773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20040 + }, + { + "epoch": 0.09723887663635934, + "grad_norm": 8.505538744429941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20050 + }, + { + "epoch": 0.09728737482919543, + "grad_norm": 1.1724888508979348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20060 + }, + { + "epoch": 0.09733587302203152, + "grad_norm": 1.2212897217978025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20070 + }, + { + "epoch": 0.09738437121486761, + "grad_norm": 1.117742385758902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20080 + }, + { + "epoch": 0.0974328694077037, + "grad_norm": 8.724947520022397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20090 + }, + { + "epoch": 0.09748136760053978, + "grad_norm": 7.731408686595387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20100 + }, + { + "epoch": 0.09752986579337587, + "grad_norm": 1.0107265779879526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20110 + }, + { + "epoch": 0.09757836398621196, + "grad_norm": 1.2341031379037304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20120 + }, + { + "epoch": 0.09762686217904805, + "grad_norm": 4.699032160715433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20130 + }, + { + "epoch": 0.09767536037188414, + "grad_norm": 8.665609811941977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20140 + }, + { + "epoch": 0.09772385856472023, + "grad_norm": 6.886324968036206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20150 + }, + { + "epoch": 0.09777235675755633, + "grad_norm": 1.0018541161116445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20160 + }, + { + "epoch": 0.09782085495039242, + "grad_norm": 1.0784526693896623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20170 + }, + { + "epoch": 0.0978693531432285, + "grad_norm": 9.090478556572634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20180 + }, + { + "epoch": 0.09791785133606459, + "grad_norm": 7.703486630816769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20190 + }, + { + "epoch": 0.09796634952890068, + "grad_norm": 8.331101071235025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20200 + }, + { + "epoch": 0.09801484772173677, + "grad_norm": 9.546561159368139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20210 + }, + { + "epoch": 0.09806334591457286, + "grad_norm": 1.0348411478844355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20220 + }, + { + "epoch": 0.09811184410740895, + "grad_norm": 1.7099522665375844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20230 + }, + { + "epoch": 0.09816034230024503, + "grad_norm": 8.782951681496343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20240 + }, + { + "epoch": 0.09820884049308112, + "grad_norm": 6.896180479998293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20250 + }, + { + "epoch": 0.09825733868591721, + "grad_norm": 1.0092347793033696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20260 + }, + { + "epoch": 0.0983058368787533, + "grad_norm": 1.0979927083099028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20270 + }, + { + "epoch": 0.09835433507158939, + "grad_norm": 8.942297995417903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20280 + }, + { + "epoch": 0.09840283326442549, + "grad_norm": 7.737818350506132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20290 + }, + { + "epoch": 0.09845133145726158, + "grad_norm": 7.287746939255157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20300 + }, + { + "epoch": 0.09849982965009767, + "grad_norm": 8.949626817411627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20310 + }, + { + "epoch": 0.09854832784293376, + "grad_norm": 8.531705475434137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20320 + }, + { + "epoch": 0.09859682603576984, + "grad_norm": 9.204479169966362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20330 + }, + { + "epoch": 0.09864532422860593, + "grad_norm": 6.055503263269202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20340 + }, + { + "epoch": 0.09869382242144202, + "grad_norm": 6.924840931787912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20350 + }, + { + "epoch": 0.09874232061427811, + "grad_norm": 8.385323440052161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20360 + }, + { + "epoch": 0.0987908188071142, + "grad_norm": 1.0110489938597311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20370 + }, + { + "epoch": 0.09883931699995029, + "grad_norm": 9.852774383034557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20380 + }, + { + "epoch": 0.09888781519278637, + "grad_norm": 5.893029992876109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20390 + }, + { + "epoch": 0.09893631338562246, + "grad_norm": 7.453293733306054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20400 + }, + { + "epoch": 0.09898481157845855, + "grad_norm": 8.616641480330145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20410 + }, + { + "epoch": 0.09903330977129465, + "grad_norm": 8.274554375020671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20420 + }, + { + "epoch": 0.09908180796413074, + "grad_norm": 1.0230132829747163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20430 + }, + { + "epoch": 0.09913030615696683, + "grad_norm": 6.009162234477117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20440 + }, + { + "epoch": 0.09917880434980292, + "grad_norm": 5.462789545163105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20450 + }, + { + "epoch": 0.099227302542639, + "grad_norm": 7.983967407199088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20460 + }, + { + "epoch": 0.0992758007354751, + "grad_norm": 9.188345870825287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20470 + }, + { + "epoch": 0.09932429892831118, + "grad_norm": 7.814416562723636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20480 + }, + { + "epoch": 0.09937279712114727, + "grad_norm": 7.133576787055063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20490 + }, + { + "epoch": 0.09942129531398336, + "grad_norm": 1.050631453836104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20500 + }, + { + "epoch": 0.09946979350681945, + "grad_norm": 7.983945806699921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20510 + }, + { + "epoch": 0.09951829169965554, + "grad_norm": 7.543387141595304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20520 + }, + { + "epoch": 0.09956678989249163, + "grad_norm": 1.3798797908748384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20530 + }, + { + "epoch": 0.09961528808532773, + "grad_norm": 6.587553116332856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20540 + }, + { + "epoch": 0.09966378627816382, + "grad_norm": 6.375594239216298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20550 + }, + { + "epoch": 0.0997122844709999, + "grad_norm": 7.134474913073063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20560 + }, + { + "epoch": 0.09976078266383599, + "grad_norm": 7.520268923144613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20570 + }, + { + "epoch": 0.09980928085667208, + "grad_norm": 7.393413739009702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20580 + }, + { + "epoch": 0.09985777904950817, + "grad_norm": 5.275890657685522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20590 + }, + { + "epoch": 0.09990627724234426, + "grad_norm": 5.924204060647753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20600 + }, + { + "epoch": 0.09995477543518035, + "grad_norm": 7.551957992291136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20610 + }, + { + "epoch": 0.10000327362801643, + "grad_norm": 7.377470296887623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20620 + }, + { + "epoch": 0.10005177182085252, + "grad_norm": 7.211145316432521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20630 + }, + { + "epoch": 0.10010027001368861, + "grad_norm": 5.016741511099099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20640 + }, + { + "epoch": 0.1001487682065247, + "grad_norm": 4.3887391143471177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20650 + }, + { + "epoch": 0.10019726639936079, + "grad_norm": 7.207324870250886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20660 + }, + { + "epoch": 0.10024576459219689, + "grad_norm": 7.218881137305289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20670 + }, + { + "epoch": 0.10029426278503298, + "grad_norm": 6.312631057880935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20680 + }, + { + "epoch": 0.10034276097786907, + "grad_norm": 5.226550001680152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20690 + }, + { + "epoch": 0.10039125917070515, + "grad_norm": 5.859604357283388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20700 + }, + { + "epoch": 0.10043975736354124, + "grad_norm": 6.606741749237699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20710 + }, + { + "epoch": 0.10048825555637733, + "grad_norm": 6.673070060969621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20720 + }, + { + "epoch": 0.10053675374921342, + "grad_norm": 6.198134201440553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20730 + }, + { + "epoch": 0.10058525194204951, + "grad_norm": 5.102324394101743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20740 + }, + { + "epoch": 0.1006337501348856, + "grad_norm": 5.034441983298166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20750 + }, + { + "epoch": 0.10068224832772169, + "grad_norm": 6.62740148982266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20760 + }, + { + "epoch": 0.10073074652055777, + "grad_norm": 2.6724155759438872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20770 + }, + { + "epoch": 0.10077924471339386, + "grad_norm": 5.812407835037448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20780 + }, + { + "epoch": 0.10082774290622995, + "grad_norm": 4.0881479890231276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20790 + }, + { + "epoch": 0.10087624109906605, + "grad_norm": 5.207247681937588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20800 + }, + { + "epoch": 0.10092473929190214, + "grad_norm": 6.602027156077384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20810 + }, + { + "epoch": 0.10097323748473823, + "grad_norm": 5.888615532967378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20820 + }, + { + "epoch": 0.10102173567757432, + "grad_norm": 5.759746954936418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20830 + }, + { + "epoch": 0.1010702338704104, + "grad_norm": 4.1424581809224037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20840 + }, + { + "epoch": 0.1011187320632465, + "grad_norm": 4.62143390222991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20850 + }, + { + "epoch": 0.10116723025608258, + "grad_norm": 5.739790935876954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20860 + }, + { + "epoch": 0.10121572844891867, + "grad_norm": 7.159312644944293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20870 + }, + { + "epoch": 0.10126422664175476, + "grad_norm": 5.898997983422305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20880 + }, + { + "epoch": 0.10131272483459085, + "grad_norm": 4.7333139718830353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20890 + }, + { + "epoch": 0.10136122302742694, + "grad_norm": 4.4299909518485947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20900 + }, + { + "epoch": 0.10140972122026302, + "grad_norm": 6.112887263043376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20910 + }, + { + "epoch": 0.10145821941309911, + "grad_norm": 5.160080718269455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20920 + }, + { + "epoch": 0.10150671760593521, + "grad_norm": 5.807415277558903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20930 + }, + { + "epoch": 0.1015552157987713, + "grad_norm": 4.06186359214189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20940 + }, + { + "epoch": 0.10160371399160739, + "grad_norm": 3.9556081787850417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20950 + }, + { + "epoch": 0.10165221218444348, + "grad_norm": 6.186508016980952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20960 + }, + { + "epoch": 0.10170071037727957, + "grad_norm": 6.077576699681231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20970 + }, + { + "epoch": 0.10174920857011566, + "grad_norm": 6.660950475634309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20980 + }, + { + "epoch": 0.10179770676295175, + "grad_norm": 3.73890117089104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 20990 + }, + { + "epoch": 0.10184620495578783, + "grad_norm": 4.483336510929803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21000 + }, + { + "epoch": 0.10189470314862392, + "grad_norm": 5.666757942890399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21010 + }, + { + "epoch": 0.10194320134146001, + "grad_norm": 5.677616172761191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21020 + }, + { + "epoch": 0.1019916995342961, + "grad_norm": 4.851928565585695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21030 + }, + { + "epoch": 0.10204019772713219, + "grad_norm": 3.6225299027137225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21040 + }, + { + "epoch": 0.10208869591996828, + "grad_norm": 4.4428142587094044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21050 + }, + { + "epoch": 0.10213719411280438, + "grad_norm": 6.004468104947591e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21060 + }, + { + "epoch": 0.10218569230564047, + "grad_norm": 5.49535172922333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21070 + }, + { + "epoch": 0.10223419049847655, + "grad_norm": 5.706205001843045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21080 + }, + { + "epoch": 0.10228268869131264, + "grad_norm": 3.6934355307494116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21090 + }, + { + "epoch": 0.10233118688414873, + "grad_norm": 5.094914854453236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21100 + }, + { + "epoch": 0.10237968507698482, + "grad_norm": 5.445751298793766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21110 + }, + { + "epoch": 0.10242818326982091, + "grad_norm": 5.153028155291395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21120 + }, + { + "epoch": 0.102476681462657, + "grad_norm": 5.38497431534779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21130 + }, + { + "epoch": 0.10252517965549308, + "grad_norm": 3.895063400705112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21140 + }, + { + "epoch": 0.10257367784832917, + "grad_norm": 3.6861095509266306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21150 + }, + { + "epoch": 0.10262217604116526, + "grad_norm": 5.379033041208459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21160 + }, + { + "epoch": 0.10267067423400135, + "grad_norm": 4.80501569199987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21170 + }, + { + "epoch": 0.10271917242683745, + "grad_norm": 0.014801470562815666, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21180 + }, + { + "epoch": 0.10276767061967354, + "grad_norm": 7.506379233745974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21190 + }, + { + "epoch": 0.10281616881250963, + "grad_norm": 2.6979682843375485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21200 + }, + { + "epoch": 0.10286466700534572, + "grad_norm": 1.553499350848142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21210 + }, + { + "epoch": 0.1029131651981818, + "grad_norm": 0.0004518455534707755, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21220 + }, + { + "epoch": 0.1029616633910179, + "grad_norm": 4.180615178484004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21230 + }, + { + "epoch": 0.10301016158385398, + "grad_norm": 1.0373975101174437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21240 + }, + { + "epoch": 0.10305865977669007, + "grad_norm": 6.537944727824652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21250 + }, + { + "epoch": 0.10310715796952616, + "grad_norm": 1.0696786603148212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21260 + }, + { + "epoch": 0.10315565616236225, + "grad_norm": 1.1058326663260232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21270 + }, + { + "epoch": 0.10320415435519834, + "grad_norm": 1.2072898698534118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21280 + }, + { + "epoch": 0.10325265254803442, + "grad_norm": 7.644687229912961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21290 + }, + { + "epoch": 0.10330115074087051, + "grad_norm": 7.215260211523855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21300 + }, + { + "epoch": 0.10334964893370661, + "grad_norm": 9.874015631794464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21310 + }, + { + "epoch": 0.1033981471265427, + "grad_norm": 8.386707577301422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21320 + }, + { + "epoch": 0.10344664531937879, + "grad_norm": 1.1086038966823253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21330 + }, + { + "epoch": 0.10349514351221488, + "grad_norm": 7.982899887792883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21340 + }, + { + "epoch": 0.10354364170505097, + "grad_norm": 6.391039164554968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21350 + }, + { + "epoch": 0.10359213989788706, + "grad_norm": 1.346480871688982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21360 + }, + { + "epoch": 0.10364063809072314, + "grad_norm": 1.101803604797169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21370 + }, + { + "epoch": 0.10368913628355923, + "grad_norm": 9.409077392774634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21380 + }, + { + "epoch": 0.10373763447639532, + "grad_norm": 7.956479635140568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21390 + }, + { + "epoch": 0.10378613266923141, + "grad_norm": 5.530770295081311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21400 + }, + { + "epoch": 0.1038346308620675, + "grad_norm": 9.684860060588107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21410 + }, + { + "epoch": 0.10388312905490359, + "grad_norm": 8.33775970932038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21420 + }, + { + "epoch": 0.10393162724773967, + "grad_norm": 8.410263490077341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21430 + }, + { + "epoch": 0.10398012544057578, + "grad_norm": 5.846393946740136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21440 + }, + { + "epoch": 0.10402862363341187, + "grad_norm": 4.988536943528743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21450 + }, + { + "epoch": 0.10407712182624795, + "grad_norm": 8.077652182691963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21460 + }, + { + "epoch": 0.10412562001908404, + "grad_norm": 7.161908683883667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21470 + }, + { + "epoch": 0.10417411821192013, + "grad_norm": 7.300513402697106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21480 + }, + { + "epoch": 0.10422261640475622, + "grad_norm": 4.201545209525648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21490 + }, + { + "epoch": 0.10427111459759231, + "grad_norm": 7.886474691076728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21500 + }, + { + "epoch": 0.1043196127904284, + "grad_norm": 8.018528774300648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21510 + }, + { + "epoch": 0.10436811098326448, + "grad_norm": 9.076887863557204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21520 + }, + { + "epoch": 0.10441660917610057, + "grad_norm": 6.960354994589579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21530 + }, + { + "epoch": 0.10446510736893666, + "grad_norm": 3.497692375731276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21540 + }, + { + "epoch": 0.10451360556177275, + "grad_norm": 3.9439203192159766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21550 + }, + { + "epoch": 0.10456210375460884, + "grad_norm": 5.50356844541966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21560 + }, + { + "epoch": 0.10461060194744494, + "grad_norm": 7.149569114517362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21570 + }, + { + "epoch": 0.10465910014028103, + "grad_norm": 6.803826977375138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21580 + }, + { + "epoch": 0.10470759833311712, + "grad_norm": 4.0523542566006654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21590 + }, + { + "epoch": 0.1047560965259532, + "grad_norm": 1.7988007812164142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21600 + }, + { + "epoch": 0.10480459471878929, + "grad_norm": 6.51296886644559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21610 + }, + { + "epoch": 0.10485309291162538, + "grad_norm": 5.51456594166666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21620 + }, + { + "epoch": 0.10490159110446147, + "grad_norm": 6.872344897601579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21630 + }, + { + "epoch": 0.10495008929729756, + "grad_norm": 4.044048296236724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21640 + }, + { + "epoch": 0.10499858749013365, + "grad_norm": 3.994568373855145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21650 + }, + { + "epoch": 0.10504708568296973, + "grad_norm": 6.695740353279689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21660 + }, + { + "epoch": 0.10509558387580582, + "grad_norm": 5.264544142846717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21670 + }, + { + "epoch": 0.10514408206864191, + "grad_norm": 5.42930422398058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21680 + }, + { + "epoch": 0.105192580261478, + "grad_norm": 3.2852582876330416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21690 + }, + { + "epoch": 0.1052410784543141, + "grad_norm": 5.051211360296293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21700 + }, + { + "epoch": 0.10528957664715019, + "grad_norm": 6.074937459743524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21710 + }, + { + "epoch": 0.10533807483998628, + "grad_norm": 5.50036645563523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21720 + }, + { + "epoch": 0.10538657303282237, + "grad_norm": 6.76115462283633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21730 + }, + { + "epoch": 0.10543507122565846, + "grad_norm": 4.014144678876619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21740 + }, + { + "epoch": 0.10548356941849454, + "grad_norm": 3.57643074266889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21750 + }, + { + "epoch": 0.10553206761133063, + "grad_norm": 0.23360250890254974, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 21760 + }, + { + "epoch": 0.10558056580416672, + "grad_norm": 7.776712664053775e-06, + "learning_rate": 0.0002, + "loss": 0.0042, + "step": 21770 + }, + { + "epoch": 0.10562906399700281, + "grad_norm": 0.0006266055861487985, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 21780 + }, + { + "epoch": 0.1056775621898389, + "grad_norm": 0.0010826231446117163, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 21790 + }, + { + "epoch": 0.10572606038267499, + "grad_norm": 0.00036675206501968205, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 21800 + }, + { + "epoch": 0.10577455857551107, + "grad_norm": 0.0002457602240610868, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 21810 + }, + { + "epoch": 0.10582305676834718, + "grad_norm": 0.01875431276857853, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 21820 + }, + { + "epoch": 0.10587155496118326, + "grad_norm": 0.00017285092326346785, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21830 + }, + { + "epoch": 0.10592005315401935, + "grad_norm": 0.0008555006934329867, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21840 + }, + { + "epoch": 0.10596855134685544, + "grad_norm": 6.20639548287727e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21850 + }, + { + "epoch": 0.10601704953969153, + "grad_norm": 0.00011582093429751694, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21860 + }, + { + "epoch": 0.10606554773252762, + "grad_norm": 5.155202961759642e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21870 + }, + { + "epoch": 0.1061140459253637, + "grad_norm": 2.8882303013233468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21880 + }, + { + "epoch": 0.1061625441181998, + "grad_norm": 9.633894478611182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21890 + }, + { + "epoch": 0.10621104231103588, + "grad_norm": 8.008999429875985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21900 + }, + { + "epoch": 0.10625954050387197, + "grad_norm": 1.4888773876009509e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21910 + }, + { + "epoch": 0.10630803869670806, + "grad_norm": 1.6868405509740114e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21920 + }, + { + "epoch": 0.10635653688954415, + "grad_norm": 1.526873165857978e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21930 + }, + { + "epoch": 0.10640503508238024, + "grad_norm": 7.794832526997197e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21940 + }, + { + "epoch": 0.10645353327521634, + "grad_norm": 5.343002158042509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21950 + }, + { + "epoch": 0.10650203146805243, + "grad_norm": 2.8454154744395055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21960 + }, + { + "epoch": 0.10655052966088852, + "grad_norm": 7.909132546046749e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21970 + }, + { + "epoch": 0.1065990278537246, + "grad_norm": 1.1869598893099464e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21980 + }, + { + "epoch": 0.10664752604656069, + "grad_norm": 5.437820618681144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 21990 + }, + { + "epoch": 0.10669602423939678, + "grad_norm": 5.42151565241511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22000 + }, + { + "epoch": 0.10674452243223287, + "grad_norm": 9.437713742954656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22010 + }, + { + "epoch": 0.10679302062506896, + "grad_norm": 9.533388947602361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22020 + }, + { + "epoch": 0.10684151881790505, + "grad_norm": 1.1240921594435349e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22030 + }, + { + "epoch": 0.10689001701074113, + "grad_norm": 4.670089310820913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22040 + }, + { + "epoch": 0.10693851520357722, + "grad_norm": 1.3153666259313468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22050 + }, + { + "epoch": 0.10698701339641331, + "grad_norm": 8.714850082469638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22060 + }, + { + "epoch": 0.1070355115892494, + "grad_norm": 1.705924660200253e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22070 + }, + { + "epoch": 0.1070840097820855, + "grad_norm": 8.397927558689844e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 22080 + }, + { + "epoch": 0.10713250797492159, + "grad_norm": 3.5992068205814576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22090 + }, + { + "epoch": 0.10718100616775768, + "grad_norm": 2.029834831773769e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22100 + }, + { + "epoch": 0.10722950436059377, + "grad_norm": 9.299699740950018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22110 + }, + { + "epoch": 0.10727800255342985, + "grad_norm": 8.073537173913792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22120 + }, + { + "epoch": 0.10732650074626594, + "grad_norm": 2.394507646386046e-05, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 22130 + }, + { + "epoch": 0.10737499893910203, + "grad_norm": 5.379521826398559e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22140 + }, + { + "epoch": 0.10742349713193812, + "grad_norm": 0.00013074571324978024, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22150 + }, + { + "epoch": 0.10747199532477421, + "grad_norm": 9.148874232778326e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22160 + }, + { + "epoch": 0.1075204935176103, + "grad_norm": 9.387901809532195e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22170 + }, + { + "epoch": 0.10756899171044638, + "grad_norm": 3.541908517945558e-05, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 22180 + }, + { + "epoch": 0.10761748990328247, + "grad_norm": 4.438284577190643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22190 + }, + { + "epoch": 0.10766598809611856, + "grad_norm": 5.858428266947158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22200 + }, + { + "epoch": 0.10771448628895466, + "grad_norm": 0.014751948416233063, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 22210 + }, + { + "epoch": 0.10776298448179075, + "grad_norm": 0.0009222623193636537, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 22220 + }, + { + "epoch": 0.10781148267462684, + "grad_norm": 0.00026052678003907204, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22230 + }, + { + "epoch": 0.10785998086746293, + "grad_norm": 3.9676255255471915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22240 + }, + { + "epoch": 0.10790847906029902, + "grad_norm": 2.5132852897513658e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22250 + }, + { + "epoch": 0.1079569772531351, + "grad_norm": 6.08181107963901e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22260 + }, + { + "epoch": 0.1080054754459712, + "grad_norm": 3.94379640056286e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22270 + }, + { + "epoch": 0.10805397363880728, + "grad_norm": 3.294203997938894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22280 + }, + { + "epoch": 0.10810247183164337, + "grad_norm": 1.7796988686313853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22290 + }, + { + "epoch": 0.10815097002447946, + "grad_norm": 1.4341861060529482e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22300 + }, + { + "epoch": 0.10819946821731555, + "grad_norm": 3.993170685134828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22310 + }, + { + "epoch": 0.10824796641015164, + "grad_norm": 1.7617941921344027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22320 + }, + { + "epoch": 0.10829646460298772, + "grad_norm": 1.718161001917906e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22330 + }, + { + "epoch": 0.10834496279582383, + "grad_norm": 2.1034982637502253e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 22340 + }, + { + "epoch": 0.10839346098865991, + "grad_norm": 7.274966628756374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22350 + }, + { + "epoch": 0.108441959181496, + "grad_norm": 0.016658147796988487, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 22360 + }, + { + "epoch": 0.10849045737433209, + "grad_norm": 0.0016002609627321362, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 22370 + }, + { + "epoch": 0.10853895556716818, + "grad_norm": 5.324827725416981e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 22380 + }, + { + "epoch": 0.10858745376000427, + "grad_norm": 5.790680916106794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22390 + }, + { + "epoch": 0.10863595195284036, + "grad_norm": 8.608653843111824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22400 + }, + { + "epoch": 0.10868445014567645, + "grad_norm": 1.1782580259023234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22410 + }, + { + "epoch": 0.10873294833851253, + "grad_norm": 9.284390216635074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22420 + }, + { + "epoch": 0.10878144653134862, + "grad_norm": 9.172625141218305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22430 + }, + { + "epoch": 0.10882994472418471, + "grad_norm": 4.564365099213319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22440 + }, + { + "epoch": 0.1088784429170208, + "grad_norm": 4.946683020534692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22450 + }, + { + "epoch": 0.1089269411098569, + "grad_norm": 8.520705705450382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22460 + }, + { + "epoch": 0.10897543930269299, + "grad_norm": 7.295157502085203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22470 + }, + { + "epoch": 0.10902393749552908, + "grad_norm": 7.860144251026213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22480 + }, + { + "epoch": 0.10907243568836517, + "grad_norm": 4.895467554888455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22490 + }, + { + "epoch": 0.10912093388120125, + "grad_norm": 4.236668701196322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22500 + }, + { + "epoch": 0.10916943207403734, + "grad_norm": 6.715736617479706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22510 + }, + { + "epoch": 0.10921793026687343, + "grad_norm": 7.831509719835594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22520 + }, + { + "epoch": 0.10926642845970952, + "grad_norm": 8.731284651730675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22530 + }, + { + "epoch": 0.10931492665254561, + "grad_norm": 3.7661711758119054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22540 + }, + { + "epoch": 0.1093634248453817, + "grad_norm": 4.070969225722365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22550 + }, + { + "epoch": 0.10941192303821778, + "grad_norm": 2.7346997740096413e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22560 + }, + { + "epoch": 0.10946042123105387, + "grad_norm": 1.3580548511527013e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22570 + }, + { + "epoch": 0.10950891942388996, + "grad_norm": 7.38797098165378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22580 + }, + { + "epoch": 0.10955741761672606, + "grad_norm": 2.646017492224928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22590 + }, + { + "epoch": 0.10960591580956215, + "grad_norm": 3.0674971185362665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22600 + }, + { + "epoch": 0.10965441400239824, + "grad_norm": 4.945105047227116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22610 + }, + { + "epoch": 0.10970291219523433, + "grad_norm": 4.484401415538741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22620 + }, + { + "epoch": 0.10975141038807042, + "grad_norm": 5.391961167333648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22630 + }, + { + "epoch": 0.1097999085809065, + "grad_norm": 3.6853500660072314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22640 + }, + { + "epoch": 0.1098484067737426, + "grad_norm": 2.673648623385816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22650 + }, + { + "epoch": 0.10989690496657868, + "grad_norm": 4.63565083919093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22660 + }, + { + "epoch": 0.10994540315941477, + "grad_norm": 3.8960192796366755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22670 + }, + { + "epoch": 0.10999390135225086, + "grad_norm": 3.799500518653076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22680 + }, + { + "epoch": 0.11004239954508695, + "grad_norm": 2.445063273626147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22690 + }, + { + "epoch": 0.11009089773792304, + "grad_norm": 2.408650743745966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22700 + }, + { + "epoch": 0.11013939593075912, + "grad_norm": 2.309771480213385e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22710 + }, + { + "epoch": 0.11018789412359523, + "grad_norm": 4.345735305832932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22720 + }, + { + "epoch": 0.11023639231643131, + "grad_norm": 3.6004578305437462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22730 + }, + { + "epoch": 0.1102848905092674, + "grad_norm": 2.426954324619146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22740 + }, + { + "epoch": 0.11033338870210349, + "grad_norm": 2.784251819321071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22750 + }, + { + "epoch": 0.11038188689493958, + "grad_norm": 3.864761310978793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22760 + }, + { + "epoch": 0.11043038508777567, + "grad_norm": 3.899209787050495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22770 + }, + { + "epoch": 0.11047888328061176, + "grad_norm": 3.4804531878762646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22780 + }, + { + "epoch": 0.11052738147344784, + "grad_norm": 3.673887113109231e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22790 + }, + { + "epoch": 0.11057587966628393, + "grad_norm": 1.9468773189146305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22800 + }, + { + "epoch": 0.11062437785912002, + "grad_norm": 3.43999909091508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22810 + }, + { + "epoch": 0.11067287605195611, + "grad_norm": 1.0432790986669715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22820 + }, + { + "epoch": 0.1107213742447922, + "grad_norm": 3.9461589040001854e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22830 + }, + { + "epoch": 0.11076987243762829, + "grad_norm": 2.0322870568634244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22840 + }, + { + "epoch": 0.11081837063046439, + "grad_norm": 1.766784180290415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22850 + }, + { + "epoch": 0.11086686882330048, + "grad_norm": 3.332879487061291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22860 + }, + { + "epoch": 0.11091536701613657, + "grad_norm": 1.696867730061058e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22870 + }, + { + "epoch": 0.11096386520897265, + "grad_norm": 5.16671343575581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22880 + }, + { + "epoch": 0.11101236340180874, + "grad_norm": 2.078442548736348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22890 + }, + { + "epoch": 0.11106086159464483, + "grad_norm": 2.659028041307465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22900 + }, + { + "epoch": 0.11110935978748092, + "grad_norm": 5.74626938032452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22910 + }, + { + "epoch": 0.111157857980317, + "grad_norm": 3.4008867260126863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22920 + }, + { + "epoch": 0.1112063561731531, + "grad_norm": 3.6432111301110126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22930 + }, + { + "epoch": 0.11125485436598918, + "grad_norm": 1.640051550566568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22940 + }, + { + "epoch": 0.11130335255882527, + "grad_norm": 1.636706542740285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22950 + }, + { + "epoch": 0.11135185075166136, + "grad_norm": 2.942754690593574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22960 + }, + { + "epoch": 0.11140034894449745, + "grad_norm": 2.5069034563784953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22970 + }, + { + "epoch": 0.11144884713733355, + "grad_norm": 2.7980972845398355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22980 + }, + { + "epoch": 0.11149734533016964, + "grad_norm": 1.972799509530887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 22990 + }, + { + "epoch": 0.11154584352300573, + "grad_norm": 1.451983735023532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23000 + }, + { + "epoch": 0.11159434171584182, + "grad_norm": 2.4769576612015953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23010 + }, + { + "epoch": 0.1116428399086779, + "grad_norm": 2.8308425044087926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23020 + }, + { + "epoch": 0.11169133810151399, + "grad_norm": 7.220468432933558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23030 + }, + { + "epoch": 0.11173983629435008, + "grad_norm": 1.423821004209458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23040 + }, + { + "epoch": 0.11178833448718617, + "grad_norm": 3.98254769606865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23050 + }, + { + "epoch": 0.11183683268002226, + "grad_norm": 2.648459940246539e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23060 + }, + { + "epoch": 0.11188533087285835, + "grad_norm": 3.0729947866348084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23070 + }, + { + "epoch": 0.11193382906569443, + "grad_norm": 2.346853989365627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23080 + }, + { + "epoch": 0.11198232725853052, + "grad_norm": 1.3454475720209302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23090 + }, + { + "epoch": 0.11203082545136663, + "grad_norm": 1.7302849073530524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23100 + }, + { + "epoch": 0.11207932364420271, + "grad_norm": 2.4092969397315755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23110 + }, + { + "epoch": 0.1121278218370388, + "grad_norm": 2.330798906768905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23120 + }, + { + "epoch": 0.11217632002987489, + "grad_norm": 2.29740840040904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23130 + }, + { + "epoch": 0.11222481822271098, + "grad_norm": 1.1682257081702119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23140 + }, + { + "epoch": 0.11227331641554707, + "grad_norm": 1.2917631693198928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23150 + }, + { + "epoch": 0.11232181460838316, + "grad_norm": 2.1104237930558156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23160 + }, + { + "epoch": 0.11237031280121924, + "grad_norm": 3.3094161153712776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23170 + }, + { + "epoch": 0.11241881099405533, + "grad_norm": 1.929852487592143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23180 + }, + { + "epoch": 0.11246730918689142, + "grad_norm": 1.1468665661595878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23190 + }, + { + "epoch": 0.11251580737972751, + "grad_norm": 1.2841240959460265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23200 + }, + { + "epoch": 0.1125643055725636, + "grad_norm": 2.1999117052473594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23210 + }, + { + "epoch": 0.11261280376539969, + "grad_norm": 4.192541382508352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23220 + }, + { + "epoch": 0.11266130195823579, + "grad_norm": 2.240008825538098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23230 + }, + { + "epoch": 0.11270980015107188, + "grad_norm": 1.6200856407522224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23240 + }, + { + "epoch": 0.11275829834390796, + "grad_norm": 2.0056520497746533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23250 + }, + { + "epoch": 0.11280679653674405, + "grad_norm": 2.151191210941761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23260 + }, + { + "epoch": 0.11285529472958014, + "grad_norm": 2.1132211259100586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23270 + }, + { + "epoch": 0.11290379292241623, + "grad_norm": 1.8649519688551663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23280 + }, + { + "epoch": 0.11295229111525232, + "grad_norm": 1.0769995242299046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23290 + }, + { + "epoch": 0.1130007893080884, + "grad_norm": 1.8351857988818665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23300 + }, + { + "epoch": 0.1130492875009245, + "grad_norm": 2.2477049697045004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23310 + }, + { + "epoch": 0.11309778569376058, + "grad_norm": 1.7930681224243017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23320 + }, + { + "epoch": 0.11314628388659667, + "grad_norm": 2.5061381165869534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23330 + }, + { + "epoch": 0.11319478207943276, + "grad_norm": 1.6541937384317862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23340 + }, + { + "epoch": 0.11324328027226885, + "grad_norm": 1.202475004902226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23350 + }, + { + "epoch": 0.11329177846510495, + "grad_norm": 1.962250962606049e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23360 + }, + { + "epoch": 0.11334027665794104, + "grad_norm": 2.0554527964122826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23370 + }, + { + "epoch": 0.11338877485077713, + "grad_norm": 1.6416239532190957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23380 + }, + { + "epoch": 0.11343727304361322, + "grad_norm": 1.1839182434414397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23390 + }, + { + "epoch": 0.1134857712364493, + "grad_norm": 1.7136068208856159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23400 + }, + { + "epoch": 0.11353426942928539, + "grad_norm": 1.6962823110588943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23410 + }, + { + "epoch": 0.11358276762212148, + "grad_norm": 2.503222503946745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23420 + }, + { + "epoch": 0.11363126581495757, + "grad_norm": 2.0393740669533145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23430 + }, + { + "epoch": 0.11367976400779366, + "grad_norm": 1.2195438330309116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23440 + }, + { + "epoch": 0.11372826220062975, + "grad_norm": 1.134100898525503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23450 + }, + { + "epoch": 0.11377676039346583, + "grad_norm": 1.6008549437174224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23460 + }, + { + "epoch": 0.11382525858630192, + "grad_norm": 1.489040300839406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23470 + }, + { + "epoch": 0.11387375677913801, + "grad_norm": 1.7102203173635644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23480 + }, + { + "epoch": 0.11392225497197411, + "grad_norm": 1.0227582833977067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23490 + }, + { + "epoch": 0.1139707531648102, + "grad_norm": 9.71014173956064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23500 + }, + { + "epoch": 0.11401925135764629, + "grad_norm": 3.2311779705196386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23510 + }, + { + "epoch": 0.11406774955048238, + "grad_norm": 1.6039922456911881e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23520 + }, + { + "epoch": 0.11411624774331847, + "grad_norm": 1.4211090046956087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23530 + }, + { + "epoch": 0.11416474593615455, + "grad_norm": 1.157888732450374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23540 + }, + { + "epoch": 0.11421324412899064, + "grad_norm": 9.951926358553465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23550 + }, + { + "epoch": 0.11426174232182673, + "grad_norm": 1.4882878076605266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23560 + }, + { + "epoch": 0.11431024051466282, + "grad_norm": 1.7744047227097326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23570 + }, + { + "epoch": 0.11435873870749891, + "grad_norm": 1.6619790130789625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23580 + }, + { + "epoch": 0.114407236900335, + "grad_norm": 8.389038157474715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23590 + }, + { + "epoch": 0.11445573509317108, + "grad_norm": 1.3773330920230364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23600 + }, + { + "epoch": 0.11450423328600719, + "grad_norm": 1.4840593394183088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23610 + }, + { + "epoch": 0.11455273147884328, + "grad_norm": 1.432342287444044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23620 + }, + { + "epoch": 0.11460122967167936, + "grad_norm": 1.4934142882339074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23630 + }, + { + "epoch": 0.11464972786451545, + "grad_norm": 1.097206109079707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23640 + }, + { + "epoch": 0.11469822605735154, + "grad_norm": 8.037367251745309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23650 + }, + { + "epoch": 0.11474672425018763, + "grad_norm": 1.4019000218468136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23660 + }, + { + "epoch": 0.11479522244302372, + "grad_norm": 1.3244136880530277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23670 + }, + { + "epoch": 0.1148437206358598, + "grad_norm": 1.362612351840653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23680 + }, + { + "epoch": 0.1148922188286959, + "grad_norm": 8.533428399459808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23690 + }, + { + "epoch": 0.11494071702153198, + "grad_norm": 1.259074224435608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23700 + }, + { + "epoch": 0.11498921521436807, + "grad_norm": 1.2290342965570744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23710 + }, + { + "epoch": 0.11503771340720416, + "grad_norm": 1.1979859664279502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23720 + }, + { + "epoch": 0.11508621160004025, + "grad_norm": 1.3106354117553565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23730 + }, + { + "epoch": 0.11513470979287635, + "grad_norm": 2.3316235910897376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23740 + }, + { + "epoch": 0.11518320798571244, + "grad_norm": 7.653832199139288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23750 + }, + { + "epoch": 0.11523170617854853, + "grad_norm": 1.1372443395885057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23760 + }, + { + "epoch": 0.11528020437138461, + "grad_norm": 1.204953036904044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23770 + }, + { + "epoch": 0.1153287025642207, + "grad_norm": 1.3708323649552767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23780 + }, + { + "epoch": 0.11537720075705679, + "grad_norm": 9.105532399189542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23790 + }, + { + "epoch": 0.11542569894989288, + "grad_norm": 1.3210761835580342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23800 + }, + { + "epoch": 0.11547419714272897, + "grad_norm": 1.2779697726728045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23810 + }, + { + "epoch": 0.11552269533556506, + "grad_norm": 9.409483027411625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23820 + }, + { + "epoch": 0.11557119352840114, + "grad_norm": 1.1744756420739577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23830 + }, + { + "epoch": 0.11561969172123723, + "grad_norm": 9.696061624708818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23840 + }, + { + "epoch": 0.11566818991407332, + "grad_norm": 7.793070153638837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23850 + }, + { + "epoch": 0.11571668810690941, + "grad_norm": 1.164658442576183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23860 + }, + { + "epoch": 0.11576518629974551, + "grad_norm": 1.4348102013173047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23870 + }, + { + "epoch": 0.1158136844925816, + "grad_norm": 1.2152532917752978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23880 + }, + { + "epoch": 0.11586218268541769, + "grad_norm": 8.790254923951579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23890 + }, + { + "epoch": 0.11591068087825378, + "grad_norm": 7.191612212409382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23900 + }, + { + "epoch": 0.11595917907108987, + "grad_norm": 1.3725341432291316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23910 + }, + { + "epoch": 0.11600767726392595, + "grad_norm": 1.1355210745023214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23920 + }, + { + "epoch": 0.11605617545676204, + "grad_norm": 1.172150632555713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23930 + }, + { + "epoch": 0.11610467364959813, + "grad_norm": 1.0832767429747037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23940 + }, + { + "epoch": 0.11615317184243422, + "grad_norm": 6.608524358853174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23950 + }, + { + "epoch": 0.11620167003527031, + "grad_norm": 2.251599426017492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23960 + }, + { + "epoch": 0.1162501682281064, + "grad_norm": 1.0712005860114004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23970 + }, + { + "epoch": 0.11629866642094248, + "grad_norm": 1.1315481742713018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23980 + }, + { + "epoch": 0.11634716461377857, + "grad_norm": 9.961420346371597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 23990 + }, + { + "epoch": 0.11639566280661467, + "grad_norm": 8.210527084884234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24000 + }, + { + "epoch": 0.11644416099945076, + "grad_norm": 1.1258169934080797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24010 + }, + { + "epoch": 0.11649265919228685, + "grad_norm": 9.638359870223212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24020 + }, + { + "epoch": 0.11654115738512294, + "grad_norm": 1.3130029401509091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24030 + }, + { + "epoch": 0.11658965557795903, + "grad_norm": 6.605906150980445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24040 + }, + { + "epoch": 0.11663815377079512, + "grad_norm": 1.2133864402130712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24050 + }, + { + "epoch": 0.1166866519636312, + "grad_norm": 1.0242629286949523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24060 + }, + { + "epoch": 0.1167351501564673, + "grad_norm": 3.0745518415642437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24070 + }, + { + "epoch": 0.11678364834930338, + "grad_norm": 1.037932406688924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24080 + }, + { + "epoch": 0.11683214654213947, + "grad_norm": 6.911274681442592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24090 + }, + { + "epoch": 0.11688064473497556, + "grad_norm": 6.282934350565483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24100 + }, + { + "epoch": 0.11692914292781165, + "grad_norm": 1.1463655482657487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24110 + }, + { + "epoch": 0.11697764112064774, + "grad_norm": 1.0937379784081713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24120 + }, + { + "epoch": 0.11702613931348384, + "grad_norm": 1.0269303629684146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24130 + }, + { + "epoch": 0.11707463750631993, + "grad_norm": 6.523794695567631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24140 + }, + { + "epoch": 0.11712313569915601, + "grad_norm": 6.588843461940996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24150 + }, + { + "epoch": 0.1171716338919921, + "grad_norm": 1.067805101229169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24160 + }, + { + "epoch": 0.11722013208482819, + "grad_norm": 9.132868967753893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24170 + }, + { + "epoch": 0.11726863027766428, + "grad_norm": 9.592722562956624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24180 + }, + { + "epoch": 0.11731712847050037, + "grad_norm": 1.1641102446446894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24190 + }, + { + "epoch": 0.11736562666333646, + "grad_norm": 7.033439715087297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24200 + }, + { + "epoch": 0.11741412485617254, + "grad_norm": 4.519475623965263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24210 + }, + { + "epoch": 0.11746262304900863, + "grad_norm": 1.0738582432168187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24220 + }, + { + "epoch": 0.11751112124184472, + "grad_norm": 9.478621336711512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24230 + }, + { + "epoch": 0.11755961943468081, + "grad_norm": 9.514681096334243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24240 + }, + { + "epoch": 0.11760811762751691, + "grad_norm": 5.863761884938867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24250 + }, + { + "epoch": 0.117656615820353, + "grad_norm": 8.897502539184643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24260 + }, + { + "epoch": 0.11770511401318909, + "grad_norm": 8.52082905566931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24270 + }, + { + "epoch": 0.11775361220602518, + "grad_norm": 1.1257531014052802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24280 + }, + { + "epoch": 0.11780211039886127, + "grad_norm": 5.661435693582462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24290 + }, + { + "epoch": 0.11785060859169735, + "grad_norm": 1.2149487247370416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24300 + }, + { + "epoch": 0.11789910678453344, + "grad_norm": 8.183420732166269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24310 + }, + { + "epoch": 0.11794760497736953, + "grad_norm": 5.6006265367614105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24320 + }, + { + "epoch": 0.11799610317020562, + "grad_norm": 8.621996698821022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24330 + }, + { + "epoch": 0.1180446013630417, + "grad_norm": 7.418016707561037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24340 + }, + { + "epoch": 0.1180930995558778, + "grad_norm": 6.334742579383601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24350 + }, + { + "epoch": 0.11814159774871388, + "grad_norm": 8.780368716543308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24360 + }, + { + "epoch": 0.11819009594154997, + "grad_norm": 7.940062687339378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24370 + }, + { + "epoch": 0.11823859413438607, + "grad_norm": 2.6166628686041804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24380 + }, + { + "epoch": 0.11828709232722216, + "grad_norm": 3.3887286008393858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24390 + }, + { + "epoch": 0.11833559052005825, + "grad_norm": 5.079967309029598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24400 + }, + { + "epoch": 0.11838408871289434, + "grad_norm": 8.319643143295252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24410 + }, + { + "epoch": 0.11843258690573043, + "grad_norm": 9.762508170751971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24420 + }, + { + "epoch": 0.11848108509856652, + "grad_norm": 2.5041626940947026e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24430 + }, + { + "epoch": 0.1185295832914026, + "grad_norm": 4.7319383611466037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24440 + }, + { + "epoch": 0.11857808148423869, + "grad_norm": 5.300230441207532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24450 + }, + { + "epoch": 0.11862657967707478, + "grad_norm": 8.143445029418217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24460 + }, + { + "epoch": 0.11867507786991087, + "grad_norm": 8.531781077181222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24470 + }, + { + "epoch": 0.11872357606274696, + "grad_norm": 7.790856102474208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24480 + }, + { + "epoch": 0.11877207425558305, + "grad_norm": 5.055800897935114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24490 + }, + { + "epoch": 0.11882057244841913, + "grad_norm": 7.721524184489681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24500 + }, + { + "epoch": 0.11886907064125524, + "grad_norm": 1.0064475191029487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24510 + }, + { + "epoch": 0.11891756883409133, + "grad_norm": 7.259660037561844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24520 + }, + { + "epoch": 0.11896606702692741, + "grad_norm": 8.292914230878523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24530 + }, + { + "epoch": 0.1190145652197635, + "grad_norm": 4.899346777165192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24540 + }, + { + "epoch": 0.11906306341259959, + "grad_norm": 5.684291295438015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24550 + }, + { + "epoch": 0.11911156160543568, + "grad_norm": 7.462356848009222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24560 + }, + { + "epoch": 0.11916005979827177, + "grad_norm": 8.789465937297791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24570 + }, + { + "epoch": 0.11920855799110786, + "grad_norm": 8.096568535620463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24580 + }, + { + "epoch": 0.11925705618394394, + "grad_norm": 5.280483037495287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24590 + }, + { + "epoch": 0.11930555437678003, + "grad_norm": 4.97840233038005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24600 + }, + { + "epoch": 0.11935405256961612, + "grad_norm": 7.677744520151464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24610 + }, + { + "epoch": 0.11940255076245221, + "grad_norm": 7.566629278699111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24620 + }, + { + "epoch": 0.1194510489552883, + "grad_norm": 7.315925927287026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24630 + }, + { + "epoch": 0.1194995471481244, + "grad_norm": 5.567265475292515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24640 + }, + { + "epoch": 0.11954804534096049, + "grad_norm": 4.792091772287677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24650 + }, + { + "epoch": 0.11959654353379658, + "grad_norm": 8.403271749557462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24660 + }, + { + "epoch": 0.11964504172663266, + "grad_norm": 1.425901245966088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24670 + }, + { + "epoch": 0.11969353991946875, + "grad_norm": 7.272221296261705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24680 + }, + { + "epoch": 0.11974203811230484, + "grad_norm": 4.313216948048648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24690 + }, + { + "epoch": 0.11979053630514093, + "grad_norm": 5.521060870705696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24700 + }, + { + "epoch": 0.11983903449797702, + "grad_norm": 7.268889703482273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24710 + }, + { + "epoch": 0.1198875326908131, + "grad_norm": 7.423469696732354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24720 + }, + { + "epoch": 0.1199360308836492, + "grad_norm": 9.449173035136482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24730 + }, + { + "epoch": 0.11998452907648528, + "grad_norm": 4.066650944878347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24740 + }, + { + "epoch": 0.12003302726932137, + "grad_norm": 4.363062373613502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24750 + }, + { + "epoch": 0.12008152546215746, + "grad_norm": 6.478043701463321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24760 + }, + { + "epoch": 0.12013002365499356, + "grad_norm": 7.776350230415119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24770 + }, + { + "epoch": 0.12017852184782965, + "grad_norm": 6.666542731181835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24780 + }, + { + "epoch": 0.12022702004066574, + "grad_norm": 4.176289110091602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24790 + }, + { + "epoch": 0.12027551823350183, + "grad_norm": 4.2166541902588506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24800 + }, + { + "epoch": 0.12032401642633792, + "grad_norm": 1.4412793461815454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24810 + }, + { + "epoch": 0.120372514619174, + "grad_norm": 7.320001600419346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24820 + }, + { + "epoch": 0.12042101281201009, + "grad_norm": 6.56933366371959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24830 + }, + { + "epoch": 0.12046951100484618, + "grad_norm": 4.831263140658848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24840 + }, + { + "epoch": 0.12051800919768227, + "grad_norm": 2.2930332761461614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24850 + }, + { + "epoch": 0.12056650739051836, + "grad_norm": 6.925498610144132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24860 + }, + { + "epoch": 0.12061500558335445, + "grad_norm": 6.061622457309568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24870 + }, + { + "epoch": 0.12066350377619053, + "grad_norm": 5.832770284541766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24880 + }, + { + "epoch": 0.12071200196902664, + "grad_norm": 3.8176929706423834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24890 + }, + { + "epoch": 0.12076050016186272, + "grad_norm": 3.544338085248455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24900 + }, + { + "epoch": 0.12080899835469881, + "grad_norm": 5.798162874270929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24910 + }, + { + "epoch": 0.1208574965475349, + "grad_norm": 5.516622536561044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24920 + }, + { + "epoch": 0.12090599474037099, + "grad_norm": 1.3698851262233802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24930 + }, + { + "epoch": 0.12095449293320708, + "grad_norm": 3.888562503107096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24940 + }, + { + "epoch": 0.12100299112604317, + "grad_norm": 5.044763042860723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24950 + }, + { + "epoch": 0.12105148931887925, + "grad_norm": 1.9295516722195316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24960 + }, + { + "epoch": 0.12109998751171534, + "grad_norm": 2.6956017791235354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24970 + }, + { + "epoch": 0.12114848570455143, + "grad_norm": 1.9587453152780654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24980 + }, + { + "epoch": 0.12119698389738752, + "grad_norm": 7.49525668197748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 24990 + }, + { + "epoch": 0.12124548209022361, + "grad_norm": 7.513336299780349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25000 + }, + { + "epoch": 0.1212939802830597, + "grad_norm": 1.6965052509476664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25010 + }, + { + "epoch": 0.1213424784758958, + "grad_norm": 1.9815840914816363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25020 + }, + { + "epoch": 0.12139097666873189, + "grad_norm": 6.6162870098196436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25030 + }, + { + "epoch": 0.12143947486156798, + "grad_norm": 8.836864822114876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25040 + }, + { + "epoch": 0.12148797305440406, + "grad_norm": 7.792801852701814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25050 + }, + { + "epoch": 0.12153647124724015, + "grad_norm": 1.5960612245180528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25060 + }, + { + "epoch": 0.12158496944007624, + "grad_norm": 1.2461707683542045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25070 + }, + { + "epoch": 0.12163346763291233, + "grad_norm": 1.5243903135342407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25080 + }, + { + "epoch": 0.12168196582574842, + "grad_norm": 5.579064463745453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25090 + }, + { + "epoch": 0.1217304640185845, + "grad_norm": 4.875524268754816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25100 + }, + { + "epoch": 0.1217789622114206, + "grad_norm": 1.592155399521289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25110 + }, + { + "epoch": 0.12182746040425668, + "grad_norm": 2.800976972139324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25120 + }, + { + "epoch": 0.12187595859709277, + "grad_norm": 1.1374011137377238e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25130 + }, + { + "epoch": 0.12192445678992886, + "grad_norm": 4.363805601315107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25140 + }, + { + "epoch": 0.12197295498276496, + "grad_norm": 4.440197471922147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25150 + }, + { + "epoch": 0.12202145317560105, + "grad_norm": 1.2624083183254697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25160 + }, + { + "epoch": 0.12206995136843714, + "grad_norm": 1.1838334330604994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25170 + }, + { + "epoch": 0.12211844956127323, + "grad_norm": 1.0089727311424213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25180 + }, + { + "epoch": 0.12216694775410931, + "grad_norm": 4.640326096705394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25190 + }, + { + "epoch": 0.1222154459469454, + "grad_norm": 4.331502623244887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25200 + }, + { + "epoch": 0.12226394413978149, + "grad_norm": 1.5834310715945321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25210 + }, + { + "epoch": 0.12231244233261758, + "grad_norm": 1.0430773045300157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25220 + }, + { + "epoch": 0.12236094052545367, + "grad_norm": 9.595075880497461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25230 + }, + { + "epoch": 0.12240943871828976, + "grad_norm": 4.447383332717436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25240 + }, + { + "epoch": 0.12245793691112584, + "grad_norm": 4.81529468743247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25250 + }, + { + "epoch": 0.12250643510396193, + "grad_norm": 9.257954616259667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25260 + }, + { + "epoch": 0.12255493329679802, + "grad_norm": 7.995236046554055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25270 + }, + { + "epoch": 0.12260343148963412, + "grad_norm": 9.148863000518759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25280 + }, + { + "epoch": 0.12265192968247021, + "grad_norm": 4.456994702195516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25290 + }, + { + "epoch": 0.1227004278753063, + "grad_norm": 5.075573312751658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25300 + }, + { + "epoch": 0.12274892606814239, + "grad_norm": 7.865529028094898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25310 + }, + { + "epoch": 0.12279742426097848, + "grad_norm": 9.049601885635639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25320 + }, + { + "epoch": 0.12284592245381457, + "grad_norm": 8.160052971106779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25330 + }, + { + "epoch": 0.12289442064665065, + "grad_norm": 3.974096784986614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25340 + }, + { + "epoch": 0.12294291883948674, + "grad_norm": 3.8243993571995816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25350 + }, + { + "epoch": 0.12299141703232283, + "grad_norm": 7.29676685295999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25360 + }, + { + "epoch": 0.12303991522515892, + "grad_norm": 6.872452331663226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25370 + }, + { + "epoch": 0.12308841341799501, + "grad_norm": 1.7288436993112555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25380 + }, + { + "epoch": 0.1231369116108311, + "grad_norm": 5.032621288592054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25390 + }, + { + "epoch": 0.12318540980366718, + "grad_norm": 4.407393419114669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25400 + }, + { + "epoch": 0.12323390799650329, + "grad_norm": 7.202206120382471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25410 + }, + { + "epoch": 0.12328240618933937, + "grad_norm": 7.770502747916908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25420 + }, + { + "epoch": 0.12333090438217546, + "grad_norm": 8.79501214967604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25430 + }, + { + "epoch": 0.12337940257501155, + "grad_norm": 4.5462996922651655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25440 + }, + { + "epoch": 0.12342790076784764, + "grad_norm": 7.050099952721212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25450 + }, + { + "epoch": 0.12347639896068373, + "grad_norm": 7.477483450202271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25460 + }, + { + "epoch": 0.12352489715351982, + "grad_norm": 7.468530043297505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25470 + }, + { + "epoch": 0.1235733953463559, + "grad_norm": 6.219251531547343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25480 + }, + { + "epoch": 0.123621893539192, + "grad_norm": 6.920471946614271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25490 + }, + { + "epoch": 0.12367039173202808, + "grad_norm": 3.4985734487236186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25500 + }, + { + "epoch": 0.12371888992486417, + "grad_norm": 6.891222028571065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25510 + }, + { + "epoch": 0.12376738811770026, + "grad_norm": 6.698455194964481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25520 + }, + { + "epoch": 0.12381588631053636, + "grad_norm": 6.797773721700651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25530 + }, + { + "epoch": 0.12386438450337245, + "grad_norm": 3.326041166928917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25540 + }, + { + "epoch": 0.12391288269620854, + "grad_norm": 2.963131464639446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25550 + }, + { + "epoch": 0.12396138088904463, + "grad_norm": 6.728404855493864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25560 + }, + { + "epoch": 0.12400987908188071, + "grad_norm": 6.360701263474766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25570 + }, + { + "epoch": 0.1240583772747168, + "grad_norm": 5.705742296413518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25580 + }, + { + "epoch": 0.12410687546755289, + "grad_norm": 2.9991144856467145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25590 + }, + { + "epoch": 0.12415537366038898, + "grad_norm": 3.344196954913059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25600 + }, + { + "epoch": 0.12420387185322507, + "grad_norm": 6.746111580469005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25610 + }, + { + "epoch": 0.12425237004606116, + "grad_norm": 6.315889322650037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25620 + }, + { + "epoch": 0.12430086823889724, + "grad_norm": 6.008385753375478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25630 + }, + { + "epoch": 0.12434936643173333, + "grad_norm": 3.6762827448910684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25640 + }, + { + "epoch": 0.12439786462456942, + "grad_norm": 2.9494859177248145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25650 + }, + { + "epoch": 0.12444636281740552, + "grad_norm": 5.793577315671428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25660 + }, + { + "epoch": 0.12449486101024161, + "grad_norm": 5.834743319610425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25670 + }, + { + "epoch": 0.1245433592030777, + "grad_norm": 5.837691219312546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25680 + }, + { + "epoch": 0.12459185739591379, + "grad_norm": 2.917306858307711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25690 + }, + { + "epoch": 0.12464035558874988, + "grad_norm": 2.300241021657712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25700 + }, + { + "epoch": 0.12468885378158596, + "grad_norm": 5.369539053390326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25710 + }, + { + "epoch": 0.12473735197442205, + "grad_norm": 5.662326998390199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25720 + }, + { + "epoch": 0.12478585016725814, + "grad_norm": 7.776775419188198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25730 + }, + { + "epoch": 0.12483434836009423, + "grad_norm": 3.350523911649361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25740 + }, + { + "epoch": 0.12488284655293032, + "grad_norm": 2.75679298056275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25750 + }, + { + "epoch": 0.1249313447457664, + "grad_norm": 5.908233902118809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25760 + }, + { + "epoch": 0.1249798429386025, + "grad_norm": 4.835256959268008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25770 + }, + { + "epoch": 0.1250283411314386, + "grad_norm": 5.783580832030566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25780 + }, + { + "epoch": 0.12507683932427469, + "grad_norm": 2.7463275387162867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25790 + }, + { + "epoch": 0.12512533751711077, + "grad_norm": 3.075975314459356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25800 + }, + { + "epoch": 0.12517383570994686, + "grad_norm": 5.381570531426405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25810 + }, + { + "epoch": 0.12522233390278295, + "grad_norm": 4.4790115794057783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25820 + }, + { + "epoch": 0.12527083209561904, + "grad_norm": 5.302439376464463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25830 + }, + { + "epoch": 0.12531933028845513, + "grad_norm": 2.723371039792255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25840 + }, + { + "epoch": 0.12536782848129122, + "grad_norm": 2.752947239059722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25850 + }, + { + "epoch": 0.1254163266741273, + "grad_norm": 5.314238933351589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25860 + }, + { + "epoch": 0.1254648248669634, + "grad_norm": 4.2004063516287715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25870 + }, + { + "epoch": 0.12551332305979948, + "grad_norm": 4.733704486170609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25880 + }, + { + "epoch": 0.12556182125263557, + "grad_norm": 2.915416814630589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25890 + }, + { + "epoch": 0.12561031944547166, + "grad_norm": 2.846904862963129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25900 + }, + { + "epoch": 0.12565881763830775, + "grad_norm": 4.851386847803951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25910 + }, + { + "epoch": 0.12570731583114383, + "grad_norm": 7.377363431260164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25920 + }, + { + "epoch": 0.12575581402397992, + "grad_norm": 5.207467665968579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25930 + }, + { + "epoch": 0.125804312216816, + "grad_norm": 3.0885266255609167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25940 + }, + { + "epoch": 0.1258528104096521, + "grad_norm": 2.777802592390799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25950 + }, + { + "epoch": 0.1259013086024882, + "grad_norm": 4.4051307668269146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25960 + }, + { + "epoch": 0.12594980679532428, + "grad_norm": 4.866427616434521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25970 + }, + { + "epoch": 0.1259983049881604, + "grad_norm": 4.6858031055307947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25980 + }, + { + "epoch": 0.12604680318099648, + "grad_norm": 2.707422481762478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 25990 + }, + { + "epoch": 0.12609530137383257, + "grad_norm": 3.2369038649449067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26000 + }, + { + "epoch": 0.12614379956666866, + "grad_norm": 5.019960553909186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26010 + }, + { + "epoch": 0.12619229775950475, + "grad_norm": 4.215611397739849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26020 + }, + { + "epoch": 0.12624079595234083, + "grad_norm": 4.5085440092407225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26030 + }, + { + "epoch": 0.12628929414517692, + "grad_norm": 2.620145380660688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26040 + }, + { + "epoch": 0.126337792338013, + "grad_norm": 2.7098457167085144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26050 + }, + { + "epoch": 0.1263862905308491, + "grad_norm": 6.546794111272902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26060 + }, + { + "epoch": 0.1264347887236852, + "grad_norm": 4.163508435794938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26070 + }, + { + "epoch": 0.12648328691652128, + "grad_norm": 4.5287308125807613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26080 + }, + { + "epoch": 0.12653178510935736, + "grad_norm": 2.6563722599348694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26090 + }, + { + "epoch": 0.12658028330219345, + "grad_norm": 4.218601930006116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26100 + }, + { + "epoch": 0.12662878149502954, + "grad_norm": 4.080992255239835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26110 + }, + { + "epoch": 0.12667727968786563, + "grad_norm": 4.287571471195406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26120 + }, + { + "epoch": 0.12672577788070172, + "grad_norm": 5.01105205330532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26130 + }, + { + "epoch": 0.1267742760735378, + "grad_norm": 2.589068230918201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26140 + }, + { + "epoch": 0.1268227742663739, + "grad_norm": 2.4284352662107267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26150 + }, + { + "epoch": 0.12687127245920998, + "grad_norm": 3.9712125499136164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26160 + }, + { + "epoch": 0.12691977065204607, + "grad_norm": 4.3013460526708513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26170 + }, + { + "epoch": 0.12696826884488216, + "grad_norm": 4.1198518374585547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26180 + }, + { + "epoch": 0.12701676703771825, + "grad_norm": 2.6083768034368404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26190 + }, + { + "epoch": 0.12706526523055434, + "grad_norm": 2.376836505391111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26200 + }, + { + "epoch": 0.12711376342339042, + "grad_norm": 3.9497140846833645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26210 + }, + { + "epoch": 0.1271622616162265, + "grad_norm": 4.0266874634653504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26220 + }, + { + "epoch": 0.1272107598090626, + "grad_norm": 2.2856190753373085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26230 + }, + { + "epoch": 0.12725925800189872, + "grad_norm": 2.3676618354784296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26240 + }, + { + "epoch": 0.1273077561947348, + "grad_norm": 2.5629728384046757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26250 + }, + { + "epoch": 0.1273562543875709, + "grad_norm": 3.6094434108235873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26260 + }, + { + "epoch": 0.12740475258040698, + "grad_norm": 3.6246976264919795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26270 + }, + { + "epoch": 0.12745325077324307, + "grad_norm": 3.9503265725215897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26280 + }, + { + "epoch": 0.12750174896607916, + "grad_norm": 2.66295927531246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26290 + }, + { + "epoch": 0.12755024715891525, + "grad_norm": 2.502518725577829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26300 + }, + { + "epoch": 0.12759874535175134, + "grad_norm": 3.255950957736786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26310 + }, + { + "epoch": 0.12764724354458742, + "grad_norm": 3.2345536737921066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26320 + }, + { + "epoch": 0.1276957417374235, + "grad_norm": 3.425793977385183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26330 + }, + { + "epoch": 0.1277442399302596, + "grad_norm": 2.2415653688767634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26340 + }, + { + "epoch": 0.1277927381230957, + "grad_norm": 2.537993282203388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26350 + }, + { + "epoch": 0.12784123631593178, + "grad_norm": 3.2803166050143773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26360 + }, + { + "epoch": 0.12788973450876787, + "grad_norm": 3.7190099533290777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26370 + }, + { + "epoch": 0.12793823270160395, + "grad_norm": 3.4933188430841255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26380 + }, + { + "epoch": 0.12798673089444004, + "grad_norm": 2.2252859821492166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26390 + }, + { + "epoch": 0.12803522908727613, + "grad_norm": 1.1444233223301126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26400 + }, + { + "epoch": 0.12808372728011222, + "grad_norm": 3.650691269285744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26410 + }, + { + "epoch": 0.1281322254729483, + "grad_norm": 3.371706043253653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26420 + }, + { + "epoch": 0.1281807236657844, + "grad_norm": 1.4273797273635864, + "learning_rate": 0.0002, + "loss": 0.005, + "step": 26430 + }, + { + "epoch": 0.12822922185862048, + "grad_norm": 0.0024270368739962578, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 26440 + }, + { + "epoch": 0.12827772005145657, + "grad_norm": 0.009270072914659977, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 26450 + }, + { + "epoch": 0.12832621824429266, + "grad_norm": 0.010695001110434532, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 26460 + }, + { + "epoch": 0.12837471643712875, + "grad_norm": 0.000789384008385241, + "learning_rate": 0.0002, + "loss": 0.004, + "step": 26470 + }, + { + "epoch": 0.12842321462996484, + "grad_norm": 0.0009324333514086902, + "learning_rate": 0.0002, + "loss": 0.0031, + "step": 26480 + }, + { + "epoch": 0.12847171282280095, + "grad_norm": 0.7474738955497742, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 26490 + }, + { + "epoch": 0.12852021101563704, + "grad_norm": 0.00015957708819769323, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 26500 + }, + { + "epoch": 0.12856870920847313, + "grad_norm": 0.0011885797139257193, + "learning_rate": 0.0002, + "loss": 0.0111, + "step": 26510 + }, + { + "epoch": 0.12861720740130922, + "grad_norm": 0.0023720243480056524, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 26520 + }, + { + "epoch": 0.1286657055941453, + "grad_norm": 0.0004915093304589391, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 26530 + }, + { + "epoch": 0.1287142037869814, + "grad_norm": 0.0001380989997414872, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 26540 + }, + { + "epoch": 0.12876270197981748, + "grad_norm": 0.00043578995973803103, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 26550 + }, + { + "epoch": 0.12881120017265357, + "grad_norm": 0.00018373447528574616, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 26560 + }, + { + "epoch": 0.12885969836548966, + "grad_norm": 4.6327793825184926e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26570 + }, + { + "epoch": 0.12890819655832575, + "grad_norm": 3.9249560359166935e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26580 + }, + { + "epoch": 0.12895669475116184, + "grad_norm": 9.235091420123354e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26590 + }, + { + "epoch": 0.12900519294399793, + "grad_norm": 5.001180761610158e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26600 + }, + { + "epoch": 0.12905369113683401, + "grad_norm": 9.134411811828613e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26610 + }, + { + "epoch": 0.1291021893296701, + "grad_norm": 3.076059147133492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26620 + }, + { + "epoch": 0.1291506875225062, + "grad_norm": 2.650660098879598e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26630 + }, + { + "epoch": 0.12919918571534228, + "grad_norm": 2.856854916899465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26640 + }, + { + "epoch": 0.12924768390817837, + "grad_norm": 2.5691240807645954e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26650 + }, + { + "epoch": 0.12929618210101446, + "grad_norm": 2.2569576685782522e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26660 + }, + { + "epoch": 0.12934468029385054, + "grad_norm": 2.716154085646849e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26670 + }, + { + "epoch": 0.12939317848668663, + "grad_norm": 2.227600089099724e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26680 + }, + { + "epoch": 0.12944167667952272, + "grad_norm": 2.256570223835297e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26690 + }, + { + "epoch": 0.1294901748723588, + "grad_norm": 1.9784927644650452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26700 + }, + { + "epoch": 0.1295386730651949, + "grad_norm": 1.753240758262109e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26710 + }, + { + "epoch": 0.129587171258031, + "grad_norm": 1.7882197425933555e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26720 + }, + { + "epoch": 0.12963566945086707, + "grad_norm": 1.6054020306910388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26730 + }, + { + "epoch": 0.12968416764370316, + "grad_norm": 1.531203270133119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26740 + }, + { + "epoch": 0.12973266583653928, + "grad_norm": 2.0830706489505246e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26750 + }, + { + "epoch": 0.12978116402937537, + "grad_norm": 1.4876707609801088e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26760 + }, + { + "epoch": 0.12982966222221146, + "grad_norm": 1.4297133930085693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26770 + }, + { + "epoch": 0.12987816041504754, + "grad_norm": 1.8217180695501156e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26780 + }, + { + "epoch": 0.12992665860788363, + "grad_norm": 1.443448581994744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26790 + }, + { + "epoch": 0.12997515680071972, + "grad_norm": 2.0025161575176753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26800 + }, + { + "epoch": 0.1300236549935558, + "grad_norm": 1.3551416486734524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26810 + }, + { + "epoch": 0.1300721531863919, + "grad_norm": 1.3318476703716442e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26820 + }, + { + "epoch": 0.130120651379228, + "grad_norm": 1.2174181392765604e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26830 + }, + { + "epoch": 0.13016914957206407, + "grad_norm": 1.2608124052349012e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26840 + }, + { + "epoch": 0.13021764776490016, + "grad_norm": 1.0437243872729596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26850 + }, + { + "epoch": 0.13026614595773625, + "grad_norm": 1.1936014743696433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26860 + }, + { + "epoch": 0.13031464415057234, + "grad_norm": 1.1593424460443202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26870 + }, + { + "epoch": 0.13036314234340843, + "grad_norm": 1.12252500912291e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26880 + }, + { + "epoch": 0.13041164053624452, + "grad_norm": 9.241817679139785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26890 + }, + { + "epoch": 0.1304601387290806, + "grad_norm": 8.276529115391895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26900 + }, + { + "epoch": 0.1305086369219167, + "grad_norm": 1.336962577624945e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26910 + }, + { + "epoch": 0.13055713511475278, + "grad_norm": 1.1753584658436012e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26920 + }, + { + "epoch": 0.13060563330758887, + "grad_norm": 1.0450777153891977e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26930 + }, + { + "epoch": 0.13065413150042496, + "grad_norm": 1.477755813539261e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26940 + }, + { + "epoch": 0.13070262969326105, + "grad_norm": 8.926574082579464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26950 + }, + { + "epoch": 0.13075112788609713, + "grad_norm": 9.897589734464418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26960 + }, + { + "epoch": 0.13079962607893322, + "grad_norm": 6.598072650376707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26970 + }, + { + "epoch": 0.1308481242717693, + "grad_norm": 8.572137630835641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26980 + }, + { + "epoch": 0.1308966224646054, + "grad_norm": 7.722795999143273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 26990 + }, + { + "epoch": 0.1309451206574415, + "grad_norm": 9.9279495771043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27000 + }, + { + "epoch": 0.1309936188502776, + "grad_norm": 9.370699444843922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27010 + }, + { + "epoch": 0.1310421170431137, + "grad_norm": 9.698511348688044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27020 + }, + { + "epoch": 0.13109061523594978, + "grad_norm": 9.774481441127136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27030 + }, + { + "epoch": 0.13113911342878587, + "grad_norm": 6.976275471970439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27040 + }, + { + "epoch": 0.13118761162162196, + "grad_norm": 6.870442575745983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27050 + }, + { + "epoch": 0.13123610981445805, + "grad_norm": 8.72751661518123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27060 + }, + { + "epoch": 0.13128460800729413, + "grad_norm": 8.427333341387566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27070 + }, + { + "epoch": 0.13133310620013022, + "grad_norm": 7.950777217047289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27080 + }, + { + "epoch": 0.1313816043929663, + "grad_norm": 5.7646843742986675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27090 + }, + { + "epoch": 0.1314301025858024, + "grad_norm": 6.151993602543371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27100 + }, + { + "epoch": 0.1314786007786385, + "grad_norm": 7.66844823374413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27110 + }, + { + "epoch": 0.13152709897147458, + "grad_norm": 1.2116813195461873e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27120 + }, + { + "epoch": 0.13157559716431066, + "grad_norm": 8.048298695939593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27130 + }, + { + "epoch": 0.13162409535714675, + "grad_norm": 5.267883807391627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27140 + }, + { + "epoch": 0.13167259354998284, + "grad_norm": 5.969833637209376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27150 + }, + { + "epoch": 0.13172109174281893, + "grad_norm": 7.531816663686186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27160 + }, + { + "epoch": 0.13176958993565502, + "grad_norm": 7.87032513471786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27170 + }, + { + "epoch": 0.1318180881284911, + "grad_norm": 7.165295301092556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27180 + }, + { + "epoch": 0.1318665863213272, + "grad_norm": 5.5374225667037535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27190 + }, + { + "epoch": 0.13191508451416328, + "grad_norm": 5.305750164552592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27200 + }, + { + "epoch": 0.13196358270699937, + "grad_norm": 1.0189045497099869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27210 + }, + { + "epoch": 0.13201208089983546, + "grad_norm": 7.086697678460041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27220 + }, + { + "epoch": 0.13206057909267155, + "grad_norm": 6.352362561301561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27230 + }, + { + "epoch": 0.13210907728550764, + "grad_norm": 4.763957349496195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27240 + }, + { + "epoch": 0.13215757547834373, + "grad_norm": 4.7614403229090385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27250 + }, + { + "epoch": 0.13220607367117984, + "grad_norm": 6.441908681154018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27260 + }, + { + "epoch": 0.13225457186401593, + "grad_norm": 6.834926352894399e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27270 + }, + { + "epoch": 0.13230307005685202, + "grad_norm": 6.178669536893722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27280 + }, + { + "epoch": 0.1323515682496881, + "grad_norm": 4.336985966801876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27290 + }, + { + "epoch": 0.1324000664425242, + "grad_norm": 4.756815542350523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27300 + }, + { + "epoch": 0.13244856463536028, + "grad_norm": 5.989492819935549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27310 + }, + { + "epoch": 0.13249706282819637, + "grad_norm": 5.704235263692681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27320 + }, + { + "epoch": 0.13254556102103246, + "grad_norm": 5.720984518120531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27330 + }, + { + "epoch": 0.13259405921386855, + "grad_norm": 3.95805727748666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27340 + }, + { + "epoch": 0.13264255740670464, + "grad_norm": 4.42711007053731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27350 + }, + { + "epoch": 0.13269105559954072, + "grad_norm": 5.73952866034233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27360 + }, + { + "epoch": 0.1327395537923768, + "grad_norm": 5.847681677551009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27370 + }, + { + "epoch": 0.1327880519852129, + "grad_norm": 5.433284968603402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27380 + }, + { + "epoch": 0.132836550178049, + "grad_norm": 4.282463123672642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27390 + }, + { + "epoch": 0.13288504837088508, + "grad_norm": 4.67269910586765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27400 + }, + { + "epoch": 0.13293354656372117, + "grad_norm": 5.4571119108004496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27410 + }, + { + "epoch": 0.13298204475655725, + "grad_norm": 5.08047969560721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27420 + }, + { + "epoch": 0.13303054294939334, + "grad_norm": 4.9067575673689134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27430 + }, + { + "epoch": 0.13307904114222943, + "grad_norm": 3.89935075872927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27440 + }, + { + "epoch": 0.13312753933506552, + "grad_norm": 3.722797146110679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27450 + }, + { + "epoch": 0.1331760375279016, + "grad_norm": 4.931749572278932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27460 + }, + { + "epoch": 0.1332245357207377, + "grad_norm": 5.386979864852037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27470 + }, + { + "epoch": 0.13327303391357379, + "grad_norm": 4.681116024585208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27480 + }, + { + "epoch": 0.13332153210640987, + "grad_norm": 3.819440735242097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27490 + }, + { + "epoch": 0.13337003029924596, + "grad_norm": 3.7717838949902216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27500 + }, + { + "epoch": 0.13341852849208205, + "grad_norm": 5.144677743373904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27510 + }, + { + "epoch": 0.13346702668491817, + "grad_norm": 4.947238721797476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27520 + }, + { + "epoch": 0.13351552487775425, + "grad_norm": 4.449175776244374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27530 + }, + { + "epoch": 0.13356402307059034, + "grad_norm": 3.92057836506865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27540 + }, + { + "epoch": 0.13361252126342643, + "grad_norm": 3.1780268727743533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27550 + }, + { + "epoch": 0.13366101945626252, + "grad_norm": 4.6809136620140634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27560 + }, + { + "epoch": 0.1337095176490986, + "grad_norm": 5.164246431377251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27570 + }, + { + "epoch": 0.1337580158419347, + "grad_norm": 4.545758656604448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27580 + }, + { + "epoch": 0.13380651403477078, + "grad_norm": 3.850084340228932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27590 + }, + { + "epoch": 0.13385501222760687, + "grad_norm": 2.9012128379690694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27600 + }, + { + "epoch": 0.13390351042044296, + "grad_norm": 4.43001863459358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27610 + }, + { + "epoch": 0.13395200861327905, + "grad_norm": 4.178933068033075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27620 + }, + { + "epoch": 0.13400050680611514, + "grad_norm": 3.895491772709647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27630 + }, + { + "epoch": 0.13404900499895123, + "grad_norm": 2.8536474019347224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27640 + }, + { + "epoch": 0.13409750319178732, + "grad_norm": 3.553162514435826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27650 + }, + { + "epoch": 0.1341460013846234, + "grad_norm": 3.905801804648945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27660 + }, + { + "epoch": 0.1341944995774595, + "grad_norm": 4.119429831916932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27670 + }, + { + "epoch": 0.13424299777029558, + "grad_norm": 4.557126885629259e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27680 + }, + { + "epoch": 0.13429149596313167, + "grad_norm": 4.268251814210089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27690 + }, + { + "epoch": 0.13433999415596776, + "grad_norm": 2.6384602733742213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27700 + }, + { + "epoch": 0.13438849234880385, + "grad_norm": 4.397282737045316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27710 + }, + { + "epoch": 0.13443699054163993, + "grad_norm": 4.075929155078484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27720 + }, + { + "epoch": 0.13448548873447602, + "grad_norm": 4.130184152018046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27730 + }, + { + "epoch": 0.1345339869273121, + "grad_norm": 3.0220153348636813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27740 + }, + { + "epoch": 0.1345824851201482, + "grad_norm": 2.6339603209635243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27750 + }, + { + "epoch": 0.1346309833129843, + "grad_norm": 3.6166638892609626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27760 + }, + { + "epoch": 0.1346794815058204, + "grad_norm": 3.8816924643469974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27770 + }, + { + "epoch": 0.1347279796986565, + "grad_norm": 4.70968052468379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27780 + }, + { + "epoch": 0.13477647789149258, + "grad_norm": 2.4166392904589884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27790 + }, + { + "epoch": 0.13482497608432867, + "grad_norm": 2.312274318683194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27800 + }, + { + "epoch": 0.13487347427716476, + "grad_norm": 3.824064151558559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27810 + }, + { + "epoch": 0.13492197247000084, + "grad_norm": 3.617112952269963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27820 + }, + { + "epoch": 0.13497047066283693, + "grad_norm": 3.2530676890019095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27830 + }, + { + "epoch": 0.13501896885567302, + "grad_norm": 1.0990965165547095e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 27840 + }, + { + "epoch": 0.1350674670485091, + "grad_norm": 5.161984154256061e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27850 + }, + { + "epoch": 0.1351159652413452, + "grad_norm": 0.0012148034293204546, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 27860 + }, + { + "epoch": 0.1351644634341813, + "grad_norm": 0.00047298503341153264, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 27870 + }, + { + "epoch": 0.13521296162701738, + "grad_norm": 0.00010554921755101532, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27880 + }, + { + "epoch": 0.13526145981985346, + "grad_norm": 4.0659826481714845e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27890 + }, + { + "epoch": 0.13530995801268955, + "grad_norm": 1.63020577019779e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27900 + }, + { + "epoch": 0.13535845620552564, + "grad_norm": 1.863946636149194e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27910 + }, + { + "epoch": 0.13540695439836173, + "grad_norm": 1.9235034415032715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27920 + }, + { + "epoch": 0.13545545259119782, + "grad_norm": 0.00014130244380794466, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 27930 + }, + { + "epoch": 0.1355039507840339, + "grad_norm": 0.0005860092933289707, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27940 + }, + { + "epoch": 0.13555244897687, + "grad_norm": 9.245381988876034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27950 + }, + { + "epoch": 0.13560094716970608, + "grad_norm": 6.007305273669772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27960 + }, + { + "epoch": 0.13564944536254217, + "grad_norm": 1.4425673725781962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27970 + }, + { + "epoch": 0.13569794355537826, + "grad_norm": 1.4159210877551232e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27980 + }, + { + "epoch": 0.13574644174821435, + "grad_norm": 6.7784308157570194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 27990 + }, + { + "epoch": 0.13579493994105044, + "grad_norm": 4.668925157602644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28000 + }, + { + "epoch": 0.13584343813388652, + "grad_norm": 1.1813923265435733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28010 + }, + { + "epoch": 0.1358919363267226, + "grad_norm": 1.0566548553470057e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28020 + }, + { + "epoch": 0.13594043451955873, + "grad_norm": 1.0886108611884993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28030 + }, + { + "epoch": 0.13598893271239482, + "grad_norm": 4.176258244115161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28040 + }, + { + "epoch": 0.1360374309052309, + "grad_norm": 5.18901833856944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28050 + }, + { + "epoch": 0.136085929098067, + "grad_norm": 1.0024369657912757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28060 + }, + { + "epoch": 0.13613442729090308, + "grad_norm": 9.558136298437603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28070 + }, + { + "epoch": 0.13618292548373917, + "grad_norm": 8.608468306192663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28080 + }, + { + "epoch": 0.13623142367657526, + "grad_norm": 3.831664798781276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28090 + }, + { + "epoch": 0.13627992186941135, + "grad_norm": 1.0732642294897232e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28100 + }, + { + "epoch": 0.13632842006224744, + "grad_norm": 8.986093234852888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28110 + }, + { + "epoch": 0.13637691825508352, + "grad_norm": 4.6058903535595164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28120 + }, + { + "epoch": 0.1364254164479196, + "grad_norm": 9.624013728171121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28130 + }, + { + "epoch": 0.1364739146407557, + "grad_norm": 3.367668341525132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28140 + }, + { + "epoch": 0.1365224128335918, + "grad_norm": 3.746482889255276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28150 + }, + { + "epoch": 0.13657091102642788, + "grad_norm": 7.342502613028046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28160 + }, + { + "epoch": 0.13661940921926397, + "grad_norm": 7.483759873139206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28170 + }, + { + "epoch": 0.13666790741210005, + "grad_norm": 8.093744327197783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28180 + }, + { + "epoch": 0.13671640560493614, + "grad_norm": 3.1915099043544615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28190 + }, + { + "epoch": 0.13676490379777223, + "grad_norm": 3.07181562675396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28200 + }, + { + "epoch": 0.13681340199060832, + "grad_norm": 6.472617769759381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28210 + }, + { + "epoch": 0.1368619001834444, + "grad_norm": 6.480189313151641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28220 + }, + { + "epoch": 0.1369103983762805, + "grad_norm": 7.0009141381888185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28230 + }, + { + "epoch": 0.13695889656911658, + "grad_norm": 5.035351023252588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28240 + }, + { + "epoch": 0.13700739476195267, + "grad_norm": 2.8787151222786633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28250 + }, + { + "epoch": 0.13705589295478876, + "grad_norm": 6.145308361737989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28260 + }, + { + "epoch": 0.13710439114762485, + "grad_norm": 6.117270004324382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28270 + }, + { + "epoch": 0.13715288934046094, + "grad_norm": 5.73626584809972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28280 + }, + { + "epoch": 0.13720138753329705, + "grad_norm": 3.02364605886396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28290 + }, + { + "epoch": 0.13724988572613314, + "grad_norm": 2.9599420940940036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28300 + }, + { + "epoch": 0.13729838391896923, + "grad_norm": 5.892801254958613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28310 + }, + { + "epoch": 0.13734688211180532, + "grad_norm": 5.844781298947055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28320 + }, + { + "epoch": 0.1373953803046414, + "grad_norm": 4.785455075762002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28330 + }, + { + "epoch": 0.1374438784974775, + "grad_norm": 2.2652850475424202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28340 + }, + { + "epoch": 0.13749237669031358, + "grad_norm": 2.3137924927141285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28350 + }, + { + "epoch": 0.13754087488314967, + "grad_norm": 4.863771664531669e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28360 + }, + { + "epoch": 0.13758937307598576, + "grad_norm": 4.961494596500415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28370 + }, + { + "epoch": 0.13763787126882185, + "grad_norm": 4.523678853729507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28380 + }, + { + "epoch": 0.13768636946165794, + "grad_norm": 2.870522394005093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28390 + }, + { + "epoch": 0.13773486765449403, + "grad_norm": 2.24086147682101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28400 + }, + { + "epoch": 0.1377833658473301, + "grad_norm": 4.655445081880316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28410 + }, + { + "epoch": 0.1378318640401662, + "grad_norm": 4.884663212578744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28420 + }, + { + "epoch": 0.1378803622330023, + "grad_norm": 4.518265086517204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28430 + }, + { + "epoch": 0.13792886042583838, + "grad_norm": 2.348818725295132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28440 + }, + { + "epoch": 0.13797735861867447, + "grad_norm": 2.354358457523631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28450 + }, + { + "epoch": 0.13802585681151056, + "grad_norm": 4.3535678742046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28460 + }, + { + "epoch": 0.13807435500434664, + "grad_norm": 4.271028501534602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28470 + }, + { + "epoch": 0.13812285319718273, + "grad_norm": 3.878632924170233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28480 + }, + { + "epoch": 0.13817135139001882, + "grad_norm": 1.8932114471681416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28490 + }, + { + "epoch": 0.1382198495828549, + "grad_norm": 2.162246346415486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28500 + }, + { + "epoch": 0.138268347775691, + "grad_norm": 3.965819360018941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28510 + }, + { + "epoch": 0.13831684596852709, + "grad_norm": 4.293523488740902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28520 + }, + { + "epoch": 0.13836534416136317, + "grad_norm": 3.924652901332593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28530 + }, + { + "epoch": 0.1384138423541993, + "grad_norm": 2.0767033674928825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28540 + }, + { + "epoch": 0.13846234054703538, + "grad_norm": 4.625521341949934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28550 + }, + { + "epoch": 0.13851083873987147, + "grad_norm": 3.5987695810035802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28560 + }, + { + "epoch": 0.13855933693270756, + "grad_norm": 3.5746006687986664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28570 + }, + { + "epoch": 0.13860783512554364, + "grad_norm": 3.389717676327564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28580 + }, + { + "epoch": 0.13865633331837973, + "grad_norm": 1.8709107507675071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28590 + }, + { + "epoch": 0.13870483151121582, + "grad_norm": 1.7561733329785056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28600 + }, + { + "epoch": 0.1387533297040519, + "grad_norm": 3.34558308168198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28610 + }, + { + "epoch": 0.138801827896888, + "grad_norm": 3.7797660752403317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28620 + }, + { + "epoch": 0.13885032608972409, + "grad_norm": 3.178307906637201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28630 + }, + { + "epoch": 0.13889882428256017, + "grad_norm": 1.951703325175913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28640 + }, + { + "epoch": 0.13894732247539626, + "grad_norm": 1.8666760297492146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28650 + }, + { + "epoch": 0.13899582066823235, + "grad_norm": 3.401071808184497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28660 + }, + { + "epoch": 0.13904431886106844, + "grad_norm": 3.210524255337077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28670 + }, + { + "epoch": 0.13909281705390453, + "grad_norm": 3.087601953666308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28680 + }, + { + "epoch": 0.13914131524674062, + "grad_norm": 1.6562649989282363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28690 + }, + { + "epoch": 0.1391898134395767, + "grad_norm": 2.218501322204247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28700 + }, + { + "epoch": 0.1392383116324128, + "grad_norm": 3.060011295019649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28710 + }, + { + "epoch": 0.13928680982524888, + "grad_norm": 2.845477865776047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28720 + }, + { + "epoch": 0.13933530801808497, + "grad_norm": 3.063672465941636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28730 + }, + { + "epoch": 0.13938380621092106, + "grad_norm": 1.8318163483854732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28740 + }, + { + "epoch": 0.13943230440375715, + "grad_norm": 2.3174825400928967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28750 + }, + { + "epoch": 0.13948080259659323, + "grad_norm": 2.6014490686065983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28760 + }, + { + "epoch": 0.13952930078942932, + "grad_norm": 2.8664444471360184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28770 + }, + { + "epoch": 0.1395777989822654, + "grad_norm": 2.728423623921117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28780 + }, + { + "epoch": 0.1396262971751015, + "grad_norm": 1.6467869272673852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28790 + }, + { + "epoch": 0.13967479536793762, + "grad_norm": 1.547584247418854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28800 + }, + { + "epoch": 0.1397232935607737, + "grad_norm": 2.6118602818314685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28810 + }, + { + "epoch": 0.1397717917536098, + "grad_norm": 2.6005461677414132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28820 + }, + { + "epoch": 0.13982028994644588, + "grad_norm": 2.6154793886234984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28830 + }, + { + "epoch": 0.13986878813928197, + "grad_norm": 1.4973362567616277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28840 + }, + { + "epoch": 0.13991728633211806, + "grad_norm": 1.444596136934706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28850 + }, + { + "epoch": 0.13996578452495415, + "grad_norm": 2.4180699256248772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28860 + }, + { + "epoch": 0.14001428271779023, + "grad_norm": 2.2493691176350694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28870 + }, + { + "epoch": 0.14006278091062632, + "grad_norm": 2.6778338906296995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28880 + }, + { + "epoch": 0.1401112791034624, + "grad_norm": 1.3179112556827022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28890 + }, + { + "epoch": 0.1401597772962985, + "grad_norm": 1.4143205362415756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28900 + }, + { + "epoch": 0.1402082754891346, + "grad_norm": 5.500322004081681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28910 + }, + { + "epoch": 0.14025677368197068, + "grad_norm": 2.3384361611533677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28920 + }, + { + "epoch": 0.14030527187480676, + "grad_norm": 2.222416469521704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28930 + }, + { + "epoch": 0.14035377006764285, + "grad_norm": 1.3767571545031387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28940 + }, + { + "epoch": 0.14040226826047894, + "grad_norm": 1.4151515870253206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28950 + }, + { + "epoch": 0.14045076645331503, + "grad_norm": 2.139175649062963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28960 + }, + { + "epoch": 0.14049926464615112, + "grad_norm": 2.3185509689938044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28970 + }, + { + "epoch": 0.1405477628389872, + "grad_norm": 2.268848675157642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28980 + }, + { + "epoch": 0.1405962610318233, + "grad_norm": 1.2649514928853023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 28990 + }, + { + "epoch": 0.14064475922465938, + "grad_norm": 1.2786313163815066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29000 + }, + { + "epoch": 0.14069325741749547, + "grad_norm": 2.218521331087686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29010 + }, + { + "epoch": 0.14074175561033156, + "grad_norm": 2.1122916677995818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29020 + }, + { + "epoch": 0.14079025380316765, + "grad_norm": 2.097022615998867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29030 + }, + { + "epoch": 0.14083875199600374, + "grad_norm": 1.3529735269912635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29040 + }, + { + "epoch": 0.14088725018883985, + "grad_norm": 1.3668003475686419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29050 + }, + { + "epoch": 0.14093574838167594, + "grad_norm": 1.886756422209146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29060 + }, + { + "epoch": 0.14098424657451203, + "grad_norm": 1.9692320165631827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29070 + }, + { + "epoch": 0.14103274476734812, + "grad_norm": 2.0983320609957445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29080 + }, + { + "epoch": 0.1410812429601842, + "grad_norm": 1.1347483450663276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29090 + }, + { + "epoch": 0.1411297411530203, + "grad_norm": 1.1413193305997993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29100 + }, + { + "epoch": 0.14117823934585638, + "grad_norm": 1.9558233361749444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29110 + }, + { + "epoch": 0.14122673753869247, + "grad_norm": 1.8911837287305389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29120 + }, + { + "epoch": 0.14127523573152856, + "grad_norm": 2.24630935008463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29130 + }, + { + "epoch": 0.14132373392436465, + "grad_norm": 1.1560786106201704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29140 + }, + { + "epoch": 0.14137223211720074, + "grad_norm": 1.1933316272916272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29150 + }, + { + "epoch": 0.14142073031003682, + "grad_norm": 1.7678396488918224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29160 + }, + { + "epoch": 0.1414692285028729, + "grad_norm": 1.7216256082974724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29170 + }, + { + "epoch": 0.141517726695709, + "grad_norm": 1.9174481167283375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29180 + }, + { + "epoch": 0.1415662248885451, + "grad_norm": 1.4178691571942181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29190 + }, + { + "epoch": 0.14161472308138118, + "grad_norm": 1.1693722399286344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29200 + }, + { + "epoch": 0.14166322127421727, + "grad_norm": 1.7774978005036246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29210 + }, + { + "epoch": 0.14171171946705335, + "grad_norm": 1.6649278222757857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29220 + }, + { + "epoch": 0.14176021765988944, + "grad_norm": 1.657318762227078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29230 + }, + { + "epoch": 0.14180871585272553, + "grad_norm": 1.0465328159625642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29240 + }, + { + "epoch": 0.14185721404556162, + "grad_norm": 1.1895556326635415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29250 + }, + { + "epoch": 0.1419057122383977, + "grad_norm": 1.8311279745830689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29260 + }, + { + "epoch": 0.1419542104312338, + "grad_norm": 1.7098135458581964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29270 + }, + { + "epoch": 0.14200270862406988, + "grad_norm": 1.9307633465359686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29280 + }, + { + "epoch": 0.14205120681690597, + "grad_norm": 1.2268270666027092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29290 + }, + { + "epoch": 0.14209970500974206, + "grad_norm": 2.74376361630857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29300 + }, + { + "epoch": 0.14214820320257818, + "grad_norm": 1.9890378553100163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29310 + }, + { + "epoch": 0.14219670139541427, + "grad_norm": 1.5997752598195802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29320 + }, + { + "epoch": 0.14224519958825035, + "grad_norm": 1.6959563708951464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29330 + }, + { + "epoch": 0.14229369778108644, + "grad_norm": 9.745293709784164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29340 + }, + { + "epoch": 0.14234219597392253, + "grad_norm": 1.4080990240472602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29350 + }, + { + "epoch": 0.14239069416675862, + "grad_norm": 1.6395879356423393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29360 + }, + { + "epoch": 0.1424391923595947, + "grad_norm": 1.5874367136348155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29370 + }, + { + "epoch": 0.1424876905524308, + "grad_norm": 1.5949012777127791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29380 + }, + { + "epoch": 0.14253618874526688, + "grad_norm": 1.1000962558682659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29390 + }, + { + "epoch": 0.14258468693810297, + "grad_norm": 1.014552935885149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29400 + }, + { + "epoch": 0.14263318513093906, + "grad_norm": 1.5038043557069614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29410 + }, + { + "epoch": 0.14268168332377515, + "grad_norm": 1.7345403193758102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29420 + }, + { + "epoch": 0.14273018151661124, + "grad_norm": 1.3488959211827023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29430 + }, + { + "epoch": 0.14277867970944733, + "grad_norm": 9.992884315579431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29440 + }, + { + "epoch": 0.14282717790228341, + "grad_norm": 9.294975598095334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29450 + }, + { + "epoch": 0.1428756760951195, + "grad_norm": 1.5323273601097753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29460 + }, + { + "epoch": 0.1429241742879556, + "grad_norm": 1.2780240012943977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29470 + }, + { + "epoch": 0.14297267248079168, + "grad_norm": 1.5732202882645652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29480 + }, + { + "epoch": 0.14302117067362777, + "grad_norm": 9.149297284238855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29490 + }, + { + "epoch": 0.14306966886646386, + "grad_norm": 9.438162464903144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29500 + }, + { + "epoch": 0.14311816705929994, + "grad_norm": 1.4196310758052277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29510 + }, + { + "epoch": 0.14316666525213603, + "grad_norm": 1.3695379266209784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29520 + }, + { + "epoch": 0.14321516344497212, + "grad_norm": 1.300630287914828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29530 + }, + { + "epoch": 0.1432636616378082, + "grad_norm": 9.00241673207347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29540 + }, + { + "epoch": 0.1433121598306443, + "grad_norm": 8.29476562103082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29550 + }, + { + "epoch": 0.1433606580234804, + "grad_norm": 1.4328121551443473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29560 + }, + { + "epoch": 0.1434091562163165, + "grad_norm": 1.2755242551065749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29570 + }, + { + "epoch": 0.1434576544091526, + "grad_norm": 1.3973734667160898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29580 + }, + { + "epoch": 0.14350615260198868, + "grad_norm": 9.000270324577286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29590 + }, + { + "epoch": 0.14355465079482477, + "grad_norm": 8.815067076284322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29600 + }, + { + "epoch": 0.14360314898766086, + "grad_norm": 1.3014456499149674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29610 + }, + { + "epoch": 0.14365164718049694, + "grad_norm": 1.1959974699493614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29620 + }, + { + "epoch": 0.14370014537333303, + "grad_norm": 1.199446387545322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29630 + }, + { + "epoch": 0.14374864356616912, + "grad_norm": 9.68430754255678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29640 + }, + { + "epoch": 0.1437971417590052, + "grad_norm": 8.006077791833377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29650 + }, + { + "epoch": 0.1438456399518413, + "grad_norm": 1.5219126225929358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29660 + }, + { + "epoch": 0.14389413814467739, + "grad_norm": 1.2001448794762837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29670 + }, + { + "epoch": 0.14394263633751347, + "grad_norm": 1.3137389487383189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29680 + }, + { + "epoch": 0.14399113453034956, + "grad_norm": 7.645337518624729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29690 + }, + { + "epoch": 0.14403963272318565, + "grad_norm": 1.7583284943611943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29700 + }, + { + "epoch": 0.14408813091602174, + "grad_norm": 1.1636057024588808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29710 + }, + { + "epoch": 0.14413662910885783, + "grad_norm": 1.1639525610007695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29720 + }, + { + "epoch": 0.14418512730169392, + "grad_norm": 1.1654771014946164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29730 + }, + { + "epoch": 0.14423362549453, + "grad_norm": 8.276242624560837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29740 + }, + { + "epoch": 0.1442821236873661, + "grad_norm": 1.5353186881839065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29750 + }, + { + "epoch": 0.14433062188020218, + "grad_norm": 1.1155908623550204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29760 + }, + { + "epoch": 0.14437912007303827, + "grad_norm": 1.1141972890982288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29770 + }, + { + "epoch": 0.14442761826587436, + "grad_norm": 1.1557423249541898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29780 + }, + { + "epoch": 0.14447611645871045, + "grad_norm": 7.249722671076597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29790 + }, + { + "epoch": 0.14452461465154653, + "grad_norm": 8.226257932619774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29800 + }, + { + "epoch": 0.14457311284438262, + "grad_norm": 1.0764262015072745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29810 + }, + { + "epoch": 0.14462161103721874, + "grad_norm": 1.1319881423332845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29820 + }, + { + "epoch": 0.14467010923005483, + "grad_norm": 1.1027112805095385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29830 + }, + { + "epoch": 0.14471860742289092, + "grad_norm": 6.920521968822868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29840 + }, + { + "epoch": 0.144767105615727, + "grad_norm": 8.213551723201817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29850 + }, + { + "epoch": 0.1448156038085631, + "grad_norm": 1.0738782521002577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29860 + }, + { + "epoch": 0.14486410200139918, + "grad_norm": 1.0953134506053175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29870 + }, + { + "epoch": 0.14491260019423527, + "grad_norm": 1.6769548665251932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29880 + }, + { + "epoch": 0.14496109838707136, + "grad_norm": 6.938983005966293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29890 + }, + { + "epoch": 0.14500959657990745, + "grad_norm": 6.782863124499272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29900 + }, + { + "epoch": 0.14505809477274353, + "grad_norm": 1.1240097137488192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29910 + }, + { + "epoch": 0.14510659296557962, + "grad_norm": 1.1889135294040898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29920 + }, + { + "epoch": 0.1451550911584157, + "grad_norm": 9.949817467713729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29930 + }, + { + "epoch": 0.1452035893512518, + "grad_norm": 6.038989681655949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29940 + }, + { + "epoch": 0.1452520875440879, + "grad_norm": 7.852535759411694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29950 + }, + { + "epoch": 0.14530058573692398, + "grad_norm": 1.0131468570762081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29960 + }, + { + "epoch": 0.14534908392976006, + "grad_norm": 1.1851062708956306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29970 + }, + { + "epoch": 0.14539758212259615, + "grad_norm": 1.0974871429425548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29980 + }, + { + "epoch": 0.14544608031543224, + "grad_norm": 1.0075702903122874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 29990 + }, + { + "epoch": 0.14549457850826833, + "grad_norm": 9.66011498348962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30000 + }, + { + "epoch": 0.14554307670110442, + "grad_norm": 1.1264185104664648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30010 + }, + { + "epoch": 0.1455915748939405, + "grad_norm": 1.0840544746315572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30020 + }, + { + "epoch": 0.1456400730867766, + "grad_norm": 9.890138699120143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30030 + }, + { + "epoch": 0.14568857127961268, + "grad_norm": 8.776552249400993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30040 + }, + { + "epoch": 0.14573706947244877, + "grad_norm": 1.1627021194726694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30050 + }, + { + "epoch": 0.14578556766528486, + "grad_norm": 1.0378729484727955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30060 + }, + { + "epoch": 0.14583406585812095, + "grad_norm": 1.3125513760314789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30070 + }, + { + "epoch": 0.14588256405095706, + "grad_norm": 2.3253517156263115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30080 + }, + { + "epoch": 0.14593106224379315, + "grad_norm": 9.08430536128435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30090 + }, + { + "epoch": 0.14597956043662924, + "grad_norm": 1.078269406207255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30100 + }, + { + "epoch": 0.14602805862946533, + "grad_norm": 9.057558258973586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30110 + }, + { + "epoch": 0.14607655682230142, + "grad_norm": 9.55445898398466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30120 + }, + { + "epoch": 0.1461250550151375, + "grad_norm": 9.713868394101155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30130 + }, + { + "epoch": 0.1461735532079736, + "grad_norm": 1.18439675134141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30140 + }, + { + "epoch": 0.14622205140080968, + "grad_norm": 8.622357086096599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30150 + }, + { + "epoch": 0.14627054959364577, + "grad_norm": 9.437987387173052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30160 + }, + { + "epoch": 0.14631904778648186, + "grad_norm": 9.976854471460683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30170 + }, + { + "epoch": 0.14636754597931795, + "grad_norm": 8.798861017567106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30180 + }, + { + "epoch": 0.14641604417215404, + "grad_norm": 7.632050369466015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30190 + }, + { + "epoch": 0.14646454236499012, + "grad_norm": 6.838976105427719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30200 + }, + { + "epoch": 0.1465130405578262, + "grad_norm": 9.545018428980256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30210 + }, + { + "epoch": 0.1465615387506623, + "grad_norm": 9.214492706632882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30220 + }, + { + "epoch": 0.1466100369434984, + "grad_norm": 9.144978889708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30230 + }, + { + "epoch": 0.14665853513633448, + "grad_norm": 6.44633303181763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30240 + }, + { + "epoch": 0.14670703332917057, + "grad_norm": 6.163497232591908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30250 + }, + { + "epoch": 0.14675553152200665, + "grad_norm": 7.989494292814925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30260 + }, + { + "epoch": 0.14680402971484274, + "grad_norm": 8.59793146901211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30270 + }, + { + "epoch": 0.14685252790767883, + "grad_norm": 8.041745900300157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30280 + }, + { + "epoch": 0.14690102610051492, + "grad_norm": 7.081734452185628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30290 + }, + { + "epoch": 0.146949524293351, + "grad_norm": 9.727324368213885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30300 + }, + { + "epoch": 0.1469980224861871, + "grad_norm": 9.144069395006227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30310 + }, + { + "epoch": 0.14704652067902318, + "grad_norm": 8.205119570448005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30320 + }, + { + "epoch": 0.1470950188718593, + "grad_norm": 8.784200531408715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30330 + }, + { + "epoch": 0.1471435170646954, + "grad_norm": 6.570306254616298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30340 + }, + { + "epoch": 0.14719201525753148, + "grad_norm": 6.400151733032544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30350 + }, + { + "epoch": 0.14724051345036757, + "grad_norm": 9.054598990587692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30360 + }, + { + "epoch": 0.14728901164320365, + "grad_norm": 8.45626800582977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30370 + }, + { + "epoch": 0.14733750983603974, + "grad_norm": 8.266357554020942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30380 + }, + { + "epoch": 0.14738600802887583, + "grad_norm": 8.832926141622011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30390 + }, + { + "epoch": 0.14743450622171192, + "grad_norm": 7.53916197027138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30400 + }, + { + "epoch": 0.147483004414548, + "grad_norm": 8.20314937755029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30410 + }, + { + "epoch": 0.1475315026073841, + "grad_norm": 8.275882805719448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30420 + }, + { + "epoch": 0.14758000080022018, + "grad_norm": 8.875208550307434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30430 + }, + { + "epoch": 0.14762849899305627, + "grad_norm": 5.66055007311661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30440 + }, + { + "epoch": 0.14767699718589236, + "grad_norm": 6.484394816652639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30450 + }, + { + "epoch": 0.14772549537872845, + "grad_norm": 8.121241421576997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30460 + }, + { + "epoch": 0.14777399357156454, + "grad_norm": 7.316394885492628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30470 + }, + { + "epoch": 0.14782249176440063, + "grad_norm": 8.210753890125488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30480 + }, + { + "epoch": 0.14787098995723671, + "grad_norm": 5.382499921324779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30490 + }, + { + "epoch": 0.1479194881500728, + "grad_norm": 8.028887918953842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30500 + }, + { + "epoch": 0.1479679863429089, + "grad_norm": 7.682241971451731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30510 + }, + { + "epoch": 0.14801648453574498, + "grad_norm": 7.736047678008617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30520 + }, + { + "epoch": 0.14806498272858107, + "grad_norm": 7.350319037868758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30530 + }, + { + "epoch": 0.14811348092141716, + "grad_norm": 5.952108494966524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30540 + }, + { + "epoch": 0.14816197911425324, + "grad_norm": 6.041780125087826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30550 + }, + { + "epoch": 0.14821047730708933, + "grad_norm": 7.443514391525241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30560 + }, + { + "epoch": 0.14825897549992542, + "grad_norm": 7.425890089507448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30570 + }, + { + "epoch": 0.1483074736927615, + "grad_norm": 7.287665084731998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30580 + }, + { + "epoch": 0.14835597188559763, + "grad_norm": 6.374484087245946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30590 + }, + { + "epoch": 0.14840447007843371, + "grad_norm": 5.72431076761859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30600 + }, + { + "epoch": 0.1484529682712698, + "grad_norm": 7.446441827596573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30610 + }, + { + "epoch": 0.1485014664641059, + "grad_norm": 7.010735316725913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30620 + }, + { + "epoch": 0.14854996465694198, + "grad_norm": 7.682215823479055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30630 + }, + { + "epoch": 0.14859846284977807, + "grad_norm": 5.597524932454689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30640 + }, + { + "epoch": 0.14864696104261416, + "grad_norm": 7.969094326654158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30650 + }, + { + "epoch": 0.14869545923545024, + "grad_norm": 6.416329938474519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30660 + }, + { + "epoch": 0.14874395742828633, + "grad_norm": 7.141983928704576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30670 + }, + { + "epoch": 0.14879245562112242, + "grad_norm": 7.182308650044433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30680 + }, + { + "epoch": 0.1488409538139585, + "grad_norm": 5.386515340433107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30690 + }, + { + "epoch": 0.1488894520067946, + "grad_norm": 5.482574465531798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30700 + }, + { + "epoch": 0.1489379501996307, + "grad_norm": 6.395254104063497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30710 + }, + { + "epoch": 0.14898644839246677, + "grad_norm": 6.703003236907534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30720 + }, + { + "epoch": 0.14903494658530286, + "grad_norm": 6.912205208209343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30730 + }, + { + "epoch": 0.14908344477813895, + "grad_norm": 5.657639690070937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30740 + }, + { + "epoch": 0.14913194297097504, + "grad_norm": 5.198671715334058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30750 + }, + { + "epoch": 0.14918044116381113, + "grad_norm": 6.982976969993615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30760 + }, + { + "epoch": 0.14922893935664722, + "grad_norm": 6.735509714417276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30770 + }, + { + "epoch": 0.1492774375494833, + "grad_norm": 6.540098524965288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30780 + }, + { + "epoch": 0.1493259357423194, + "grad_norm": 4.709258689672424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30790 + }, + { + "epoch": 0.14937443393515548, + "grad_norm": 6.434966053348035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30800 + }, + { + "epoch": 0.14942293212799157, + "grad_norm": 6.746238909727253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30810 + }, + { + "epoch": 0.14947143032082766, + "grad_norm": 6.334025783871766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30820 + }, + { + "epoch": 0.14951992851366375, + "grad_norm": 8.095266252894362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30830 + }, + { + "epoch": 0.14956842670649984, + "grad_norm": 5.737475703426753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30840 + }, + { + "epoch": 0.14961692489933595, + "grad_norm": 3.9316941524703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30850 + }, + { + "epoch": 0.14966542309217204, + "grad_norm": 6.013250981595775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30860 + }, + { + "epoch": 0.14971392128500813, + "grad_norm": 6.319520480246865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30870 + }, + { + "epoch": 0.14976241947784422, + "grad_norm": 6.239731646928703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30880 + }, + { + "epoch": 0.1498109176706803, + "grad_norm": 4.370180022306158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30890 + }, + { + "epoch": 0.1498594158635164, + "grad_norm": 4.871721444033028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30900 + }, + { + "epoch": 0.14990791405635248, + "grad_norm": 6.000311714160489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30910 + }, + { + "epoch": 0.14995641224918857, + "grad_norm": 5.609174422716023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30920 + }, + { + "epoch": 0.15000491044202466, + "grad_norm": 5.85831685384619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30930 + }, + { + "epoch": 0.15005340863486075, + "grad_norm": 4.1547366436134325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30940 + }, + { + "epoch": 0.15010190682769683, + "grad_norm": 4.352856990408327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30950 + }, + { + "epoch": 0.15015040502053292, + "grad_norm": 6.28516147571645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30960 + }, + { + "epoch": 0.150198903213369, + "grad_norm": 5.764708248534589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30970 + }, + { + "epoch": 0.1502474014062051, + "grad_norm": 5.486548957378545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30980 + }, + { + "epoch": 0.1502958995990412, + "grad_norm": 4.4869770476907433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 30990 + }, + { + "epoch": 0.15034439779187728, + "grad_norm": 3.97609937863308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31000 + }, + { + "epoch": 0.15039289598471337, + "grad_norm": 6.141540325188544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31010 + }, + { + "epoch": 0.15044139417754945, + "grad_norm": 6.119493036749191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31020 + }, + { + "epoch": 0.15048989237038554, + "grad_norm": 6.596525850000035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31030 + }, + { + "epoch": 0.15053839056322163, + "grad_norm": 4.473586727726797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31040 + }, + { + "epoch": 0.15058688875605772, + "grad_norm": 4.3395965576564777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31050 + }, + { + "epoch": 0.1506353869488938, + "grad_norm": 5.34166986199125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31060 + }, + { + "epoch": 0.1506838851417299, + "grad_norm": 2.148312887584325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31070 + }, + { + "epoch": 0.15073238333456598, + "grad_norm": 5.700348992832005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31080 + }, + { + "epoch": 0.15078088152740207, + "grad_norm": 4.2755121398840856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31090 + }, + { + "epoch": 0.1508293797202382, + "grad_norm": 4.197938210381835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31100 + }, + { + "epoch": 0.15087787791307428, + "grad_norm": 5.46921512523113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31110 + }, + { + "epoch": 0.15092637610591036, + "grad_norm": 5.385520012168854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31120 + }, + { + "epoch": 0.15097487429874645, + "grad_norm": 5.583992219726497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31130 + }, + { + "epoch": 0.15102337249158254, + "grad_norm": 4.345675108652358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31140 + }, + { + "epoch": 0.15107187068441863, + "grad_norm": 3.7968393939991074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31150 + }, + { + "epoch": 0.15112036887725472, + "grad_norm": 6.861034194116655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31160 + }, + { + "epoch": 0.1511688670700908, + "grad_norm": 5.126995006321522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31170 + }, + { + "epoch": 0.1512173652629269, + "grad_norm": 4.830484954254644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31180 + }, + { + "epoch": 0.15126586345576298, + "grad_norm": 4.249551182056166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31190 + }, + { + "epoch": 0.15131436164859907, + "grad_norm": 3.8570283322769683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31200 + }, + { + "epoch": 0.15136285984143516, + "grad_norm": 5.465113304126135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31210 + }, + { + "epoch": 0.15141135803427125, + "grad_norm": 5.007286176805792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31220 + }, + { + "epoch": 0.15145985622710734, + "grad_norm": 5.151144364390348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31230 + }, + { + "epoch": 0.15150835441994343, + "grad_norm": 4.054919884310948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31240 + }, + { + "epoch": 0.1515568526127795, + "grad_norm": 4.2481178752495907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31250 + }, + { + "epoch": 0.1516053508056156, + "grad_norm": 5.25511723026284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31260 + }, + { + "epoch": 0.1516538489984517, + "grad_norm": 4.953346888214583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31270 + }, + { + "epoch": 0.15170234719128778, + "grad_norm": 4.987589932170522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31280 + }, + { + "epoch": 0.15175084538412387, + "grad_norm": 3.571971660676354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31290 + }, + { + "epoch": 0.15179934357695996, + "grad_norm": 3.910521968464309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31300 + }, + { + "epoch": 0.15184784176979604, + "grad_norm": 5.367637072595244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31310 + }, + { + "epoch": 0.15189633996263213, + "grad_norm": 5.132117166795069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31320 + }, + { + "epoch": 0.15194483815546822, + "grad_norm": 5.204527724345098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31330 + }, + { + "epoch": 0.1519933363483043, + "grad_norm": 4.2535262423371023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31340 + }, + { + "epoch": 0.1520418345411404, + "grad_norm": 3.6465897323978425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31350 + }, + { + "epoch": 0.1520903327339765, + "grad_norm": 5.803939870929753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31360 + }, + { + "epoch": 0.1521388309268126, + "grad_norm": 5.220834964347887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31370 + }, + { + "epoch": 0.1521873291196487, + "grad_norm": 4.916101374874415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31380 + }, + { + "epoch": 0.15223582731248478, + "grad_norm": 3.631951130955713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31390 + }, + { + "epoch": 0.15228432550532087, + "grad_norm": 3.2788250337034697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31400 + }, + { + "epoch": 0.15233282369815696, + "grad_norm": 5.457379188555933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31410 + }, + { + "epoch": 0.15238132189099304, + "grad_norm": 4.909436484012986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31420 + }, + { + "epoch": 0.15242982008382913, + "grad_norm": 4.842912062485993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31430 + }, + { + "epoch": 0.15247831827666522, + "grad_norm": 3.643803836439474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31440 + }, + { + "epoch": 0.1525268164695013, + "grad_norm": 3.290890049356676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31450 + }, + { + "epoch": 0.1525753146623374, + "grad_norm": 4.856009923059901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31460 + }, + { + "epoch": 0.15262381285517349, + "grad_norm": 8.438835266133538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31470 + }, + { + "epoch": 0.15267231104800957, + "grad_norm": 4.853414452554716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31480 + }, + { + "epoch": 0.15272080924084566, + "grad_norm": 3.540589545991679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31490 + }, + { + "epoch": 0.15276930743368175, + "grad_norm": 3.863424922201375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31500 + }, + { + "epoch": 0.15281780562651784, + "grad_norm": 5.120469381836301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31510 + }, + { + "epoch": 0.15286630381935393, + "grad_norm": 4.6918671614548657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31520 + }, + { + "epoch": 0.15291480201219002, + "grad_norm": 4.4708275481752935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31530 + }, + { + "epoch": 0.1529633002050261, + "grad_norm": 3.5645271623252484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31540 + }, + { + "epoch": 0.1530117983978622, + "grad_norm": 3.4603417020662164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31550 + }, + { + "epoch": 0.15306029659069828, + "grad_norm": 4.3511468561518996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31560 + }, + { + "epoch": 0.15310879478353437, + "grad_norm": 4.437953862179711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31570 + }, + { + "epoch": 0.15315729297637046, + "grad_norm": 4.480688176045078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31580 + }, + { + "epoch": 0.15320579116920655, + "grad_norm": 3.7912755601610115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31590 + }, + { + "epoch": 0.15325428936204263, + "grad_norm": 3.1559235935674224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31600 + }, + { + "epoch": 0.15330278755487875, + "grad_norm": 4.038482188661874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31610 + }, + { + "epoch": 0.15335128574771484, + "grad_norm": 5.305003014655085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31620 + }, + { + "epoch": 0.15339978394055093, + "grad_norm": 4.544119462934759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31630 + }, + { + "epoch": 0.15344828213338702, + "grad_norm": 3.536680139859527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31640 + }, + { + "epoch": 0.1534967803262231, + "grad_norm": 3.1403817501995945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31650 + }, + { + "epoch": 0.1535452785190592, + "grad_norm": 4.2296917968087655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31660 + }, + { + "epoch": 0.15359377671189528, + "grad_norm": 4.35982059343587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31670 + }, + { + "epoch": 0.15364227490473137, + "grad_norm": 4.364696053471562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31680 + }, + { + "epoch": 0.15369077309756746, + "grad_norm": 3.3352486639159906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31690 + }, + { + "epoch": 0.15373927129040355, + "grad_norm": 4.159565492045658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31700 + }, + { + "epoch": 0.15378776948323963, + "grad_norm": 4.4015183675583103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31710 + }, + { + "epoch": 0.15383626767607572, + "grad_norm": 3.972827755660546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31720 + }, + { + "epoch": 0.1538847658689118, + "grad_norm": 4.0428960801364155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31730 + }, + { + "epoch": 0.1539332640617479, + "grad_norm": 3.142491209473519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31740 + }, + { + "epoch": 0.153981762254584, + "grad_norm": 3.763529434763768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31750 + }, + { + "epoch": 0.15403026044742008, + "grad_norm": 3.8866812701598974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31760 + }, + { + "epoch": 0.15407875864025616, + "grad_norm": 4.186761657365423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31770 + }, + { + "epoch": 0.15412725683309225, + "grad_norm": 3.9427058595720155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31780 + }, + { + "epoch": 0.15417575502592834, + "grad_norm": 2.934550025202043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31790 + }, + { + "epoch": 0.15422425321876443, + "grad_norm": 3.0410461704377667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31800 + }, + { + "epoch": 0.15427275141160052, + "grad_norm": 4.3637732005663565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31810 + }, + { + "epoch": 0.1543212496044366, + "grad_norm": 4.258025398939935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31820 + }, + { + "epoch": 0.1543697477972727, + "grad_norm": 3.865157793825347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31830 + }, + { + "epoch": 0.15441824599010878, + "grad_norm": 2.870969808554946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31840 + }, + { + "epoch": 0.15446674418294487, + "grad_norm": 2.9696454362238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31850 + }, + { + "epoch": 0.15451524237578096, + "grad_norm": 3.927748366550077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31860 + }, + { + "epoch": 0.15456374056861708, + "grad_norm": 3.9774636206857394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31870 + }, + { + "epoch": 0.15461223876145316, + "grad_norm": 4.242596389758546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31880 + }, + { + "epoch": 0.15466073695428925, + "grad_norm": 2.809378258916695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31890 + }, + { + "epoch": 0.15470923514712534, + "grad_norm": 2.874928668461507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31900 + }, + { + "epoch": 0.15475773333996143, + "grad_norm": 3.6770549627362925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31910 + }, + { + "epoch": 0.15480623153279752, + "grad_norm": 3.915736499493505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31920 + }, + { + "epoch": 0.1548547297256336, + "grad_norm": 3.470622402801382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31930 + }, + { + "epoch": 0.1549032279184697, + "grad_norm": 3.4135305782001524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31940 + }, + { + "epoch": 0.15495172611130578, + "grad_norm": 2.885309413613868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31950 + }, + { + "epoch": 0.15500022430414187, + "grad_norm": 3.6068252029508585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31960 + }, + { + "epoch": 0.15504872249697796, + "grad_norm": 4.0045082982942404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31970 + }, + { + "epoch": 0.15509722068981405, + "grad_norm": 3.9273788843274815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31980 + }, + { + "epoch": 0.15514571888265014, + "grad_norm": 3.0819424523542693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 31990 + }, + { + "epoch": 0.15519421707548622, + "grad_norm": 2.637153215800936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32000 + }, + { + "epoch": 0.1552427152683223, + "grad_norm": 3.783981981086981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32010 + }, + { + "epoch": 0.1552912134611584, + "grad_norm": 3.529335117491428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32020 + }, + { + "epoch": 0.1553397116539945, + "grad_norm": 3.690282710522297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32030 + }, + { + "epoch": 0.15538820984683058, + "grad_norm": 3.0691077768096875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32040 + }, + { + "epoch": 0.15543670803966667, + "grad_norm": 3.162897712627455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32050 + }, + { + "epoch": 0.15548520623250275, + "grad_norm": 3.469474165740394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32060 + }, + { + "epoch": 0.15553370442533884, + "grad_norm": 3.355875435318012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32070 + }, + { + "epoch": 0.15558220261817493, + "grad_norm": 3.458613662132848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32080 + }, + { + "epoch": 0.15563070081101102, + "grad_norm": 2.6215585080535675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32090 + }, + { + "epoch": 0.1556791990038471, + "grad_norm": 2.9240322874102276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32100 + }, + { + "epoch": 0.1557276971966832, + "grad_norm": 3.5125384556522476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32110 + }, + { + "epoch": 0.1557761953895193, + "grad_norm": 3.497959539799922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32120 + }, + { + "epoch": 0.1558246935823554, + "grad_norm": 3.518360642829066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32130 + }, + { + "epoch": 0.1558731917751915, + "grad_norm": 2.733338817506592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32140 + }, + { + "epoch": 0.15592168996802758, + "grad_norm": 2.521294106827554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32150 + }, + { + "epoch": 0.15597018816086367, + "grad_norm": 3.543105719927553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32160 + }, + { + "epoch": 0.15601868635369975, + "grad_norm": 3.483865498310479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32170 + }, + { + "epoch": 0.15606718454653584, + "grad_norm": 3.3988337122536905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32180 + }, + { + "epoch": 0.15611568273937193, + "grad_norm": 2.430782899409678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32190 + }, + { + "epoch": 0.15616418093220802, + "grad_norm": 5.47735851341713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32200 + }, + { + "epoch": 0.1562126791250441, + "grad_norm": 3.235116139421734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32210 + }, + { + "epoch": 0.1562611773178802, + "grad_norm": 3.401253252377501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32220 + }, + { + "epoch": 0.15630967551071628, + "grad_norm": 3.4480970612094097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32230 + }, + { + "epoch": 0.15635817370355237, + "grad_norm": 2.2790024445384915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32240 + }, + { + "epoch": 0.15640667189638846, + "grad_norm": 2.554794491516077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32250 + }, + { + "epoch": 0.15645517008922455, + "grad_norm": 3.3393138210158213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32260 + }, + { + "epoch": 0.15650366828206064, + "grad_norm": 3.4366752288406133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32270 + }, + { + "epoch": 0.15655216647489673, + "grad_norm": 3.324190345210809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32280 + }, + { + "epoch": 0.15660066466773281, + "grad_norm": 2.3983213282008364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32290 + }, + { + "epoch": 0.1566491628605689, + "grad_norm": 2.536253589369153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32300 + }, + { + "epoch": 0.156697661053405, + "grad_norm": 3.352670034928451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32310 + }, + { + "epoch": 0.15674615924624108, + "grad_norm": 2.984324680710415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32320 + }, + { + "epoch": 0.15679465743907717, + "grad_norm": 3.14768755060868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32330 + }, + { + "epoch": 0.15684315563191326, + "grad_norm": 2.50354958097887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32340 + }, + { + "epoch": 0.15689165382474934, + "grad_norm": 2.1913311343269015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32350 + }, + { + "epoch": 0.15694015201758543, + "grad_norm": 3.1295434155254043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32360 + }, + { + "epoch": 0.15698865021042152, + "grad_norm": 3.382913575933344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32370 + }, + { + "epoch": 0.15703714840325764, + "grad_norm": 3.3231196994165657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32380 + }, + { + "epoch": 0.15708564659609373, + "grad_norm": 2.320883965012399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32390 + }, + { + "epoch": 0.1571341447889298, + "grad_norm": 2.601040876015759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32400 + }, + { + "epoch": 0.1571826429817659, + "grad_norm": 3.073259904340375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32410 + }, + { + "epoch": 0.157231141174602, + "grad_norm": 3.103123731307278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32420 + }, + { + "epoch": 0.15727963936743808, + "grad_norm": 3.10118707602669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32430 + }, + { + "epoch": 0.15732813756027417, + "grad_norm": 2.5645758228165505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32440 + }, + { + "epoch": 0.15737663575311026, + "grad_norm": 2.45983443392106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32450 + }, + { + "epoch": 0.15742513394594634, + "grad_norm": 3.0546007678822207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32460 + }, + { + "epoch": 0.15747363213878243, + "grad_norm": 3.2105094760481734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32470 + }, + { + "epoch": 0.15752213033161852, + "grad_norm": 3.207788950021495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32480 + }, + { + "epoch": 0.1575706285244546, + "grad_norm": 2.384462618465477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32490 + }, + { + "epoch": 0.1576191267172907, + "grad_norm": 2.3062473530899297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32500 + }, + { + "epoch": 0.15766762491012679, + "grad_norm": 3.359940023983654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32510 + }, + { + "epoch": 0.15771612310296287, + "grad_norm": 3.549447740169853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32520 + }, + { + "epoch": 0.15776462129579896, + "grad_norm": 3.17044055009319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32530 + }, + { + "epoch": 0.15781311948863505, + "grad_norm": 2.311518159103798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32540 + }, + { + "epoch": 0.15786161768147114, + "grad_norm": 2.6257717422595306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32550 + }, + { + "epoch": 0.15791011587430723, + "grad_norm": 3.0741659884370165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32560 + }, + { + "epoch": 0.15795861406714332, + "grad_norm": 2.8163188403595996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32570 + }, + { + "epoch": 0.1580071122599794, + "grad_norm": 2.8622361014640774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32580 + }, + { + "epoch": 0.1580556104528155, + "grad_norm": 2.2625962969868851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32590 + }, + { + "epoch": 0.15810410864565158, + "grad_norm": 2.4256894448626554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32600 + }, + { + "epoch": 0.15815260683848767, + "grad_norm": 3.0576254061998043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32610 + }, + { + "epoch": 0.15820110503132376, + "grad_norm": 3.154393937165878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32620 + }, + { + "epoch": 0.15824960322415985, + "grad_norm": 3.080351973494544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32630 + }, + { + "epoch": 0.15829810141699596, + "grad_norm": 2.1724858356719778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32640 + }, + { + "epoch": 0.15834659960983205, + "grad_norm": 2.531598397581547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32650 + }, + { + "epoch": 0.15839509780266814, + "grad_norm": 2.7803091029454663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32660 + }, + { + "epoch": 0.15844359599550423, + "grad_norm": 2.842376716216677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32670 + }, + { + "epoch": 0.15849209418834032, + "grad_norm": 2.7879090680471563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32680 + }, + { + "epoch": 0.1585405923811764, + "grad_norm": 2.2145518130400887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32690 + }, + { + "epoch": 0.1585890905740125, + "grad_norm": 2.1496224178463308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32700 + }, + { + "epoch": 0.15863758876684858, + "grad_norm": 2.980278850372997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32710 + }, + { + "epoch": 0.15868608695968467, + "grad_norm": 2.5822171778600023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32720 + }, + { + "epoch": 0.15873458515252076, + "grad_norm": 2.734487907218863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32730 + }, + { + "epoch": 0.15878308334535685, + "grad_norm": 2.1614036427308747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32740 + }, + { + "epoch": 0.15883158153819293, + "grad_norm": 2.3833371187720331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32750 + }, + { + "epoch": 0.15888007973102902, + "grad_norm": 2.898447917232261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32760 + }, + { + "epoch": 0.1589285779238651, + "grad_norm": 2.6816800868800783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32770 + }, + { + "epoch": 0.1589770761167012, + "grad_norm": 2.6531736807555717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32780 + }, + { + "epoch": 0.1590255743095373, + "grad_norm": 2.3261949877451116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32790 + }, + { + "epoch": 0.15907407250237338, + "grad_norm": 2.3730699183488468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32800 + }, + { + "epoch": 0.15912257069520946, + "grad_norm": 2.837654164977721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32810 + }, + { + "epoch": 0.15917106888804555, + "grad_norm": 2.705126291857596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32820 + }, + { + "epoch": 0.15921956708088164, + "grad_norm": 4.116107277241099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32830 + }, + { + "epoch": 0.15926806527371773, + "grad_norm": 2.2915433817161102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32840 + }, + { + "epoch": 0.15931656346655382, + "grad_norm": 2.220502892669174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32850 + }, + { + "epoch": 0.1593650616593899, + "grad_norm": 2.786928234854713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32860 + }, + { + "epoch": 0.159413559852226, + "grad_norm": 2.7978416028417996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32870 + }, + { + "epoch": 0.15946205804506208, + "grad_norm": 2.727968535509717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32880 + }, + { + "epoch": 0.1595105562378982, + "grad_norm": 1.938466880346823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32890 + }, + { + "epoch": 0.1595590544307343, + "grad_norm": 2.433144175029156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32900 + }, + { + "epoch": 0.15960755262357038, + "grad_norm": 2.8562732268255786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32910 + }, + { + "epoch": 0.15965605081640646, + "grad_norm": 2.577343707343971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32920 + }, + { + "epoch": 0.15970454900924255, + "grad_norm": 2.6776811523632205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32930 + }, + { + "epoch": 0.15975304720207864, + "grad_norm": 2.3107934055133228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32940 + }, + { + "epoch": 0.15980154539491473, + "grad_norm": 2.129574880882501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32950 + }, + { + "epoch": 0.15985004358775082, + "grad_norm": 2.646793859639729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32960 + }, + { + "epoch": 0.1598985417805869, + "grad_norm": 2.592506405107997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32970 + }, + { + "epoch": 0.159947039973423, + "grad_norm": 2.542113008985325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32980 + }, + { + "epoch": 0.15999553816625908, + "grad_norm": 2.3286915507014783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 32990 + }, + { + "epoch": 0.16004403635909517, + "grad_norm": 2.0494017860528402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33000 + }, + { + "epoch": 0.16009253455193126, + "grad_norm": 2.3945216298670857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33010 + }, + { + "epoch": 0.16014103274476735, + "grad_norm": 2.436749753087497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33020 + }, + { + "epoch": 0.16018953093760344, + "grad_norm": 2.6262921437592013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33030 + }, + { + "epoch": 0.16023802913043952, + "grad_norm": 2.0002289602416568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33040 + }, + { + "epoch": 0.1602865273232756, + "grad_norm": 2.71599986945148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33050 + }, + { + "epoch": 0.1603350255161117, + "grad_norm": 2.512732919512928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33060 + }, + { + "epoch": 0.1603835237089478, + "grad_norm": 2.443104563099041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33070 + }, + { + "epoch": 0.16043202190178388, + "grad_norm": 2.3853894504100026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33080 + }, + { + "epoch": 0.16048052009461997, + "grad_norm": 2.722402427934867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33090 + }, + { + "epoch": 0.16052901828745605, + "grad_norm": 2.3687816508299875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33100 + }, + { + "epoch": 0.16057751648029214, + "grad_norm": 2.437409420963377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33110 + }, + { + "epoch": 0.16062601467312823, + "grad_norm": 2.3471726251500513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33120 + }, + { + "epoch": 0.16067451286596432, + "grad_norm": 2.551060731548205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33130 + }, + { + "epoch": 0.1607230110588004, + "grad_norm": 2.089094266466418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33140 + }, + { + "epoch": 0.16077150925163652, + "grad_norm": 2.2081660233652656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33150 + }, + { + "epoch": 0.1608200074444726, + "grad_norm": 2.859070491467719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33160 + }, + { + "epoch": 0.1608685056373087, + "grad_norm": 2.4036498302848486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33170 + }, + { + "epoch": 0.1609170038301448, + "grad_norm": 2.5656669322415837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33180 + }, + { + "epoch": 0.16096550202298088, + "grad_norm": 2.808588419611624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33190 + }, + { + "epoch": 0.16101400021581697, + "grad_norm": 2.757761308203044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33200 + }, + { + "epoch": 0.16106249840865305, + "grad_norm": 2.325614048004354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33210 + }, + { + "epoch": 0.16111099660148914, + "grad_norm": 2.386312303315208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33220 + }, + { + "epoch": 0.16115949479432523, + "grad_norm": 2.724065666370734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33230 + }, + { + "epoch": 0.16120799298716132, + "grad_norm": 1.9152237484831858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33240 + }, + { + "epoch": 0.1612564911799974, + "grad_norm": 1.8851778804673813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33250 + }, + { + "epoch": 0.1613049893728335, + "grad_norm": 2.3131440229917644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33260 + }, + { + "epoch": 0.16135348756566958, + "grad_norm": 2.3736154730613634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33270 + }, + { + "epoch": 0.16140198575850567, + "grad_norm": 2.3367461210455076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33280 + }, + { + "epoch": 0.16145048395134176, + "grad_norm": 3.22403451491482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33290 + }, + { + "epoch": 0.16149898214417785, + "grad_norm": 1.9668560469199292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33300 + }, + { + "epoch": 0.16154748033701394, + "grad_norm": 2.400005314484588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33310 + }, + { + "epoch": 0.16159597852985003, + "grad_norm": 2.396474485522049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33320 + }, + { + "epoch": 0.16164447672268611, + "grad_norm": 2.3158746387252904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33330 + }, + { + "epoch": 0.1616929749155222, + "grad_norm": 2.182167833097992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33340 + }, + { + "epoch": 0.1617414731083583, + "grad_norm": 2.617920529246476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33350 + }, + { + "epoch": 0.16178997130119438, + "grad_norm": 2.3275055127669475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33360 + }, + { + "epoch": 0.16183846949403047, + "grad_norm": 2.2653149756024504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33370 + }, + { + "epoch": 0.16188696768686656, + "grad_norm": 2.4440154788862856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33380 + }, + { + "epoch": 0.16193546587970264, + "grad_norm": 1.8586064243208966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33390 + }, + { + "epoch": 0.16198396407253876, + "grad_norm": 1.9028243514185306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33400 + }, + { + "epoch": 0.16203246226537485, + "grad_norm": 2.246985815190783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33410 + }, + { + "epoch": 0.16208096045821094, + "grad_norm": 2.1724760301822243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33420 + }, + { + "epoch": 0.16212945865104703, + "grad_norm": 2.2348135075844766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33430 + }, + { + "epoch": 0.16217795684388311, + "grad_norm": 3.977721974024462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33440 + }, + { + "epoch": 0.1622264550367192, + "grad_norm": 1.9790881822245865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33450 + }, + { + "epoch": 0.1622749532295553, + "grad_norm": 2.1175834774567193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33460 + }, + { + "epoch": 0.16232345142239138, + "grad_norm": 2.2452383063864545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33470 + }, + { + "epoch": 0.16237194961522747, + "grad_norm": 2.313658882258096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33480 + }, + { + "epoch": 0.16242044780806356, + "grad_norm": 1.8925207712072734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33490 + }, + { + "epoch": 0.16246894600089964, + "grad_norm": 1.9558939357011695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33500 + }, + { + "epoch": 0.16251744419373573, + "grad_norm": 2.1886538092985575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33510 + }, + { + "epoch": 0.16256594238657182, + "grad_norm": 2.2767198970541358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33520 + }, + { + "epoch": 0.1626144405794079, + "grad_norm": 4.2184640847153787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33530 + }, + { + "epoch": 0.162662938772244, + "grad_norm": 1.8685285851915978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33540 + }, + { + "epoch": 0.1627114369650801, + "grad_norm": 1.994434342122986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33550 + }, + { + "epoch": 0.16275993515791617, + "grad_norm": 2.2399088095426123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33560 + }, + { + "epoch": 0.16280843335075226, + "grad_norm": 2.2254681653066655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33570 + }, + { + "epoch": 0.16285693154358835, + "grad_norm": 2.1654841475537978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33580 + }, + { + "epoch": 0.16290542973642444, + "grad_norm": 1.81463661874659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33590 + }, + { + "epoch": 0.16295392792926053, + "grad_norm": 1.923035171103038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33600 + }, + { + "epoch": 0.16300242612209662, + "grad_norm": 2.1066530564439745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33610 + }, + { + "epoch": 0.1630509243149327, + "grad_norm": 2.1900800106777751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33620 + }, + { + "epoch": 0.1630994225077688, + "grad_norm": 2.1550047790697135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33630 + }, + { + "epoch": 0.16314792070060488, + "grad_norm": 1.813050261034732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33640 + }, + { + "epoch": 0.16319641889344097, + "grad_norm": 1.9666823902753094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33650 + }, + { + "epoch": 0.16324491708627709, + "grad_norm": 2.208815175208656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33660 + }, + { + "epoch": 0.16329341527911317, + "grad_norm": 2.1566908969816723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33670 + }, + { + "epoch": 0.16334191347194926, + "grad_norm": 2.0959488722382957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33680 + }, + { + "epoch": 0.16339041166478535, + "grad_norm": 1.7890960180011461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33690 + }, + { + "epoch": 0.16343890985762144, + "grad_norm": 1.7760426374024973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33700 + }, + { + "epoch": 0.16348740805045753, + "grad_norm": 2.3549991112759017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33710 + }, + { + "epoch": 0.16353590624329362, + "grad_norm": 2.1484574119767785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33720 + }, + { + "epoch": 0.1635844044361297, + "grad_norm": 2.1364239444210398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33730 + }, + { + "epoch": 0.1636329026289658, + "grad_norm": 1.2219863947393605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33740 + }, + { + "epoch": 0.16368140082180188, + "grad_norm": 1.8119439459951536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33750 + }, + { + "epoch": 0.16372989901463797, + "grad_norm": 2.1014041351463675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33760 + }, + { + "epoch": 0.16377839720747406, + "grad_norm": 1.94278371168366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33770 + }, + { + "epoch": 0.16382689540031015, + "grad_norm": 2.094074886827002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33780 + }, + { + "epoch": 0.16387539359314623, + "grad_norm": 1.737653860800492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33790 + }, + { + "epoch": 0.16392389178598232, + "grad_norm": 1.8648209731964016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33800 + }, + { + "epoch": 0.1639723899788184, + "grad_norm": 2.039082289684302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33810 + }, + { + "epoch": 0.1640208881716545, + "grad_norm": 2.3728479447981954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33820 + }, + { + "epoch": 0.1640693863644906, + "grad_norm": 2.1595559474008041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33830 + }, + { + "epoch": 0.16411788455732668, + "grad_norm": 1.722348201838031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33840 + }, + { + "epoch": 0.16416638275016276, + "grad_norm": 2.058151267192443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33850 + }, + { + "epoch": 0.16421488094299885, + "grad_norm": 2.0531346933694294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33860 + }, + { + "epoch": 0.16426337913583494, + "grad_norm": 2.0067822958935722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33870 + }, + { + "epoch": 0.16431187732867103, + "grad_norm": 1.967098910427012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33880 + }, + { + "epoch": 0.16436037552150712, + "grad_norm": 1.7614458158732305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33890 + }, + { + "epoch": 0.1644088737143432, + "grad_norm": 1.7521136896903045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33900 + }, + { + "epoch": 0.1644573719071793, + "grad_norm": 1.9771728432260716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33910 + }, + { + "epoch": 0.1645058701000154, + "grad_norm": 2.029174481776863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33920 + }, + { + "epoch": 0.1645543682928515, + "grad_norm": 2.0905886799482687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33930 + }, + { + "epoch": 0.1646028664856876, + "grad_norm": 1.7245865535642224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33940 + }, + { + "epoch": 0.16465136467852368, + "grad_norm": 1.7717985656418023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33950 + }, + { + "epoch": 0.16469986287135976, + "grad_norm": 1.9855629318499268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33960 + }, + { + "epoch": 0.16474836106419585, + "grad_norm": 2.014898967672707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33970 + }, + { + "epoch": 0.16479685925703194, + "grad_norm": 2.001852834609963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33980 + }, + { + "epoch": 0.16484535744986803, + "grad_norm": 1.7016674291880918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 33990 + }, + { + "epoch": 0.16489385564270412, + "grad_norm": 1.7074903269076458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34000 + }, + { + "epoch": 0.1649423538355402, + "grad_norm": 1.9306679632791202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34010 + }, + { + "epoch": 0.1649908520283763, + "grad_norm": 1.9945814244692883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34020 + }, + { + "epoch": 0.16503935022121238, + "grad_norm": 2.0757792640324624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34030 + }, + { + "epoch": 0.16508784841404847, + "grad_norm": 1.9052228594773624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34040 + }, + { + "epoch": 0.16513634660688456, + "grad_norm": 1.6675991787451494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34050 + }, + { + "epoch": 0.16518484479972065, + "grad_norm": 2.0456805316371174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34060 + }, + { + "epoch": 0.16523334299255674, + "grad_norm": 2.0550345425363048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34070 + }, + { + "epoch": 0.16528184118539282, + "grad_norm": 2.1044731113306625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34080 + }, + { + "epoch": 0.1653303393782289, + "grad_norm": 5.39549432687636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34090 + }, + { + "epoch": 0.165378837571065, + "grad_norm": 1.6465024543776963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34100 + }, + { + "epoch": 0.1654273357639011, + "grad_norm": 2.0295154001814808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34110 + }, + { + "epoch": 0.16547583395673718, + "grad_norm": 1.982075303885722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34120 + }, + { + "epoch": 0.16552433214957327, + "grad_norm": 1.9825986896648828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34130 + }, + { + "epoch": 0.16557283034240936, + "grad_norm": 1.658903272527823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34140 + }, + { + "epoch": 0.16562132853524544, + "grad_norm": 1.6409623526669748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34150 + }, + { + "epoch": 0.16566982672808153, + "grad_norm": 1.9234656178923615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34160 + }, + { + "epoch": 0.16571832492091765, + "grad_norm": 1.9174420629042288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34170 + }, + { + "epoch": 0.16576682311375374, + "grad_norm": 2.335222717420038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34180 + }, + { + "epoch": 0.16581532130658982, + "grad_norm": 1.7283126396705484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34190 + }, + { + "epoch": 0.1658638194994259, + "grad_norm": 1.779747833552392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34200 + }, + { + "epoch": 0.165912317692262, + "grad_norm": 1.8972006898820837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34210 + }, + { + "epoch": 0.1659608158850981, + "grad_norm": 1.924355075288986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34220 + }, + { + "epoch": 0.16600931407793418, + "grad_norm": 1.90658369092489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34230 + }, + { + "epoch": 0.16605781227077027, + "grad_norm": 1.6565664395784552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34240 + }, + { + "epoch": 0.16610631046360635, + "grad_norm": 1.6255482648830366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34250 + }, + { + "epoch": 0.16615480865644244, + "grad_norm": 1.8957740621772246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34260 + }, + { + "epoch": 0.16620330684927853, + "grad_norm": 1.9115830696136982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34270 + }, + { + "epoch": 0.16625180504211462, + "grad_norm": 1.8610352015002718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34280 + }, + { + "epoch": 0.1663003032349507, + "grad_norm": 1.7087482717670355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34290 + }, + { + "epoch": 0.1663488014277868, + "grad_norm": 1.5917460416403628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34300 + }, + { + "epoch": 0.16639729962062288, + "grad_norm": 1.8125466283436253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34310 + }, + { + "epoch": 0.16644579781345897, + "grad_norm": 1.8531370926666568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34320 + }, + { + "epoch": 0.16649429600629506, + "grad_norm": 1.908090467850343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34330 + }, + { + "epoch": 0.16654279419913115, + "grad_norm": 1.5763075111863145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34340 + }, + { + "epoch": 0.16659129239196724, + "grad_norm": 1.6322391616085952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34350 + }, + { + "epoch": 0.16663979058480333, + "grad_norm": 1.8129087209217687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34360 + }, + { + "epoch": 0.16668828877763942, + "grad_norm": 1.8892585273988516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34370 + }, + { + "epoch": 0.1667367869704755, + "grad_norm": 1.8625364361923857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34380 + }, + { + "epoch": 0.1667852851633116, + "grad_norm": 1.6188559470720065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34390 + }, + { + "epoch": 0.16683378335614768, + "grad_norm": 1.5767999173021963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34400 + }, + { + "epoch": 0.16688228154898377, + "grad_norm": 1.867206123051801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34410 + }, + { + "epoch": 0.16693077974181986, + "grad_norm": 1.900364452467329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34420 + }, + { + "epoch": 0.16697927793465597, + "grad_norm": 1.9180829724518844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34430 + }, + { + "epoch": 0.16702777612749206, + "grad_norm": 1.7170295052437723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34440 + }, + { + "epoch": 0.16707627432032815, + "grad_norm": 1.6214707443396037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34450 + }, + { + "epoch": 0.16712477251316424, + "grad_norm": 1.8250499067562487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34460 + }, + { + "epoch": 0.16717327070600033, + "grad_norm": 1.8371453336385457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34470 + }, + { + "epoch": 0.16722176889883641, + "grad_norm": 1.8007844460043998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34480 + }, + { + "epoch": 0.1672702670916725, + "grad_norm": 1.636298918583634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34490 + }, + { + "epoch": 0.1673187652845086, + "grad_norm": 2.530313167881104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34500 + }, + { + "epoch": 0.16736726347734468, + "grad_norm": 2.0822560031774628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34510 + }, + { + "epoch": 0.16741576167018077, + "grad_norm": 1.8303221338555886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34520 + }, + { + "epoch": 0.16746425986301686, + "grad_norm": 7.988823199411854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34530 + }, + { + "epoch": 0.16751275805585294, + "grad_norm": 1.614551621287319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34540 + }, + { + "epoch": 0.16756125624868903, + "grad_norm": 1.5601065683767956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34550 + }, + { + "epoch": 0.16760975444152512, + "grad_norm": 1.7531907303691696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34560 + }, + { + "epoch": 0.1676582526343612, + "grad_norm": 1.8519567390740121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34570 + }, + { + "epoch": 0.1677067508271973, + "grad_norm": 1.8508660559746204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34580 + }, + { + "epoch": 0.1677552490200334, + "grad_norm": 1.5482933690691425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34590 + }, + { + "epoch": 0.16780374721286948, + "grad_norm": 1.5859461655054474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34600 + }, + { + "epoch": 0.16785224540570556, + "grad_norm": 1.7625458781367342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34610 + }, + { + "epoch": 0.16790074359854165, + "grad_norm": 1.7547051811561687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34620 + }, + { + "epoch": 0.16794924179137774, + "grad_norm": 1.8328421447222354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34630 + }, + { + "epoch": 0.16799773998421383, + "grad_norm": 1.5418621046592307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34640 + }, + { + "epoch": 0.16804623817704992, + "grad_norm": 1.6336882424639043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34650 + }, + { + "epoch": 0.168094736369886, + "grad_norm": 1.8215440888980083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34660 + }, + { + "epoch": 0.1681432345627221, + "grad_norm": 1.72685204802292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34670 + }, + { + "epoch": 0.1681917327555582, + "grad_norm": 1.8011488123192976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34680 + }, + { + "epoch": 0.1682402309483943, + "grad_norm": 1.6859615925568505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34690 + }, + { + "epoch": 0.1682887291412304, + "grad_norm": 1.5246234852384077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34700 + }, + { + "epoch": 0.16833722733406647, + "grad_norm": 1.8729086548319174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34710 + }, + { + "epoch": 0.16838572552690256, + "grad_norm": 1.886100164938398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34720 + }, + { + "epoch": 0.16843422371973865, + "grad_norm": 1.7913687599957484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34730 + }, + { + "epoch": 0.16848272191257474, + "grad_norm": 1.7680187625046528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34740 + }, + { + "epoch": 0.16853122010541083, + "grad_norm": 1.9097392112144007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34750 + }, + { + "epoch": 0.16857971829824692, + "grad_norm": 1.76427604969831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34760 + }, + { + "epoch": 0.168628216491083, + "grad_norm": 1.8168357485137676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34770 + }, + { + "epoch": 0.1686767146839191, + "grad_norm": 1.76575753130237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34780 + }, + { + "epoch": 0.16872521287675518, + "grad_norm": 1.5468054925804609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34790 + }, + { + "epoch": 0.16877371106959127, + "grad_norm": 1.5337656122937915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34800 + }, + { + "epoch": 0.16882220926242736, + "grad_norm": 1.7178058442368638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34810 + }, + { + "epoch": 0.16887070745526345, + "grad_norm": 1.7124101248100487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34820 + }, + { + "epoch": 0.16891920564809954, + "grad_norm": 2.3621032596565783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34830 + }, + { + "epoch": 0.16896770384093562, + "grad_norm": 1.5485983340113307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34840 + }, + { + "epoch": 0.1690162020337717, + "grad_norm": 1.4958551730615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34850 + }, + { + "epoch": 0.1690647002266078, + "grad_norm": 1.7782936367893853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34860 + }, + { + "epoch": 0.1691131984194439, + "grad_norm": 1.8528893974689709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34870 + }, + { + "epoch": 0.16916169661227998, + "grad_norm": 1.8775324406306027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34880 + }, + { + "epoch": 0.16921019480511607, + "grad_norm": 1.506450360011513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34890 + }, + { + "epoch": 0.16925869299795215, + "grad_norm": 1.5639015771284903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34900 + }, + { + "epoch": 0.16930719119078824, + "grad_norm": 1.7350292580431415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34910 + }, + { + "epoch": 0.16935568938362433, + "grad_norm": 1.880495972272911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34920 + }, + { + "epoch": 0.16940418757646042, + "grad_norm": 1.7792427797758137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34930 + }, + { + "epoch": 0.16945268576929653, + "grad_norm": 1.516437748705357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34940 + }, + { + "epoch": 0.16950118396213262, + "grad_norm": 1.514246434908273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34950 + }, + { + "epoch": 0.1695496821549687, + "grad_norm": 1.6944206038260745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34960 + }, + { + "epoch": 0.1695981803478048, + "grad_norm": 1.6814823311506188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34970 + }, + { + "epoch": 0.1696466785406409, + "grad_norm": 1.75594507823007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34980 + }, + { + "epoch": 0.16969517673347698, + "grad_norm": 1.5166345690431626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 34990 + }, + { + "epoch": 0.16974367492631307, + "grad_norm": 1.4847181262211961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35000 + }, + { + "epoch": 0.16979217311914915, + "grad_norm": 1.7070696856080758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35010 + }, + { + "epoch": 0.16984067131198524, + "grad_norm": 1.6581867612330825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35020 + }, + { + "epoch": 0.16988916950482133, + "grad_norm": 1.7178744826651382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35030 + }, + { + "epoch": 0.16993766769765742, + "grad_norm": 1.5031321254355134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35040 + }, + { + "epoch": 0.1699861658904935, + "grad_norm": 1.4794422042996302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35050 + }, + { + "epoch": 0.1700346640833296, + "grad_norm": 1.6746160724778747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35060 + }, + { + "epoch": 0.17008316227616568, + "grad_norm": 1.747734756918362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35070 + }, + { + "epoch": 0.17013166046900177, + "grad_norm": 1.7060661150480882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35080 + }, + { + "epoch": 0.17018015866183786, + "grad_norm": 1.7203893776240875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35090 + }, + { + "epoch": 0.17022865685467395, + "grad_norm": 1.5092383875980886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35100 + }, + { + "epoch": 0.17027715504751004, + "grad_norm": 1.7376311234329478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35110 + }, + { + "epoch": 0.17032565324034613, + "grad_norm": 1.6515261336280673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35120 + }, + { + "epoch": 0.1703741514331822, + "grad_norm": 1.8212715247045708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35130 + }, + { + "epoch": 0.1704226496260183, + "grad_norm": 1.5080667026268202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35140 + }, + { + "epoch": 0.1704711478188544, + "grad_norm": 2.1046308518180012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35150 + }, + { + "epoch": 0.17051964601169048, + "grad_norm": 1.7319706557827885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35160 + }, + { + "epoch": 0.17056814420452657, + "grad_norm": 1.6693633142494946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35170 + }, + { + "epoch": 0.17061664239736266, + "grad_norm": 1.9266629180947348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35180 + }, + { + "epoch": 0.17066514059019874, + "grad_norm": 1.4812211190928792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35190 + }, + { + "epoch": 0.17071363878303486, + "grad_norm": 1.4933850422949035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35200 + }, + { + "epoch": 0.17076213697587095, + "grad_norm": 1.64946271752342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35210 + }, + { + "epoch": 0.17081063516870704, + "grad_norm": 1.700902885204414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35220 + }, + { + "epoch": 0.17085913336154313, + "grad_norm": 1.6079012254976988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35230 + }, + { + "epoch": 0.1709076315543792, + "grad_norm": 1.5075553960741672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35240 + }, + { + "epoch": 0.1709561297472153, + "grad_norm": 1.4784929192046548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35250 + }, + { + "epoch": 0.1710046279400514, + "grad_norm": 1.6464166208152164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35260 + }, + { + "epoch": 0.17105312613288748, + "grad_norm": 1.696077589485867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35270 + }, + { + "epoch": 0.17110162432572357, + "grad_norm": 1.7308910571500746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35280 + }, + { + "epoch": 0.17115012251855966, + "grad_norm": 1.9066473555540142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35290 + }, + { + "epoch": 0.17119862071139574, + "grad_norm": 1.6561249083224538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35300 + }, + { + "epoch": 0.17124711890423183, + "grad_norm": 1.7197412205405271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35310 + }, + { + "epoch": 0.17129561709706792, + "grad_norm": 1.686051831484292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35320 + }, + { + "epoch": 0.171344115289904, + "grad_norm": 1.74280742726296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35330 + }, + { + "epoch": 0.1713926134827401, + "grad_norm": 1.5186945745426783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35340 + }, + { + "epoch": 0.17144111167557619, + "grad_norm": 1.5951164300531673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35350 + }, + { + "epoch": 0.17148960986841227, + "grad_norm": 1.758504453164278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35360 + }, + { + "epoch": 0.17153810806124836, + "grad_norm": 1.7089966775074572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35370 + }, + { + "epoch": 0.17158660625408445, + "grad_norm": 1.6365069654966646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35380 + }, + { + "epoch": 0.17163510444692054, + "grad_norm": 1.5470480718704493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35390 + }, + { + "epoch": 0.17168360263975663, + "grad_norm": 1.5126842356494308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35400 + }, + { + "epoch": 0.17173210083259272, + "grad_norm": 1.650291068244769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35410 + }, + { + "epoch": 0.1717805990254288, + "grad_norm": 1.6686965409462573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35420 + }, + { + "epoch": 0.1718290972182649, + "grad_norm": 1.6432647953479318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35430 + }, + { + "epoch": 0.17187759541110098, + "grad_norm": 1.5126710195545456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35440 + }, + { + "epoch": 0.1719260936039371, + "grad_norm": 1.6741570618705737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35450 + }, + { + "epoch": 0.17197459179677319, + "grad_norm": 1.6369584443509666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35460 + }, + { + "epoch": 0.17202308998960927, + "grad_norm": 1.6922386691931024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35470 + }, + { + "epoch": 0.17207158818244536, + "grad_norm": 1.6371750177768263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35480 + }, + { + "epoch": 0.17212008637528145, + "grad_norm": 1.528096618130803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35490 + }, + { + "epoch": 0.17216858456811754, + "grad_norm": 1.540249883191791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35500 + }, + { + "epoch": 0.17221708276095363, + "grad_norm": 1.6629091703634913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35510 + }, + { + "epoch": 0.17226558095378972, + "grad_norm": 1.677043997005967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35520 + }, + { + "epoch": 0.1723140791466258, + "grad_norm": 1.644364431285794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35530 + }, + { + "epoch": 0.1723625773394619, + "grad_norm": 1.4886246901824052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35540 + }, + { + "epoch": 0.17241107553229798, + "grad_norm": 1.445375090725065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35550 + }, + { + "epoch": 0.17245957372513407, + "grad_norm": 1.669901337209012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35560 + }, + { + "epoch": 0.17250807191797016, + "grad_norm": 3.236570478293288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35570 + }, + { + "epoch": 0.17255657011080625, + "grad_norm": 1.592891294421861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35580 + }, + { + "epoch": 0.17260506830364233, + "grad_norm": 1.504725446466182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35590 + }, + { + "epoch": 0.17265356649647842, + "grad_norm": 1.4639851997344522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35600 + }, + { + "epoch": 0.1727020646893145, + "grad_norm": 1.6358281129669194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35610 + }, + { + "epoch": 0.1727505628821506, + "grad_norm": 1.66967041081989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35620 + }, + { + "epoch": 0.1727990610749867, + "grad_norm": 1.6199253138893255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35630 + }, + { + "epoch": 0.17284755926782278, + "grad_norm": 1.4758867905584339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35640 + }, + { + "epoch": 0.17289605746065886, + "grad_norm": 1.5010800780146383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35650 + }, + { + "epoch": 0.17294455565349495, + "grad_norm": 1.6087849985524372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35660 + }, + { + "epoch": 0.17299305384633104, + "grad_norm": 1.6401851610226004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35670 + }, + { + "epoch": 0.17304155203916713, + "grad_norm": 1.601440118292885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35680 + }, + { + "epoch": 0.17309005023200322, + "grad_norm": 1.459599872077888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35690 + }, + { + "epoch": 0.1731385484248393, + "grad_norm": 1.4452595564762305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35700 + }, + { + "epoch": 0.17318704661767542, + "grad_norm": 1.575474044557268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35710 + }, + { + "epoch": 0.1732355448105115, + "grad_norm": 1.6208305453346838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35720 + }, + { + "epoch": 0.1732840430033476, + "grad_norm": 1.6169396133136615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35730 + }, + { + "epoch": 0.1733325411961837, + "grad_norm": 1.44855135886246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35740 + }, + { + "epoch": 0.17338103938901978, + "grad_norm": 1.4414689530894975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35750 + }, + { + "epoch": 0.17342953758185586, + "grad_norm": 1.5837431988074968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35760 + }, + { + "epoch": 0.17347803577469195, + "grad_norm": 1.570303851394783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35770 + }, + { + "epoch": 0.17352653396752804, + "grad_norm": 1.618049765284013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35780 + }, + { + "epoch": 0.17357503216036413, + "grad_norm": 1.4308908191651426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35790 + }, + { + "epoch": 0.17362353035320022, + "grad_norm": 1.667323914489316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35800 + }, + { + "epoch": 0.1736720285460363, + "grad_norm": 1.5823209764675994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35810 + }, + { + "epoch": 0.1737205267388724, + "grad_norm": 1.5688773657984711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35820 + }, + { + "epoch": 0.17376902493170848, + "grad_norm": 1.574614145738451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35830 + }, + { + "epoch": 0.17381752312454457, + "grad_norm": 1.4697521066864283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35840 + }, + { + "epoch": 0.17386602131738066, + "grad_norm": 1.4683061522191565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35850 + }, + { + "epoch": 0.17391451951021675, + "grad_norm": 1.5742334369406308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35860 + }, + { + "epoch": 0.17396301770305284, + "grad_norm": 1.582837541036497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35870 + }, + { + "epoch": 0.17401151589588892, + "grad_norm": 1.526096014003997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35880 + }, + { + "epoch": 0.174060014088725, + "grad_norm": 1.4271712700519856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35890 + }, + { + "epoch": 0.1741085122815611, + "grad_norm": 1.396899023120568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35900 + }, + { + "epoch": 0.1741570104743972, + "grad_norm": 1.5452634727353143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35910 + }, + { + "epoch": 0.17420550866723328, + "grad_norm": 1.575296550981875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35920 + }, + { + "epoch": 0.17425400686006937, + "grad_norm": 1.617007256982106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35930 + }, + { + "epoch": 0.17430250505290545, + "grad_norm": 1.4547723026225867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35940 + }, + { + "epoch": 0.17435100324574154, + "grad_norm": 1.3919027708197973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35950 + }, + { + "epoch": 0.17439950143857766, + "grad_norm": 1.5051982416025567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35960 + }, + { + "epoch": 0.17444799963141375, + "grad_norm": 1.5735844272057875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35970 + }, + { + "epoch": 0.17449649782424984, + "grad_norm": 1.5003405451352592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35980 + }, + { + "epoch": 0.17454499601708592, + "grad_norm": 1.5015530152595602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 35990 + }, + { + "epoch": 0.174593494209922, + "grad_norm": 1.4275425996856939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36000 + }, + { + "epoch": 0.1746419924027581, + "grad_norm": 1.5534962471974723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36010 + }, + { + "epoch": 0.1746904905955942, + "grad_norm": 1.5258012808772037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36020 + }, + { + "epoch": 0.17473898878843028, + "grad_norm": 1.521253807368339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36030 + }, + { + "epoch": 0.17478748698126637, + "grad_norm": 1.4084130839364661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36040 + }, + { + "epoch": 0.17483598517410245, + "grad_norm": 1.3877766491532384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36050 + }, + { + "epoch": 0.17488448336693854, + "grad_norm": 1.525979200778238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36060 + }, + { + "epoch": 0.17493298155977463, + "grad_norm": 1.547798404999412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36070 + }, + { + "epoch": 0.17498147975261072, + "grad_norm": 1.4657337032986106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36080 + }, + { + "epoch": 0.1750299779454468, + "grad_norm": 1.3825156486291235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36090 + }, + { + "epoch": 0.1750784761382829, + "grad_norm": 1.3839867563092412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36100 + }, + { + "epoch": 0.17512697433111898, + "grad_norm": 1.509628617668568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36110 + }, + { + "epoch": 0.17517547252395507, + "grad_norm": 2.2489589923679887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36120 + }, + { + "epoch": 0.17522397071679116, + "grad_norm": 1.5149510090850526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36130 + }, + { + "epoch": 0.17527246890962725, + "grad_norm": 1.4063110143069935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36140 + }, + { + "epoch": 0.17532096710246334, + "grad_norm": 1.3338113546979002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36150 + }, + { + "epoch": 0.17536946529529943, + "grad_norm": 1.560255498134211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36160 + }, + { + "epoch": 0.17541796348813551, + "grad_norm": 1.5355632854152645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36170 + }, + { + "epoch": 0.1754664616809716, + "grad_norm": 1.4780195556340914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36180 + }, + { + "epoch": 0.1755149598738077, + "grad_norm": 1.3778428353816707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36190 + }, + { + "epoch": 0.17556345806664378, + "grad_norm": 1.339543302947277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36200 + }, + { + "epoch": 0.17561195625947987, + "grad_norm": 1.528509727677374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36210 + }, + { + "epoch": 0.17566045445231598, + "grad_norm": 1.4956829375023517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36220 + }, + { + "epoch": 0.17570895264515207, + "grad_norm": 1.4387825331141357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36230 + }, + { + "epoch": 0.17575745083798816, + "grad_norm": 1.3664043763128575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36240 + }, + { + "epoch": 0.17580594903082425, + "grad_norm": 1.3550369715176203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36250 + }, + { + "epoch": 0.17585444722366034, + "grad_norm": 1.4702433759339328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36260 + }, + { + "epoch": 0.17590294541649643, + "grad_norm": 1.4475079979092698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36270 + }, + { + "epoch": 0.17595144360933251, + "grad_norm": 1.4598255404507654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36280 + }, + { + "epoch": 0.1759999418021686, + "grad_norm": 1.3323244729690487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36290 + }, + { + "epoch": 0.1760484399950047, + "grad_norm": 1.3759635919541324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36300 + }, + { + "epoch": 0.17609693818784078, + "grad_norm": 1.482053733070643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36310 + }, + { + "epoch": 0.17614543638067687, + "grad_norm": 1.4070411680222605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36320 + }, + { + "epoch": 0.17619393457351296, + "grad_norm": 1.4212037058314309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36330 + }, + { + "epoch": 0.17624243276634904, + "grad_norm": 3.89625256502768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36340 + }, + { + "epoch": 0.17629093095918513, + "grad_norm": 1.3051480607373378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36350 + }, + { + "epoch": 0.17633942915202122, + "grad_norm": 1.7691030507194228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36360 + }, + { + "epoch": 0.1763879273448573, + "grad_norm": 1.4278192850269988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36370 + }, + { + "epoch": 0.1764364255376934, + "grad_norm": 1.414233139485077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36380 + }, + { + "epoch": 0.17648492373052949, + "grad_norm": 1.3015475985866942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36390 + }, + { + "epoch": 0.17653342192336557, + "grad_norm": 1.2732346021948615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36400 + }, + { + "epoch": 0.17658192011620166, + "grad_norm": 1.4101966883117711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36410 + }, + { + "epoch": 0.17663041830903775, + "grad_norm": 1.4143168414193497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36420 + }, + { + "epoch": 0.17667891650187384, + "grad_norm": 1.3872548265680962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36430 + }, + { + "epoch": 0.17672741469470993, + "grad_norm": 1.2796434134543233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36440 + }, + { + "epoch": 0.17677591288754602, + "grad_norm": 1.272254337436607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36450 + }, + { + "epoch": 0.1768244110803821, + "grad_norm": 1.452894622389067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36460 + }, + { + "epoch": 0.1768729092732182, + "grad_norm": 1.4248810487060837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36470 + }, + { + "epoch": 0.1769214074660543, + "grad_norm": 1.43098276339515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36480 + }, + { + "epoch": 0.1769699056588904, + "grad_norm": 1.3249058383735246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36490 + }, + { + "epoch": 0.17701840385172649, + "grad_norm": 1.241138676277842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36500 + }, + { + "epoch": 0.17706690204456257, + "grad_norm": 1.3927201791830157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36510 + }, + { + "epoch": 0.17711540023739866, + "grad_norm": 1.4095559208726627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36520 + }, + { + "epoch": 0.17716389843023475, + "grad_norm": 1.4132923809029307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36530 + }, + { + "epoch": 0.17721239662307084, + "grad_norm": 1.34588219680154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36540 + }, + { + "epoch": 0.17726089481590693, + "grad_norm": 1.2509701718954602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36550 + }, + { + "epoch": 0.17730939300874302, + "grad_norm": 1.361579506919952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36560 + }, + { + "epoch": 0.1773578912015791, + "grad_norm": 1.398030065047351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36570 + }, + { + "epoch": 0.1774063893944152, + "grad_norm": 1.3700399392746476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36580 + }, + { + "epoch": 0.17745488758725128, + "grad_norm": 1.2888781952824502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36590 + }, + { + "epoch": 0.17750338578008737, + "grad_norm": 1.22765882792919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36600 + }, + { + "epoch": 0.17755188397292346, + "grad_norm": 1.3720224956159655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36610 + }, + { + "epoch": 0.17760038216575955, + "grad_norm": 1.3960702460735774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36620 + }, + { + "epoch": 0.17764888035859563, + "grad_norm": 1.3742362625634996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36630 + }, + { + "epoch": 0.17769737855143172, + "grad_norm": 1.2403415894368663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36640 + }, + { + "epoch": 0.1777458767442678, + "grad_norm": 1.2255550529971515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36650 + }, + { + "epoch": 0.1777943749371039, + "grad_norm": 1.3584528346655134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36660 + }, + { + "epoch": 0.17784287312994, + "grad_norm": 1.3725150438403944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36670 + }, + { + "epoch": 0.17789137132277608, + "grad_norm": 1.3507704466064752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36680 + }, + { + "epoch": 0.17793986951561216, + "grad_norm": 1.2178372799098724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36690 + }, + { + "epoch": 0.17798836770844825, + "grad_norm": 1.236127076253979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36700 + }, + { + "epoch": 0.17803686590128434, + "grad_norm": 1.3515028740584967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36710 + }, + { + "epoch": 0.17808536409412043, + "grad_norm": 1.4184244889747788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36720 + }, + { + "epoch": 0.17813386228695655, + "grad_norm": 1.3436635981634026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36730 + }, + { + "epoch": 0.17818236047979263, + "grad_norm": 1.1831422597197161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36740 + }, + { + "epoch": 0.17823085867262872, + "grad_norm": 1.194793952663531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36750 + }, + { + "epoch": 0.1782793568654648, + "grad_norm": 1.352805441001692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36760 + }, + { + "epoch": 0.1783278550583009, + "grad_norm": 1.3642349472320348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36770 + }, + { + "epoch": 0.178376353251137, + "grad_norm": 1.3982109692278755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36780 + }, + { + "epoch": 0.17842485144397308, + "grad_norm": 1.1892396400980942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36790 + }, + { + "epoch": 0.17847334963680916, + "grad_norm": 1.1834369217922358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36800 + }, + { + "epoch": 0.17852184782964525, + "grad_norm": 1.3655034081239137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36810 + }, + { + "epoch": 0.17857034602248134, + "grad_norm": 1.3270144449961663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36820 + }, + { + "epoch": 0.17861884421531743, + "grad_norm": 1.319704949764855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36830 + }, + { + "epoch": 0.17866734240815352, + "grad_norm": 1.168019423403166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36840 + }, + { + "epoch": 0.1787158406009896, + "grad_norm": 1.1672425159758859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36850 + }, + { + "epoch": 0.1787643387938257, + "grad_norm": 1.3362797801619308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36860 + }, + { + "epoch": 0.17881283698666178, + "grad_norm": 1.2924222403398744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36870 + }, + { + "epoch": 0.17886133517949787, + "grad_norm": 1.3656874386924756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36880 + }, + { + "epoch": 0.17890983337233396, + "grad_norm": 1.1575154701404244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36890 + }, + { + "epoch": 0.17895833156517005, + "grad_norm": 1.1689357393152022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36900 + }, + { + "epoch": 0.17900682975800614, + "grad_norm": 1.3149441713267151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36910 + }, + { + "epoch": 0.17905532795084222, + "grad_norm": 1.3002468790546118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36920 + }, + { + "epoch": 0.1791038261436783, + "grad_norm": 1.2902685853077855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36930 + }, + { + "epoch": 0.1791523243365144, + "grad_norm": 1.1406638122934964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36940 + }, + { + "epoch": 0.1792008225293505, + "grad_norm": 1.1697743929062199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36950 + }, + { + "epoch": 0.17924932072218658, + "grad_norm": 1.2535365101484786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36960 + }, + { + "epoch": 0.17929781891502267, + "grad_norm": 1.3183702662900032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36970 + }, + { + "epoch": 0.17934631710785875, + "grad_norm": 1.2379642555515602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36980 + }, + { + "epoch": 0.17939481530069487, + "grad_norm": 1.1259337640012745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 36990 + }, + { + "epoch": 0.17944331349353096, + "grad_norm": 1.1551669132359166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37000 + }, + { + "epoch": 0.17949181168636705, + "grad_norm": 1.2841961449794326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37010 + }, + { + "epoch": 0.17954030987920314, + "grad_norm": 1.210452751365665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37020 + }, + { + "epoch": 0.17958880807203922, + "grad_norm": 1.286015702817167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37030 + }, + { + "epoch": 0.1796373062648753, + "grad_norm": 1.3225911743575125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37040 + }, + { + "epoch": 0.1796858044577114, + "grad_norm": 1.16990911180892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37050 + }, + { + "epoch": 0.1797343026505475, + "grad_norm": 1.2587810260811239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37060 + }, + { + "epoch": 0.17978280084338358, + "grad_norm": 1.9571294274101092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37070 + }, + { + "epoch": 0.17983129903621967, + "grad_norm": 1.2218170297728648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37080 + }, + { + "epoch": 0.17987979722905575, + "grad_norm": 1.1067287175592355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37090 + }, + { + "epoch": 0.17992829542189184, + "grad_norm": 1.1169441904712585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37100 + }, + { + "epoch": 0.17997679361472793, + "grad_norm": 1.259683131138445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37110 + }, + { + "epoch": 0.18002529180756402, + "grad_norm": 1.2216641209761292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37120 + }, + { + "epoch": 0.1800737900004001, + "grad_norm": 1.2653784153826564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37130 + }, + { + "epoch": 0.1801222881932362, + "grad_norm": 1.068197548192984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37140 + }, + { + "epoch": 0.18017078638607228, + "grad_norm": 1.0877177203383326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37150 + }, + { + "epoch": 0.18021928457890837, + "grad_norm": 1.23171403743072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37160 + }, + { + "epoch": 0.18026778277174446, + "grad_norm": 1.2557853779071593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37170 + }, + { + "epoch": 0.18031628096458055, + "grad_norm": 1.2663052473271819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37180 + }, + { + "epoch": 0.18036477915741664, + "grad_norm": 1.071441317890276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37190 + }, + { + "epoch": 0.18041327735025273, + "grad_norm": 1.1225995422137203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37200 + }, + { + "epoch": 0.18046177554308881, + "grad_norm": 1.2570885132845433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37210 + }, + { + "epoch": 0.1805102737359249, + "grad_norm": 1.2310225372402783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37220 + }, + { + "epoch": 0.180558771928761, + "grad_norm": 1.1840383962180567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37230 + }, + { + "epoch": 0.1806072701215971, + "grad_norm": 1.1012508593921666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37240 + }, + { + "epoch": 0.1806557683144332, + "grad_norm": 1.0965227659198717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37250 + }, + { + "epoch": 0.18070426650726928, + "grad_norm": 1.2065270027505903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37260 + }, + { + "epoch": 0.18075276470010537, + "grad_norm": 1.1997703097677004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37270 + }, + { + "epoch": 0.18080126289294146, + "grad_norm": 4.184091721981531e-07, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 37280 + }, + { + "epoch": 0.18084976108577755, + "grad_norm": 8.683280611876398e-06, + "learning_rate": 0.0002, + "loss": 0.0031, + "step": 37290 + }, + { + "epoch": 0.18089825927861364, + "grad_norm": 5.246460932539776e-05, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 37300 + }, + { + "epoch": 0.18094675747144973, + "grad_norm": 0.039353080093860626, + "learning_rate": 0.0002, + "loss": 0.6014, + "step": 37310 + }, + { + "epoch": 0.18099525566428581, + "grad_norm": 0.0017031066818162799, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 37320 + }, + { + "epoch": 0.1810437538571219, + "grad_norm": 0.006973891984671354, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 37330 + }, + { + "epoch": 0.181092252049958, + "grad_norm": 0.00046768056927248836, + "learning_rate": 0.0002, + "loss": 0.0068, + "step": 37340 + }, + { + "epoch": 0.18114075024279408, + "grad_norm": 0.00019352925301063806, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37350 + }, + { + "epoch": 0.18118924843563017, + "grad_norm": 0.0003816258686129004, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 37360 + }, + { + "epoch": 0.18123774662846626, + "grad_norm": 0.00019989653083030134, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37370 + }, + { + "epoch": 0.18128624482130234, + "grad_norm": 0.00010957886115647852, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37380 + }, + { + "epoch": 0.18133474301413843, + "grad_norm": 5.1107148465234786e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37390 + }, + { + "epoch": 0.18138324120697452, + "grad_norm": 4.3507290683919564e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37400 + }, + { + "epoch": 0.1814317393998106, + "grad_norm": 6.230719736777246e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37410 + }, + { + "epoch": 0.1814802375926467, + "grad_norm": 6.156492599984631e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37420 + }, + { + "epoch": 0.1815287357854828, + "grad_norm": 5.128279372001998e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37430 + }, + { + "epoch": 0.18157723397831887, + "grad_norm": 3.371409184182994e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37440 + }, + { + "epoch": 0.18162573217115496, + "grad_norm": 3.118865788565017e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37450 + }, + { + "epoch": 0.18167423036399105, + "grad_norm": 4.171202454017475e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37460 + }, + { + "epoch": 0.18172272855682714, + "grad_norm": 4.0828148485161364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37470 + }, + { + "epoch": 0.18177122674966323, + "grad_norm": 3.332770575070754e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37480 + }, + { + "epoch": 0.18181972494249932, + "grad_norm": 2.5647517759352922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37490 + }, + { + "epoch": 0.18186822313533543, + "grad_norm": 2.55520390055608e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37500 + }, + { + "epoch": 0.18191672132817152, + "grad_norm": 2.8381795345922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37510 + }, + { + "epoch": 0.1819652195210076, + "grad_norm": 2.9583472496597096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37520 + }, + { + "epoch": 0.1820137177138437, + "grad_norm": 2.6335863367421553e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37530 + }, + { + "epoch": 0.1820622159066798, + "grad_norm": 2.294155456183944e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37540 + }, + { + "epoch": 0.18211071409951587, + "grad_norm": 2.0539973775157705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37550 + }, + { + "epoch": 0.18215921229235196, + "grad_norm": 2.4632496206322685e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37560 + }, + { + "epoch": 0.18220771048518805, + "grad_norm": 2.3629272618563846e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37570 + }, + { + "epoch": 0.18225620867802414, + "grad_norm": 2.20363635889953e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37580 + }, + { + "epoch": 0.18230470687086023, + "grad_norm": 1.9621722458396107e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37590 + }, + { + "epoch": 0.18235320506369632, + "grad_norm": 1.8264319805894047e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37600 + }, + { + "epoch": 0.1824017032565324, + "grad_norm": 2.146937003999483e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37610 + }, + { + "epoch": 0.1824502014493685, + "grad_norm": 1.9090402929577976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37620 + }, + { + "epoch": 0.18249869964220458, + "grad_norm": 2.0320869225542992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37630 + }, + { + "epoch": 0.18254719783504067, + "grad_norm": 1.7046035281964578e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37640 + }, + { + "epoch": 0.18259569602787676, + "grad_norm": 1.544771293993108e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37650 + }, + { + "epoch": 0.18264419422071285, + "grad_norm": 1.7257394574698992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37660 + }, + { + "epoch": 0.18269269241354893, + "grad_norm": 2.243960443593096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37670 + }, + { + "epoch": 0.18274119060638502, + "grad_norm": 1.708102718112059e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37680 + }, + { + "epoch": 0.1827896887992211, + "grad_norm": 1.5118042028916534e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37690 + }, + { + "epoch": 0.1828381869920572, + "grad_norm": 1.455940946470946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37700 + }, + { + "epoch": 0.1828866851848933, + "grad_norm": 1.5484132745768875e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37710 + }, + { + "epoch": 0.18293518337772938, + "grad_norm": 1.54703466250794e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37720 + }, + { + "epoch": 0.18298368157056547, + "grad_norm": 1.5110652384464629e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37730 + }, + { + "epoch": 0.18303217976340155, + "grad_norm": 1.3331487934920006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37740 + }, + { + "epoch": 0.18308067795623764, + "grad_norm": 1.296292703045765e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37750 + }, + { + "epoch": 0.18312917614907376, + "grad_norm": 1.4094782272877637e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37760 + }, + { + "epoch": 0.18317767434190985, + "grad_norm": 1.4235785783967003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37770 + }, + { + "epoch": 0.18322617253474593, + "grad_norm": 1.309684830630431e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37780 + }, + { + "epoch": 0.18327467072758202, + "grad_norm": 1.2290860468056053e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37790 + }, + { + "epoch": 0.1833231689204181, + "grad_norm": 1.156067355623236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37800 + }, + { + "epoch": 0.1833716671132542, + "grad_norm": 1.3089493222651072e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37810 + }, + { + "epoch": 0.1834201653060903, + "grad_norm": 1.2512112334661651e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37820 + }, + { + "epoch": 0.18346866349892638, + "grad_norm": 1.3462024071486667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37830 + }, + { + "epoch": 0.18351716169176246, + "grad_norm": 1.1052437912439927e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37840 + }, + { + "epoch": 0.18356565988459855, + "grad_norm": 1.0679045772121754e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37850 + }, + { + "epoch": 0.18361415807743464, + "grad_norm": 1.2133595191698987e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37860 + }, + { + "epoch": 0.18366265627027073, + "grad_norm": 1.0850751095858868e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37870 + }, + { + "epoch": 0.18371115446310682, + "grad_norm": 1.0912086509051733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37880 + }, + { + "epoch": 0.1837596526559429, + "grad_norm": 1.0463532817084342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37890 + }, + { + "epoch": 0.183808150848779, + "grad_norm": 9.71380723058246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37900 + }, + { + "epoch": 0.18385664904161508, + "grad_norm": 1.0606032446958125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37910 + }, + { + "epoch": 0.18390514723445117, + "grad_norm": 1.9807397620752454e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 37920 + }, + { + "epoch": 0.18395364542728726, + "grad_norm": 7.581940735690296e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37930 + }, + { + "epoch": 0.18400214362012335, + "grad_norm": 1.969158256542869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37940 + }, + { + "epoch": 0.18405064181295944, + "grad_norm": 1.924583557411097e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37950 + }, + { + "epoch": 0.18409914000579553, + "grad_norm": 8.355914906132966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37960 + }, + { + "epoch": 0.1841476381986316, + "grad_norm": 6.24347449047491e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37970 + }, + { + "epoch": 0.1841961363914677, + "grad_norm": 4.816000000573695e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37980 + }, + { + "epoch": 0.1842446345843038, + "grad_norm": 1.3987588317831978e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 37990 + }, + { + "epoch": 0.18429313277713988, + "grad_norm": 1.2655071259359829e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38000 + }, + { + "epoch": 0.184341630969976, + "grad_norm": 2.837370448105503e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38010 + }, + { + "epoch": 0.18439012916281208, + "grad_norm": 2.7377762307878584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38020 + }, + { + "epoch": 0.18443862735564817, + "grad_norm": 2.2528778572450392e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38030 + }, + { + "epoch": 0.18448712554848426, + "grad_norm": 1.1055924005631823e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38040 + }, + { + "epoch": 0.18453562374132035, + "grad_norm": 1.0731264410424046e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38050 + }, + { + "epoch": 0.18458412193415644, + "grad_norm": 1.8227397958980873e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38060 + }, + { + "epoch": 0.18463262012699252, + "grad_norm": 1.9100840290775523e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38070 + }, + { + "epoch": 0.1846811183198286, + "grad_norm": 1.6048928955569863e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38080 + }, + { + "epoch": 0.1847296165126647, + "grad_norm": 9.362142918689642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38090 + }, + { + "epoch": 0.1847781147055008, + "grad_norm": 9.55218911258271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38100 + }, + { + "epoch": 0.18482661289833688, + "grad_norm": 1.6307085388689302e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38110 + }, + { + "epoch": 0.18487511109117297, + "grad_norm": 1.4801069482928142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38120 + }, + { + "epoch": 0.18492360928400906, + "grad_norm": 1.3335995390661992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38130 + }, + { + "epoch": 0.18497210747684514, + "grad_norm": 8.763566256675404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38140 + }, + { + "epoch": 0.18502060566968123, + "grad_norm": 8.561386493965983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38150 + }, + { + "epoch": 0.18506910386251732, + "grad_norm": 1.5163651369221043e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38160 + }, + { + "epoch": 0.1851176020553534, + "grad_norm": 1.2626745046873111e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38170 + }, + { + "epoch": 0.1851661002481895, + "grad_norm": 1.2329801393207163e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38180 + }, + { + "epoch": 0.18521459844102559, + "grad_norm": 8.03696002549259e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38190 + }, + { + "epoch": 0.18526309663386167, + "grad_norm": 7.46096429793397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38200 + }, + { + "epoch": 0.18531159482669776, + "grad_norm": 1.1319200893922243e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38210 + }, + { + "epoch": 0.18536009301953385, + "grad_norm": 9.930822670867201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38220 + }, + { + "epoch": 0.18540859121236994, + "grad_norm": 1.0677450518414844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38230 + }, + { + "epoch": 0.18545708940520603, + "grad_norm": 7.170188382588094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38240 + }, + { + "epoch": 0.18550558759804212, + "grad_norm": 7.083043783495668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38250 + }, + { + "epoch": 0.1855540857908782, + "grad_norm": 8.371521289518569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38260 + }, + { + "epoch": 0.18560258398371432, + "grad_norm": 9.49700734054204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38270 + }, + { + "epoch": 0.1856510821765504, + "grad_norm": 9.354147550766356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38280 + }, + { + "epoch": 0.1856995803693865, + "grad_norm": 6.689337169518694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38290 + }, + { + "epoch": 0.18574807856222258, + "grad_norm": 6.601997938560089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38300 + }, + { + "epoch": 0.18579657675505867, + "grad_norm": 9.515383680991363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38310 + }, + { + "epoch": 0.18584507494789476, + "grad_norm": 8.026729119592346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38320 + }, + { + "epoch": 0.18589357314073085, + "grad_norm": 8.79388699104311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38330 + }, + { + "epoch": 0.18594207133356694, + "grad_norm": 6.258910161704989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38340 + }, + { + "epoch": 0.18599056952640303, + "grad_norm": 6.394663614628371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38350 + }, + { + "epoch": 0.18603906771923912, + "grad_norm": 8.435576091869734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38360 + }, + { + "epoch": 0.1860875659120752, + "grad_norm": 7.394820386252832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38370 + }, + { + "epoch": 0.1861360641049113, + "grad_norm": 7.228478352772072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38380 + }, + { + "epoch": 0.18618456229774738, + "grad_norm": 6.242298240977107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38390 + }, + { + "epoch": 0.18623306049058347, + "grad_norm": 5.663672254740959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38400 + }, + { + "epoch": 0.18628155868341956, + "grad_norm": 7.327715593419271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38410 + }, + { + "epoch": 0.18633005687625565, + "grad_norm": 7.090696271916386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38420 + }, + { + "epoch": 0.18637855506909173, + "grad_norm": 7.310759883694118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38430 + }, + { + "epoch": 0.18642705326192782, + "grad_norm": 5.335287823982071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38440 + }, + { + "epoch": 0.1864755514547639, + "grad_norm": 5.608800620393595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38450 + }, + { + "epoch": 0.1865240496476, + "grad_norm": 6.0871766436321195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38460 + }, + { + "epoch": 0.1865725478404361, + "grad_norm": 6.2314443312061485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38470 + }, + { + "epoch": 0.18662104603327218, + "grad_norm": 6.9626353251805995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38480 + }, + { + "epoch": 0.18666954422610826, + "grad_norm": 5.234947366261622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38490 + }, + { + "epoch": 0.18671804241894435, + "grad_norm": 5.121603862789925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38500 + }, + { + "epoch": 0.18676654061178044, + "grad_norm": 6.132263933977811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38510 + }, + { + "epoch": 0.18681503880461656, + "grad_norm": 6.079545528336894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38520 + }, + { + "epoch": 0.18686353699745265, + "grad_norm": 5.95615392740001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38530 + }, + { + "epoch": 0.18691203519028873, + "grad_norm": 5.002639682061272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38540 + }, + { + "epoch": 0.18696053338312482, + "grad_norm": 4.876818820775952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38550 + }, + { + "epoch": 0.1870090315759609, + "grad_norm": 5.514430995390285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38560 + }, + { + "epoch": 0.187057529768797, + "grad_norm": 5.827874247188447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38570 + }, + { + "epoch": 0.1871060279616331, + "grad_norm": 0.0001193721400341019, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38580 + }, + { + "epoch": 0.18715452615446918, + "grad_norm": 4.8933075049717445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38590 + }, + { + "epoch": 0.18720302434730526, + "grad_norm": 4.779588834935566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38600 + }, + { + "epoch": 0.18725152254014135, + "grad_norm": 5.311564564181026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38610 + }, + { + "epoch": 0.18730002073297744, + "grad_norm": 5.127845270180842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38620 + }, + { + "epoch": 0.18734851892581353, + "grad_norm": 4.8723754844104405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38630 + }, + { + "epoch": 0.18739701711864962, + "grad_norm": 4.637573056243127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38640 + }, + { + "epoch": 0.1874455153114857, + "grad_norm": 4.5183655856817495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38650 + }, + { + "epoch": 0.1874940135043218, + "grad_norm": 4.8040647016023286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38660 + }, + { + "epoch": 0.18754251169715788, + "grad_norm": 8.137575605360325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38670 + }, + { + "epoch": 0.18759100988999397, + "grad_norm": 4.5410474740492646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38680 + }, + { + "epoch": 0.18763950808283006, + "grad_norm": 4.505719061853597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38690 + }, + { + "epoch": 0.18768800627566615, + "grad_norm": 4.310201802582014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38700 + }, + { + "epoch": 0.18773650446850224, + "grad_norm": 4.803386218554806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38710 + }, + { + "epoch": 0.18778500266133832, + "grad_norm": 4.588733190757921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38720 + }, + { + "epoch": 0.1878335008541744, + "grad_norm": 4.974688636139035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38730 + }, + { + "epoch": 0.1878819990470105, + "grad_norm": 4.0765671656117775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38740 + }, + { + "epoch": 0.1879304972398466, + "grad_norm": 4.179743427812355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38750 + }, + { + "epoch": 0.18797899543268268, + "grad_norm": 4.379598067316692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38760 + }, + { + "epoch": 0.18802749362551877, + "grad_norm": 5.051595962868305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38770 + }, + { + "epoch": 0.18807599181835488, + "grad_norm": 4.389716195873916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38780 + }, + { + "epoch": 0.18812449001119097, + "grad_norm": 3.979956090915948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38790 + }, + { + "epoch": 0.18817298820402706, + "grad_norm": 3.913592081516981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38800 + }, + { + "epoch": 0.18822148639686315, + "grad_norm": 4.276202162145637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38810 + }, + { + "epoch": 0.18826998458969924, + "grad_norm": 4.1155226426781155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38820 + }, + { + "epoch": 0.18831848278253532, + "grad_norm": 4.012394583696732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38830 + }, + { + "epoch": 0.1883669809753714, + "grad_norm": 3.753409828277654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38840 + }, + { + "epoch": 0.1884154791682075, + "grad_norm": 3.757214699362521e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38850 + }, + { + "epoch": 0.1884639773610436, + "grad_norm": 4.359521426522406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38860 + }, + { + "epoch": 0.18851247555387968, + "grad_norm": 4.051136784255505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38870 + }, + { + "epoch": 0.18856097374671577, + "grad_norm": 4.0868608266464435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38880 + }, + { + "epoch": 0.18860947193955185, + "grad_norm": 3.5291691347083542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38890 + }, + { + "epoch": 0.18865797013238794, + "grad_norm": 3.637003146650386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38900 + }, + { + "epoch": 0.18870646832522403, + "grad_norm": 3.7354016058088746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38910 + }, + { + "epoch": 0.18875496651806012, + "grad_norm": 3.5375167044549016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38920 + }, + { + "epoch": 0.1888034647108962, + "grad_norm": 3.6430144518817542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38930 + }, + { + "epoch": 0.1888519629037323, + "grad_norm": 3.4022714316961356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38940 + }, + { + "epoch": 0.18890046109656838, + "grad_norm": 3.5015832509088796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38950 + }, + { + "epoch": 0.18894895928940447, + "grad_norm": 3.431940967857372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38960 + }, + { + "epoch": 0.18899745748224056, + "grad_norm": 3.39849543706805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38970 + }, + { + "epoch": 0.18904595567507665, + "grad_norm": 0.0001543820690130815, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38980 + }, + { + "epoch": 0.18909445386791274, + "grad_norm": 3.728833917193697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 38990 + }, + { + "epoch": 0.18914295206074883, + "grad_norm": 3.4344307096034754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39000 + }, + { + "epoch": 0.18919145025358491, + "grad_norm": 9.197983308695257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39010 + }, + { + "epoch": 0.189239948446421, + "grad_norm": 3.56491727870889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39020 + }, + { + "epoch": 0.18928844663925712, + "grad_norm": 3.339454451634083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39030 + }, + { + "epoch": 0.1893369448320932, + "grad_norm": 3.356372417329112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39040 + }, + { + "epoch": 0.1893854430249293, + "grad_norm": 3.284830199845601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39050 + }, + { + "epoch": 0.18943394121776538, + "grad_norm": 3.280982355136075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39060 + }, + { + "epoch": 0.18948243941060147, + "grad_norm": 3.2371531233366113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39070 + }, + { + "epoch": 0.18953093760343756, + "grad_norm": 3.3899414120242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39080 + }, + { + "epoch": 0.18957943579627365, + "grad_norm": 3.3034762054739986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39090 + }, + { + "epoch": 0.18962793398910974, + "grad_norm": 3.1741426482767565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39100 + }, + { + "epoch": 0.18967643218194583, + "grad_norm": 3.5856464819516987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39110 + }, + { + "epoch": 0.1897249303747819, + "grad_norm": 2.9495940907509066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39120 + }, + { + "epoch": 0.189773428567618, + "grad_norm": 2.9809634725097567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39130 + }, + { + "epoch": 0.1898219267604541, + "grad_norm": 3.093978421020438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39140 + }, + { + "epoch": 0.18987042495329018, + "grad_norm": 3.3943260859814472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39150 + }, + { + "epoch": 0.18991892314612627, + "grad_norm": 3.0320181849674555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39160 + }, + { + "epoch": 0.18996742133896236, + "grad_norm": 3.065088321818621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39170 + }, + { + "epoch": 0.19001591953179844, + "grad_norm": 2.9955733680253616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39180 + }, + { + "epoch": 0.19006441772463453, + "grad_norm": 2.8498591291281627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39190 + }, + { + "epoch": 0.19011291591747062, + "grad_norm": 2.8480167202360462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39200 + }, + { + "epoch": 0.1901614141103067, + "grad_norm": 3.380871248737094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39210 + }, + { + "epoch": 0.1902099123031428, + "grad_norm": 2.688618678803323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39220 + }, + { + "epoch": 0.19025841049597889, + "grad_norm": 2.8574531825142913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39230 + }, + { + "epoch": 0.19030690868881497, + "grad_norm": 2.844386244760244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39240 + }, + { + "epoch": 0.19035540688165106, + "grad_norm": 2.8204267437104136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39250 + }, + { + "epoch": 0.19040390507448715, + "grad_norm": 2.7404157663113438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39260 + }, + { + "epoch": 0.19045240326732324, + "grad_norm": 2.6741956844489323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39270 + }, + { + "epoch": 0.19050090146015933, + "grad_norm": 2.7923538254981395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39280 + }, + { + "epoch": 0.19054939965299544, + "grad_norm": 2.6623113171808654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39290 + }, + { + "epoch": 0.19059789784583153, + "grad_norm": 2.717122015383211e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39300 + }, + { + "epoch": 0.19064639603866762, + "grad_norm": 2.7087837679573568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39310 + }, + { + "epoch": 0.1906948942315037, + "grad_norm": 2.582549768703757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39320 + }, + { + "epoch": 0.1907433924243398, + "grad_norm": 2.7552368919714354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39330 + }, + { + "epoch": 0.19079189061717589, + "grad_norm": 2.6264385724061867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39340 + }, + { + "epoch": 0.19084038881001197, + "grad_norm": 2.5356939659104683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39350 + }, + { + "epoch": 0.19088888700284806, + "grad_norm": 2.4746964299993124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39360 + }, + { + "epoch": 0.19093738519568415, + "grad_norm": 2.569168600530247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39370 + }, + { + "epoch": 0.19098588338852024, + "grad_norm": 2.6018337848654483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39380 + }, + { + "epoch": 0.19103438158135633, + "grad_norm": 2.4921416752476944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39390 + }, + { + "epoch": 0.19108287977419242, + "grad_norm": 2.5600631943234475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39400 + }, + { + "epoch": 0.1911313779670285, + "grad_norm": 2.5432493657717714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39410 + }, + { + "epoch": 0.1911798761598646, + "grad_norm": 2.463492592141847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39420 + }, + { + "epoch": 0.19122837435270068, + "grad_norm": 2.5969131911551813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39430 + }, + { + "epoch": 0.19127687254553677, + "grad_norm": 2.4285329800477484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39440 + }, + { + "epoch": 0.19132537073837286, + "grad_norm": 2.4663706881256076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39450 + }, + { + "epoch": 0.19137386893120895, + "grad_norm": 2.358888650633162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39460 + }, + { + "epoch": 0.19142236712404503, + "grad_norm": 2.5617448500270257e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39470 + }, + { + "epoch": 0.19147086531688112, + "grad_norm": 2.4210464744101046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39480 + }, + { + "epoch": 0.1915193635097172, + "grad_norm": 2.386641426710412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39490 + }, + { + "epoch": 0.1915678617025533, + "grad_norm": 2.347341251152102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39500 + }, + { + "epoch": 0.1916163598953894, + "grad_norm": 2.314911171197309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39510 + }, + { + "epoch": 0.19166485808822548, + "grad_norm": 2.447167162245023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39520 + }, + { + "epoch": 0.19171335628106156, + "grad_norm": 2.2984884253673954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39530 + }, + { + "epoch": 0.19176185447389765, + "grad_norm": 2.3143065845943056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39540 + }, + { + "epoch": 0.19181035266673377, + "grad_norm": 2.259353323097457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39550 + }, + { + "epoch": 0.19185885085956986, + "grad_norm": 2.265480361529626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39560 + }, + { + "epoch": 0.19190734905240595, + "grad_norm": 2.1701985133404378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39570 + }, + { + "epoch": 0.19195584724524203, + "grad_norm": 2.2931774310563924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39580 + }, + { + "epoch": 0.19200434543807812, + "grad_norm": 2.204634711233666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39590 + }, + { + "epoch": 0.1920528436309142, + "grad_norm": 2.1487312551471405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39600 + }, + { + "epoch": 0.1921013418237503, + "grad_norm": 2.2981041638558963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39610 + }, + { + "epoch": 0.1921498400165864, + "grad_norm": 2.5034439659066265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39620 + }, + { + "epoch": 0.19219833820942248, + "grad_norm": 2.1598780222120695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39630 + }, + { + "epoch": 0.19224683640225856, + "grad_norm": 2.160958047170425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39640 + }, + { + "epoch": 0.19229533459509465, + "grad_norm": 2.1019964151491877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39650 + }, + { + "epoch": 0.19234383278793074, + "grad_norm": 2.106484316755086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39660 + }, + { + "epoch": 0.19239233098076683, + "grad_norm": 2.172003860323457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39670 + }, + { + "epoch": 0.19244082917360292, + "grad_norm": 2.4531939288863214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39680 + }, + { + "epoch": 0.192489327366439, + "grad_norm": 2.0433622012205888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39690 + }, + { + "epoch": 0.1925378255592751, + "grad_norm": 2.030468294833554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39700 + }, + { + "epoch": 0.19258632375211118, + "grad_norm": 2.0778145426447736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39710 + }, + { + "epoch": 0.19263482194494727, + "grad_norm": 1.9503847852320177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39720 + }, + { + "epoch": 0.19268332013778336, + "grad_norm": 1.9637393506855005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39730 + }, + { + "epoch": 0.19273181833061945, + "grad_norm": 2.043743506874307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39740 + }, + { + "epoch": 0.19278031652345554, + "grad_norm": 1.9740355128305964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39750 + }, + { + "epoch": 0.19282881471629162, + "grad_norm": 1.94345034287835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39760 + }, + { + "epoch": 0.1928773129091277, + "grad_norm": 1.9345454802532913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39770 + }, + { + "epoch": 0.1929258111019638, + "grad_norm": 1.971181063709082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39780 + }, + { + "epoch": 0.1929743092947999, + "grad_norm": 1.9336555396876065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39790 + }, + { + "epoch": 0.193022807487636, + "grad_norm": 1.8160457102567307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39800 + }, + { + "epoch": 0.1930713056804721, + "grad_norm": 2.5655742774688406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39810 + }, + { + "epoch": 0.19311980387330818, + "grad_norm": 3.1195829706121003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39820 + }, + { + "epoch": 0.19316830206614427, + "grad_norm": 1.8503474166209344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39830 + }, + { + "epoch": 0.19321680025898036, + "grad_norm": 1.8127759631170193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39840 + }, + { + "epoch": 0.19326529845181645, + "grad_norm": 3.213045602024067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39850 + }, + { + "epoch": 0.19331379664465254, + "grad_norm": 1.8763139451039024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39860 + }, + { + "epoch": 0.19336229483748862, + "grad_norm": 2.3120594505599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39870 + }, + { + "epoch": 0.1934107930303247, + "grad_norm": 2.019548219323042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39880 + }, + { + "epoch": 0.1934592912231608, + "grad_norm": 1.9116093881166307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39890 + }, + { + "epoch": 0.1935077894159969, + "grad_norm": 1.9534174953150796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39900 + }, + { + "epoch": 0.19355628760883298, + "grad_norm": 2.4085197765089106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39910 + }, + { + "epoch": 0.19360478580166907, + "grad_norm": 1.9823758066195296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39920 + }, + { + "epoch": 0.19365328399450515, + "grad_norm": 2.36570781453338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39930 + }, + { + "epoch": 0.19370178218734124, + "grad_norm": 1.966779109352501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39940 + }, + { + "epoch": 0.19375028038017733, + "grad_norm": 1.9038537857340998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39950 + }, + { + "epoch": 0.19379877857301342, + "grad_norm": 2.1257956177578308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39960 + }, + { + "epoch": 0.1938472767658495, + "grad_norm": 2.063083229586482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39970 + }, + { + "epoch": 0.1938957749586856, + "grad_norm": 2.2419035303755663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 39980 + }, + { + "epoch": 0.19394427315152168, + "grad_norm": 3.0842181786283618e-06, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 39990 + }, + { + "epoch": 0.19399277134435777, + "grad_norm": 0.00024580550962127745, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40000 + }, + { + "epoch": 0.19404126953719386, + "grad_norm": 0.00028862495673820376, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40010 + }, + { + "epoch": 0.19408976773002995, + "grad_norm": 9.309439337812364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40020 + }, + { + "epoch": 0.19413826592286604, + "grad_norm": 3.138223837595433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40030 + }, + { + "epoch": 0.19418676411570213, + "grad_norm": 4.4990771129960194e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40040 + }, + { + "epoch": 0.19423526230853821, + "grad_norm": 3.525403735693544e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40050 + }, + { + "epoch": 0.19428376050137433, + "grad_norm": 1.3248386494524311e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40060 + }, + { + "epoch": 0.19433225869421042, + "grad_norm": 1.1138514310005121e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40070 + }, + { + "epoch": 0.1943807568870465, + "grad_norm": 1.0253085747535806e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40080 + }, + { + "epoch": 0.1944292550798826, + "grad_norm": 1.5496741980314255e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40090 + }, + { + "epoch": 0.19447775327271868, + "grad_norm": 1.571449865878094e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40100 + }, + { + "epoch": 0.19452625146555477, + "grad_norm": 8.745313607505523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40110 + }, + { + "epoch": 0.19457474965839086, + "grad_norm": 8.168957720045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40120 + }, + { + "epoch": 0.19462324785122695, + "grad_norm": 4.93675543111749e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 40130 + }, + { + "epoch": 0.19467174604406304, + "grad_norm": 6.952499825274572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40140 + }, + { + "epoch": 0.19472024423689913, + "grad_norm": 2.5989391360781156e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40150 + }, + { + "epoch": 0.19476874242973521, + "grad_norm": 4.273087324691005e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40160 + }, + { + "epoch": 0.1948172406225713, + "grad_norm": 2.701896482903976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40170 + }, + { + "epoch": 0.1948657388154074, + "grad_norm": 2.0979125110898167e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40180 + }, + { + "epoch": 0.19491423700824348, + "grad_norm": 1.3560573279391974e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40190 + }, + { + "epoch": 0.19496273520107957, + "grad_norm": 1.3259826118883211e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40200 + }, + { + "epoch": 0.19501123339391566, + "grad_norm": 1.4433820069825742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40210 + }, + { + "epoch": 0.19505973158675174, + "grad_norm": 1.4116450074652676e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40220 + }, + { + "epoch": 0.19510822977958783, + "grad_norm": 1.230874204338761e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40230 + }, + { + "epoch": 0.19515672797242392, + "grad_norm": 9.958554983313661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40240 + }, + { + "epoch": 0.19520522616526, + "grad_norm": 9.241873158316594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40250 + }, + { + "epoch": 0.1952537243580961, + "grad_norm": 1.0545581972110085e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40260 + }, + { + "epoch": 0.1953022225509322, + "grad_norm": 1.003938632493373e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40270 + }, + { + "epoch": 0.19535072074376827, + "grad_norm": 9.351339940621983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40280 + }, + { + "epoch": 0.19539921893660436, + "grad_norm": 7.981303497217596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40290 + }, + { + "epoch": 0.19544771712944045, + "grad_norm": 7.41575968277175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40300 + }, + { + "epoch": 0.19549621532227657, + "grad_norm": 7.466999250027584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40310 + }, + { + "epoch": 0.19554471351511266, + "grad_norm": 7.463327619916527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40320 + }, + { + "epoch": 0.19559321170794874, + "grad_norm": 6.677674264210509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40330 + }, + { + "epoch": 0.19564170990078483, + "grad_norm": 6.345150268316502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40340 + }, + { + "epoch": 0.19569020809362092, + "grad_norm": 6.420640602300409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40350 + }, + { + "epoch": 0.195738706286457, + "grad_norm": 8.043487468967214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40360 + }, + { + "epoch": 0.1957872044792931, + "grad_norm": 5.939828042755835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40370 + }, + { + "epoch": 0.19583570267212919, + "grad_norm": 3.888862192980014e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40380 + }, + { + "epoch": 0.19588420086496527, + "grad_norm": 5.4573165471083485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40390 + }, + { + "epoch": 0.19593269905780136, + "grad_norm": 5.3486478464037646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40400 + }, + { + "epoch": 0.19598119725063745, + "grad_norm": 5.395621428760933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40410 + }, + { + "epoch": 0.19602969544347354, + "grad_norm": 4.639146936824545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40420 + }, + { + "epoch": 0.19607819363630963, + "grad_norm": 4.95572567160707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40430 + }, + { + "epoch": 0.19612669182914572, + "grad_norm": 4.739460109703941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40440 + }, + { + "epoch": 0.1961751900219818, + "grad_norm": 4.593088306137361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40450 + }, + { + "epoch": 0.1962236882148179, + "grad_norm": 4.525151325651677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40460 + }, + { + "epoch": 0.19627218640765398, + "grad_norm": 3.218557685613632e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40470 + }, + { + "epoch": 0.19632068460049007, + "grad_norm": 4.3462127905513626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40480 + }, + { + "epoch": 0.19636918279332616, + "grad_norm": 4.327780970925232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40490 + }, + { + "epoch": 0.19641768098616225, + "grad_norm": 4.40113217337057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40500 + }, + { + "epoch": 0.19646617917899833, + "grad_norm": 3.957899480155902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40510 + }, + { + "epoch": 0.19651467737183442, + "grad_norm": 4.160456228419207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40520 + }, + { + "epoch": 0.1965631755646705, + "grad_norm": 4.1850589695968665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40530 + }, + { + "epoch": 0.1966116737575066, + "grad_norm": 3.925159035134129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40540 + }, + { + "epoch": 0.1966601719503427, + "grad_norm": 3.893317170877708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40550 + }, + { + "epoch": 0.19670867014317878, + "grad_norm": 3.4662007237784564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40560 + }, + { + "epoch": 0.1967571683360149, + "grad_norm": 3.424271199037321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40570 + }, + { + "epoch": 0.19680566652885098, + "grad_norm": 3.981675035902299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40580 + }, + { + "epoch": 0.19685416472168707, + "grad_norm": 3.707206360559212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40590 + }, + { + "epoch": 0.19690266291452316, + "grad_norm": 3.5449982078716857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40600 + }, + { + "epoch": 0.19695116110735925, + "grad_norm": 4.221460130793275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40610 + }, + { + "epoch": 0.19699965930019533, + "grad_norm": 3.252406713727396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40620 + }, + { + "epoch": 0.19704815749303142, + "grad_norm": 8.424373845627997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40630 + }, + { + "epoch": 0.1970966556858675, + "grad_norm": 3.29858812619932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40640 + }, + { + "epoch": 0.1971451538787036, + "grad_norm": 3.3980213629547507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40650 + }, + { + "epoch": 0.1971936520715397, + "grad_norm": 3.1296910947276046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40660 + }, + { + "epoch": 0.19724215026437578, + "grad_norm": 3.4541135391918942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40670 + }, + { + "epoch": 0.19729064845721186, + "grad_norm": 2.964451596199069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40680 + }, + { + "epoch": 0.19733914665004795, + "grad_norm": 3.2188081604545005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40690 + }, + { + "epoch": 0.19738764484288404, + "grad_norm": 3.0673741093778517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40700 + }, + { + "epoch": 0.19743614303572013, + "grad_norm": 2.728124172790558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40710 + }, + { + "epoch": 0.19748464122855622, + "grad_norm": 2.9483901471394347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40720 + }, + { + "epoch": 0.1975331394213923, + "grad_norm": 2.704244025153457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40730 + }, + { + "epoch": 0.1975816376142284, + "grad_norm": 2.812428874676698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40740 + }, + { + "epoch": 0.19763013580706448, + "grad_norm": 2.8534036573546473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40750 + }, + { + "epoch": 0.19767863399990057, + "grad_norm": 2.626607511047041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40760 + }, + { + "epoch": 0.19772713219273666, + "grad_norm": 3.006352017109748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40770 + }, + { + "epoch": 0.19777563038557275, + "grad_norm": 2.4855155515979277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40780 + }, + { + "epoch": 0.19782412857840884, + "grad_norm": 2.7652197331917705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40790 + }, + { + "epoch": 0.19787262677124492, + "grad_norm": 2.6572513434075518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40800 + }, + { + "epoch": 0.197921124964081, + "grad_norm": 2.47832713284879e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40810 + }, + { + "epoch": 0.1979696231569171, + "grad_norm": 2.484033075234038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40820 + }, + { + "epoch": 0.19801812134975322, + "grad_norm": 2.3587986106576864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40830 + }, + { + "epoch": 0.1980666195425893, + "grad_norm": 2.559384256528574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40840 + }, + { + "epoch": 0.1981151177354254, + "grad_norm": 2.505545353415073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40850 + }, + { + "epoch": 0.19816361592826148, + "grad_norm": 2.1650653252436314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40860 + }, + { + "epoch": 0.19821211412109757, + "grad_norm": 2.2267026906774845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40870 + }, + { + "epoch": 0.19826061231393366, + "grad_norm": 2.454232571835746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40880 + }, + { + "epoch": 0.19830911050676975, + "grad_norm": 2.4102500901790336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40890 + }, + { + "epoch": 0.19835760869960584, + "grad_norm": 2.445121253913385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40900 + }, + { + "epoch": 0.19840610689244192, + "grad_norm": 2.1920927792962175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40910 + }, + { + "epoch": 0.198454605085278, + "grad_norm": 2.7772784960689023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40920 + }, + { + "epoch": 0.1985031032781141, + "grad_norm": 2.0732529719680315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40930 + }, + { + "epoch": 0.1985516014709502, + "grad_norm": 2.227575578217511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40940 + }, + { + "epoch": 0.19860009966378628, + "grad_norm": 2.236915634057368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40950 + }, + { + "epoch": 0.19864859785662237, + "grad_norm": 1.9652172795758815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40960 + }, + { + "epoch": 0.19869709604945845, + "grad_norm": 2.051129058600054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40970 + }, + { + "epoch": 0.19874559424229454, + "grad_norm": 1.9051888102694647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40980 + }, + { + "epoch": 0.19879409243513063, + "grad_norm": 2.1058447146060644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 40990 + }, + { + "epoch": 0.19884259062796672, + "grad_norm": 2.1074761207273696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41000 + }, + { + "epoch": 0.1988910888208028, + "grad_norm": 1.8675231103770784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41010 + }, + { + "epoch": 0.1989395870136389, + "grad_norm": 1.8548824982644874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41020 + }, + { + "epoch": 0.19898808520647498, + "grad_norm": 2.028836433964898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41030 + }, + { + "epoch": 0.19903658339931107, + "grad_norm": 1.9857845927617745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41040 + }, + { + "epoch": 0.19908508159214716, + "grad_norm": 1.8855777170756483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41050 + }, + { + "epoch": 0.19913357978498325, + "grad_norm": 1.7170533510579844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41060 + }, + { + "epoch": 0.19918207797781934, + "grad_norm": 2.189950691899867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41070 + }, + { + "epoch": 0.19923057617065545, + "grad_norm": 1.6504069435541169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41080 + }, + { + "epoch": 0.19927907436349154, + "grad_norm": 1.9421204342506826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41090 + }, + { + "epoch": 0.19932757255632763, + "grad_norm": 1.7390352695656475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41100 + }, + { + "epoch": 0.19937607074916372, + "grad_norm": 1.6036225360949175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41110 + }, + { + "epoch": 0.1994245689419998, + "grad_norm": 1.4813695088378154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41120 + }, + { + "epoch": 0.1994730671348359, + "grad_norm": 1.5755024378449889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41130 + }, + { + "epoch": 0.19952156532767198, + "grad_norm": 1.8994809352079756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41140 + }, + { + "epoch": 0.19957006352050807, + "grad_norm": 1.6673307072778698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41150 + }, + { + "epoch": 0.19961856171334416, + "grad_norm": 1.5574945564367226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41160 + }, + { + "epoch": 0.19966705990618025, + "grad_norm": 1.464688125452085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41170 + }, + { + "epoch": 0.19971555809901634, + "grad_norm": 1.4722808145961608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41180 + }, + { + "epoch": 0.19976405629185243, + "grad_norm": 1.627947540328023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41190 + }, + { + "epoch": 0.19981255448468851, + "grad_norm": 1.619360091353883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41200 + }, + { + "epoch": 0.1998610526775246, + "grad_norm": 1.3756157386524137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41210 + }, + { + "epoch": 0.1999095508703607, + "grad_norm": 1.4630376199420425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41220 + }, + { + "epoch": 0.19995804906319678, + "grad_norm": 6.19625916442601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41230 + }, + { + "epoch": 0.20000654725603287, + "grad_norm": 1.5308052070395206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41240 + }, + { + "epoch": 0.20005504544886896, + "grad_norm": 1.5224261460389243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41250 + }, + { + "epoch": 0.20010354364170505, + "grad_norm": 1.526126652606763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41260 + }, + { + "epoch": 0.20015204183454113, + "grad_norm": 1.5140051345952088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41270 + }, + { + "epoch": 0.20020054002737722, + "grad_norm": 1.3426628129309393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41280 + }, + { + "epoch": 0.2002490382202133, + "grad_norm": 1.4006703850100166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41290 + }, + { + "epoch": 0.2002975364130494, + "grad_norm": 1.3928977296018275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41300 + }, + { + "epoch": 0.2003460346058855, + "grad_norm": 1.2912971669720719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41310 + }, + { + "epoch": 0.20039453279872158, + "grad_norm": 1.3417758282230352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41320 + }, + { + "epoch": 0.20044303099155766, + "grad_norm": 1.3532037428376498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41330 + }, + { + "epoch": 0.20049152918439378, + "grad_norm": 1.3611102076538373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41340 + }, + { + "epoch": 0.20054002737722987, + "grad_norm": 1.318037902819924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41350 + }, + { + "epoch": 0.20058852557006596, + "grad_norm": 1.1990836128461524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41360 + }, + { + "epoch": 0.20063702376290204, + "grad_norm": 1.2548327958938899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41370 + }, + { + "epoch": 0.20068552195573813, + "grad_norm": 1.2288862762943609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41380 + }, + { + "epoch": 0.20073402014857422, + "grad_norm": 1.2574988659252995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41390 + }, + { + "epoch": 0.2007825183414103, + "grad_norm": 1.2610712474270258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41400 + }, + { + "epoch": 0.2008310165342464, + "grad_norm": 1.1633222811724409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41410 + }, + { + "epoch": 0.2008795147270825, + "grad_norm": 1.2676249525611638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41420 + }, + { + "epoch": 0.20092801291991857, + "grad_norm": 1.1006900422216859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41430 + }, + { + "epoch": 0.20097651111275466, + "grad_norm": 1.1805250323959626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41440 + }, + { + "epoch": 0.20102500930559075, + "grad_norm": 1.1825082992800162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41450 + }, + { + "epoch": 0.20107350749842684, + "grad_norm": 1.040358597492741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41460 + }, + { + "epoch": 0.20112200569126293, + "grad_norm": 9.912030236591818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41470 + }, + { + "epoch": 0.20117050388409902, + "grad_norm": 1.0679165143301361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41480 + }, + { + "epoch": 0.2012190020769351, + "grad_norm": 1.1006679869751679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41490 + }, + { + "epoch": 0.2012675002697712, + "grad_norm": 1.2108448572689667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41500 + }, + { + "epoch": 0.20131599846260728, + "grad_norm": 9.179619269161776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41510 + }, + { + "epoch": 0.20136449665544337, + "grad_norm": 9.57503289100714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41520 + }, + { + "epoch": 0.20141299484827946, + "grad_norm": 1.0678239732442307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41530 + }, + { + "epoch": 0.20146149304111555, + "grad_norm": 1.1012861023118603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41540 + }, + { + "epoch": 0.20150999123395164, + "grad_norm": 9.967507139663212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41550 + }, + { + "epoch": 0.20155848942678772, + "grad_norm": 9.894858976622345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41560 + }, + { + "epoch": 0.2016069876196238, + "grad_norm": 9.115801162806747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41570 + }, + { + "epoch": 0.2016554858124599, + "grad_norm": 9.71820782069699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41580 + }, + { + "epoch": 0.20170398400529602, + "grad_norm": 9.609991593606537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41590 + }, + { + "epoch": 0.2017524821981321, + "grad_norm": 9.648438208387233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41600 + }, + { + "epoch": 0.2018009803909682, + "grad_norm": 8.16877161469165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41610 + }, + { + "epoch": 0.20184947858380428, + "grad_norm": 9.394521498506947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41620 + }, + { + "epoch": 0.20189797677664037, + "grad_norm": 8.893939593690448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41630 + }, + { + "epoch": 0.20194647496947646, + "grad_norm": 9.958031341739115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41640 + }, + { + "epoch": 0.20199497316231255, + "grad_norm": 8.930156809583423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41650 + }, + { + "epoch": 0.20204347135514863, + "grad_norm": 9.477338380747824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41660 + }, + { + "epoch": 0.20209196954798472, + "grad_norm": 8.449481470051978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41670 + }, + { + "epoch": 0.2021404677408208, + "grad_norm": 7.724709689682641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41680 + }, + { + "epoch": 0.2021889659336569, + "grad_norm": 9.188665330839285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41690 + }, + { + "epoch": 0.202237464126493, + "grad_norm": 8.189107916223293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41700 + }, + { + "epoch": 0.20228596231932908, + "grad_norm": 8.186060540538165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41710 + }, + { + "epoch": 0.20233446051216517, + "grad_norm": 8.122152053147147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41720 + }, + { + "epoch": 0.20238295870500125, + "grad_norm": 7.614934816047025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41730 + }, + { + "epoch": 0.20243145689783734, + "grad_norm": 8.478346558149497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41740 + }, + { + "epoch": 0.20247995509067343, + "grad_norm": 8.041942578529415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41750 + }, + { + "epoch": 0.20252845328350952, + "grad_norm": 7.743778382973687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41760 + }, + { + "epoch": 0.2025769514763456, + "grad_norm": 8.160253059941169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41770 + }, + { + "epoch": 0.2026254496691817, + "grad_norm": 7.667539989597572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41780 + }, + { + "epoch": 0.20267394786201778, + "grad_norm": 8.437909855274484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41790 + }, + { + "epoch": 0.20272244605485387, + "grad_norm": 7.825491366020287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41800 + }, + { + "epoch": 0.20277094424768996, + "grad_norm": 6.758202744094888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41810 + }, + { + "epoch": 0.20281944244052605, + "grad_norm": 7.609592103108298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41820 + }, + { + "epoch": 0.20286794063336214, + "grad_norm": 7.172210985117999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41830 + }, + { + "epoch": 0.20291643882619823, + "grad_norm": 7.509401598326804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41840 + }, + { + "epoch": 0.20296493701903434, + "grad_norm": 7.420778160849295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41850 + }, + { + "epoch": 0.20301343521187043, + "grad_norm": 6.739111313436297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41860 + }, + { + "epoch": 0.20306193340470652, + "grad_norm": 6.664699867542367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41870 + }, + { + "epoch": 0.2031104315975426, + "grad_norm": 6.414967401724425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41880 + }, + { + "epoch": 0.2031589297903787, + "grad_norm": 7.265605859174684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41890 + }, + { + "epoch": 0.20320742798321478, + "grad_norm": 7.16971953806933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41900 + }, + { + "epoch": 0.20325592617605087, + "grad_norm": 7.168907814047998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41910 + }, + { + "epoch": 0.20330442436888696, + "grad_norm": 7.555749448329152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41920 + }, + { + "epoch": 0.20335292256172305, + "grad_norm": 6.475103191405651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41930 + }, + { + "epoch": 0.20340142075455914, + "grad_norm": 6.351311867547338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41940 + }, + { + "epoch": 0.20344991894739523, + "grad_norm": 6.77178888963681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41950 + }, + { + "epoch": 0.2034984171402313, + "grad_norm": 6.582286005141214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41960 + }, + { + "epoch": 0.2035469153330674, + "grad_norm": 6.621330612688325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41970 + }, + { + "epoch": 0.2035954135259035, + "grad_norm": 6.540084314110572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41980 + }, + { + "epoch": 0.20364391171873958, + "grad_norm": 6.502309020106622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 41990 + }, + { + "epoch": 0.20369240991157567, + "grad_norm": 6.428702477023762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42000 + }, + { + "epoch": 0.20374090810441176, + "grad_norm": 6.09397375228582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42010 + }, + { + "epoch": 0.20378940629724784, + "grad_norm": 6.619561077059188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42020 + }, + { + "epoch": 0.20383790449008393, + "grad_norm": 5.940819960414956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42030 + }, + { + "epoch": 0.20388640268292002, + "grad_norm": 6.241909318305261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42040 + }, + { + "epoch": 0.2039349008757561, + "grad_norm": 6.3297278529717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42050 + }, + { + "epoch": 0.2039833990685922, + "grad_norm": 6.375411771841755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42060 + }, + { + "epoch": 0.20403189726142829, + "grad_norm": 5.653932930727024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42070 + }, + { + "epoch": 0.20408039545426437, + "grad_norm": 5.750084710598458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42080 + }, + { + "epoch": 0.20412889364710046, + "grad_norm": 5.970740630800719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42090 + }, + { + "epoch": 0.20417739183993655, + "grad_norm": 6.189606551743054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42100 + }, + { + "epoch": 0.20422589003277267, + "grad_norm": 5.484429266289226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42110 + }, + { + "epoch": 0.20427438822560876, + "grad_norm": 6.298563448581262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42120 + }, + { + "epoch": 0.20432288641844484, + "grad_norm": 5.815750796500652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42130 + }, + { + "epoch": 0.20437138461128093, + "grad_norm": 5.629451038657862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42140 + }, + { + "epoch": 0.20441988280411702, + "grad_norm": 5.58931219529768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42150 + }, + { + "epoch": 0.2044683809969531, + "grad_norm": 5.648351475429081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42160 + }, + { + "epoch": 0.2045168791897892, + "grad_norm": 5.064694619250076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42170 + }, + { + "epoch": 0.20456537738262529, + "grad_norm": 6.094017521718342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42180 + }, + { + "epoch": 0.20461387557546137, + "grad_norm": 5.377644356485689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42190 + }, + { + "epoch": 0.20466237376829746, + "grad_norm": 5.599614496532013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42200 + }, + { + "epoch": 0.20471087196113355, + "grad_norm": 1.5721943782409653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42210 + }, + { + "epoch": 0.20475937015396964, + "grad_norm": 5.500434667737863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42220 + }, + { + "epoch": 0.20480786834680573, + "grad_norm": 5.128241014062951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42230 + }, + { + "epoch": 0.20485636653964182, + "grad_norm": 5.219918648435851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42240 + }, + { + "epoch": 0.2049048647324779, + "grad_norm": 4.987103352505073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42250 + }, + { + "epoch": 0.204953362925314, + "grad_norm": 5.699599228137231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42260 + }, + { + "epoch": 0.20500186111815008, + "grad_norm": 5.292182549965219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42270 + }, + { + "epoch": 0.20505035931098617, + "grad_norm": 4.4144709931970283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42280 + }, + { + "epoch": 0.20509885750382226, + "grad_norm": 4.968471216670878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42290 + }, + { + "epoch": 0.20514735569665835, + "grad_norm": 4.980487915418053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42300 + }, + { + "epoch": 0.20519585388949443, + "grad_norm": 5.862465854988841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42310 + }, + { + "epoch": 0.20524435208233052, + "grad_norm": 4.858952706854325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42320 + }, + { + "epoch": 0.2052928502751666, + "grad_norm": 4.959661055181641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42330 + }, + { + "epoch": 0.2053413484680027, + "grad_norm": 4.7603870712009666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42340 + }, + { + "epoch": 0.2053898466608388, + "grad_norm": 4.852773258789966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42350 + }, + { + "epoch": 0.2054383448536749, + "grad_norm": 4.581252142088488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42360 + }, + { + "epoch": 0.205486843046511, + "grad_norm": 4.919810407955083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42370 + }, + { + "epoch": 0.20553534123934708, + "grad_norm": 4.4954538225283613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42380 + }, + { + "epoch": 0.20558383943218317, + "grad_norm": 4.5517347757595417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42390 + }, + { + "epoch": 0.20563233762501926, + "grad_norm": 4.5441589691108675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42400 + }, + { + "epoch": 0.20568083581785535, + "grad_norm": 4.1763033209463174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42410 + }, + { + "epoch": 0.20572933401069143, + "grad_norm": 4.682585768023273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42420 + }, + { + "epoch": 0.20577783220352752, + "grad_norm": 5.039483994551119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42430 + }, + { + "epoch": 0.2058263303963636, + "grad_norm": 4.707995628905337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42440 + }, + { + "epoch": 0.2058748285891997, + "grad_norm": 4.3050528120147646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42450 + }, + { + "epoch": 0.2059233267820358, + "grad_norm": 4.136333586757246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42460 + }, + { + "epoch": 0.20597182497487188, + "grad_norm": 4.447560684184282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42470 + }, + { + "epoch": 0.20602032316770796, + "grad_norm": 5.049462288297946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42480 + }, + { + "epoch": 0.20606882136054405, + "grad_norm": 4.5519450964093267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42490 + }, + { + "epoch": 0.20611731955338014, + "grad_norm": 4.611349311289814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42500 + }, + { + "epoch": 0.20616581774621623, + "grad_norm": 4.358826686257089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42510 + }, + { + "epoch": 0.20621431593905232, + "grad_norm": 1.0043249858426861e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 42520 + }, + { + "epoch": 0.2062628141318884, + "grad_norm": 8.791672007646412e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42530 + }, + { + "epoch": 0.2063113123247245, + "grad_norm": 5.248474280961091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42540 + }, + { + "epoch": 0.20635981051756058, + "grad_norm": 4.6277987166831736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42550 + }, + { + "epoch": 0.20640830871039667, + "grad_norm": 1.992267016248661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42560 + }, + { + "epoch": 0.20645680690323276, + "grad_norm": 2.080432523143827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42570 + }, + { + "epoch": 0.20650530509606885, + "grad_norm": 2.5130393623840064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42580 + }, + { + "epoch": 0.20655380328890494, + "grad_norm": 2.8615786504815333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42590 + }, + { + "epoch": 0.20660230148174102, + "grad_norm": 2.7132916784466943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42600 + }, + { + "epoch": 0.2066507996745771, + "grad_norm": 1.449962269362004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42610 + }, + { + "epoch": 0.20669929786741323, + "grad_norm": 1.5454930917258025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42620 + }, + { + "epoch": 0.20674779606024932, + "grad_norm": 1.151193259829597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42630 + }, + { + "epoch": 0.2067962942530854, + "grad_norm": 2.2921894924365915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42640 + }, + { + "epoch": 0.2068447924459215, + "grad_norm": 2.002899691433413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42650 + }, + { + "epoch": 0.20689329063875758, + "grad_norm": 1.2037970691380906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42660 + }, + { + "epoch": 0.20694178883159367, + "grad_norm": 1.4202928468876053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42670 + }, + { + "epoch": 0.20699028702442976, + "grad_norm": 1.1863196505146334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42680 + }, + { + "epoch": 0.20703878521726585, + "grad_norm": 1.8475271872375743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42690 + }, + { + "epoch": 0.20708728341010194, + "grad_norm": 1.675341877671599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42700 + }, + { + "epoch": 0.20713578160293802, + "grad_norm": 9.926272468874231e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42710 + }, + { + "epoch": 0.2071842797957741, + "grad_norm": 8.910720339372347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42720 + }, + { + "epoch": 0.2072327779886102, + "grad_norm": 7.9846404332784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42730 + }, + { + "epoch": 0.2072812761814463, + "grad_norm": 1.4817717328696745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42740 + }, + { + "epoch": 0.20732977437428238, + "grad_norm": 1.4018555702932645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42750 + }, + { + "epoch": 0.20737827256711847, + "grad_norm": 8.21146784346638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42760 + }, + { + "epoch": 0.20742677075995455, + "grad_norm": 7.451178589690244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42770 + }, + { + "epoch": 0.20747526895279064, + "grad_norm": 7.195412194960227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42780 + }, + { + "epoch": 0.20752376714562673, + "grad_norm": 1.2894251995021477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42790 + }, + { + "epoch": 0.20757226533846282, + "grad_norm": 1.1541028470674064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42800 + }, + { + "epoch": 0.2076207635312989, + "grad_norm": 6.975831752242811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42810 + }, + { + "epoch": 0.207669261724135, + "grad_norm": 7.981662406564283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42820 + }, + { + "epoch": 0.20771775991697108, + "grad_norm": 7.656772140762769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42830 + }, + { + "epoch": 0.20776625810980717, + "grad_norm": 1.155421273324464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42840 + }, + { + "epoch": 0.20781475630264326, + "grad_norm": 1.1503453833938693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42850 + }, + { + "epoch": 0.20786325449547935, + "grad_norm": 6.885355787744629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42860 + }, + { + "epoch": 0.20791175268831547, + "grad_norm": 5.49142077943543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42870 + }, + { + "epoch": 0.20796025088115155, + "grad_norm": 6.728159291924385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42880 + }, + { + "epoch": 0.20800874907398764, + "grad_norm": 9.637831226427807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42890 + }, + { + "epoch": 0.20805724726682373, + "grad_norm": 8.491444418723404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42900 + }, + { + "epoch": 0.20810574545965982, + "grad_norm": 6.916235406606575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42910 + }, + { + "epoch": 0.2081542436524959, + "grad_norm": 5.404856437962735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42920 + }, + { + "epoch": 0.208202741845332, + "grad_norm": 5.687259658770927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42930 + }, + { + "epoch": 0.20825124003816808, + "grad_norm": 9.073150977201294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42940 + }, + { + "epoch": 0.20829973823100417, + "grad_norm": 7.93026856626966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42950 + }, + { + "epoch": 0.20834823642384026, + "grad_norm": 6.023898322382593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42960 + }, + { + "epoch": 0.20839673461667635, + "grad_norm": 5.188796876609558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42970 + }, + { + "epoch": 0.20844523280951244, + "grad_norm": 4.82277187074942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42980 + }, + { + "epoch": 0.20849373100234853, + "grad_norm": 8.172610819201509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 42990 + }, + { + "epoch": 0.20854222919518461, + "grad_norm": 7.665358907615882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43000 + }, + { + "epoch": 0.2085907273880207, + "grad_norm": 4.999358793611464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43010 + }, + { + "epoch": 0.2086392255808568, + "grad_norm": 7.180880174928461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43020 + }, + { + "epoch": 0.20868772377369288, + "grad_norm": 4.89997660224617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43030 + }, + { + "epoch": 0.20873622196652897, + "grad_norm": 7.039570277811436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43040 + }, + { + "epoch": 0.20878472015936506, + "grad_norm": 6.908561545060365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43050 + }, + { + "epoch": 0.20883321835220114, + "grad_norm": 4.5504921786232444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43060 + }, + { + "epoch": 0.20888171654503723, + "grad_norm": 4.413494707478094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43070 + }, + { + "epoch": 0.20893021473787332, + "grad_norm": 4.260033108494099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43080 + }, + { + "epoch": 0.2089787129307094, + "grad_norm": 6.051931222827989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43090 + }, + { + "epoch": 0.2090272111235455, + "grad_norm": 6.273899657571747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43100 + }, + { + "epoch": 0.20907570931638159, + "grad_norm": 4.954061409989663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43110 + }, + { + "epoch": 0.20912420750921767, + "grad_norm": 4.051998416798597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43120 + }, + { + "epoch": 0.2091727057020538, + "grad_norm": 4.106464075448457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43130 + }, + { + "epoch": 0.20922120389488988, + "grad_norm": 5.620104843728768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43140 + }, + { + "epoch": 0.20926970208772597, + "grad_norm": 5.855667950527277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43150 + }, + { + "epoch": 0.20931820028056206, + "grad_norm": 4.5538575932368985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43160 + }, + { + "epoch": 0.20936669847339814, + "grad_norm": 4.6336748482644907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43170 + }, + { + "epoch": 0.20941519666623423, + "grad_norm": 0.0025256131775677204, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43180 + }, + { + "epoch": 0.20946369485907032, + "grad_norm": 0.00027210565167479217, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43190 + }, + { + "epoch": 0.2095121930519064, + "grad_norm": 2.9691414056287613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43200 + }, + { + "epoch": 0.2095606912447425, + "grad_norm": 4.1164650610880926e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 43210 + }, + { + "epoch": 0.20960918943757859, + "grad_norm": 0.00010054219455923885, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 43220 + }, + { + "epoch": 0.20965768763041467, + "grad_norm": 0.0009255201439373195, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 43230 + }, + { + "epoch": 0.20970618582325076, + "grad_norm": 0.21325497329235077, + "learning_rate": 0.0002, + "loss": 0.005, + "step": 43240 + }, + { + "epoch": 0.20975468401608685, + "grad_norm": 0.0005386670818552375, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 43250 + }, + { + "epoch": 0.20980318220892294, + "grad_norm": 0.04221387952566147, + "learning_rate": 0.0002, + "loss": 0.0031, + "step": 43260 + }, + { + "epoch": 0.20985168040175903, + "grad_norm": 0.004527793265879154, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 43270 + }, + { + "epoch": 0.20990017859459512, + "grad_norm": 0.00033468400943093, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 43280 + }, + { + "epoch": 0.2099486767874312, + "grad_norm": 0.00023521148250438273, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43290 + }, + { + "epoch": 0.2099971749802673, + "grad_norm": 0.010813545435667038, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 43300 + }, + { + "epoch": 0.21004567317310338, + "grad_norm": 0.00025728141190484166, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43310 + }, + { + "epoch": 0.21009417136593947, + "grad_norm": 7.029631524346769e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43320 + }, + { + "epoch": 0.21014266955877556, + "grad_norm": 2.861136272258591e-05, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 43330 + }, + { + "epoch": 0.21019116775161165, + "grad_norm": 0.00025527618709020317, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43340 + }, + { + "epoch": 0.21023966594444773, + "grad_norm": 0.00033237782190553844, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43350 + }, + { + "epoch": 0.21028816413728382, + "grad_norm": 0.00011926570732612163, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43360 + }, + { + "epoch": 0.2103366623301199, + "grad_norm": 6.830121856182814e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 43370 + }, + { + "epoch": 0.210385160522956, + "grad_norm": 4.4074997276766226e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43380 + }, + { + "epoch": 0.21043365871579212, + "grad_norm": 6.111499533290043e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43390 + }, + { + "epoch": 0.2104821569086282, + "grad_norm": 5.0791571993613616e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43400 + }, + { + "epoch": 0.2105306551014643, + "grad_norm": 2.9939641535747796e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43410 + }, + { + "epoch": 0.21057915329430038, + "grad_norm": 2.7703265004674904e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43420 + }, + { + "epoch": 0.21062765148713647, + "grad_norm": 2.5758077754289843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43430 + }, + { + "epoch": 0.21067614967997256, + "grad_norm": 3.504352571326308e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43440 + }, + { + "epoch": 0.21072464787280865, + "grad_norm": 2.8223354092915542e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43450 + }, + { + "epoch": 0.21077314606564473, + "grad_norm": 9.320751269115135e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43460 + }, + { + "epoch": 0.21082164425848082, + "grad_norm": 1.9032628188142553e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43470 + }, + { + "epoch": 0.2108701424513169, + "grad_norm": 1.932229679368902e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 43480 + }, + { + "epoch": 0.210918640644153, + "grad_norm": 3.593949440983124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43490 + }, + { + "epoch": 0.2109671388369891, + "grad_norm": 3.552353518898599e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43500 + }, + { + "epoch": 0.21101563702982518, + "grad_norm": 3.776041558012366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43510 + }, + { + "epoch": 0.21106413522266126, + "grad_norm": 3.565052247722633e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43520 + }, + { + "epoch": 0.21111263341549735, + "grad_norm": 3.1051607948029414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43530 + }, + { + "epoch": 0.21116113160833344, + "grad_norm": 3.6865942092845216e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 43540 + }, + { + "epoch": 0.21120962980116953, + "grad_norm": 0.00151911866851151, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43550 + }, + { + "epoch": 0.21125812799400562, + "grad_norm": 0.00018920454022008926, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43560 + }, + { + "epoch": 0.2113066261868417, + "grad_norm": 0.0001346040953649208, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43570 + }, + { + "epoch": 0.2113551243796778, + "grad_norm": 8.851649181451648e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43580 + }, + { + "epoch": 0.21140362257251388, + "grad_norm": 2.371231312281452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43590 + }, + { + "epoch": 0.21145212076534997, + "grad_norm": 2.2120553694549017e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43600 + }, + { + "epoch": 0.21150061895818606, + "grad_norm": 5.221955507295206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43610 + }, + { + "epoch": 0.21154911715102215, + "grad_norm": 4.317570710554719e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43620 + }, + { + "epoch": 0.21159761534385824, + "grad_norm": 3.267827196395956e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43630 + }, + { + "epoch": 0.21164611353669435, + "grad_norm": 1.493541549280053e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43640 + }, + { + "epoch": 0.21169461172953044, + "grad_norm": 1.3505956303561106e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43650 + }, + { + "epoch": 0.21174310992236653, + "grad_norm": 2.921517261711415e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43660 + }, + { + "epoch": 0.21179160811520262, + "grad_norm": 2.63092642853735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43670 + }, + { + "epoch": 0.2118401063080387, + "grad_norm": 2.1839854525751434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43680 + }, + { + "epoch": 0.2118886045008748, + "grad_norm": 1.2126485671615228e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43690 + }, + { + "epoch": 0.21193710269371088, + "grad_norm": 1.180659091915004e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43700 + }, + { + "epoch": 0.21198560088654697, + "grad_norm": 1.9271519704489037e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43710 + }, + { + "epoch": 0.21203409907938306, + "grad_norm": 1.8837943571270443e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43720 + }, + { + "epoch": 0.21208259727221915, + "grad_norm": 1.787879955372773e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43730 + }, + { + "epoch": 0.21213109546505524, + "grad_norm": 1.004559817374684e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43740 + }, + { + "epoch": 0.21217959365789132, + "grad_norm": 1.1170114703418221e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43750 + }, + { + "epoch": 0.2122280918507274, + "grad_norm": 3.2711475796531886e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 43760 + }, + { + "epoch": 0.2122765900435635, + "grad_norm": 9.983505879063159e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43770 + }, + { + "epoch": 0.2123250882363996, + "grad_norm": 0.0001182563864858821, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43780 + }, + { + "epoch": 0.21237358642923568, + "grad_norm": 1.4840528820059262e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43790 + }, + { + "epoch": 0.21242208462207177, + "grad_norm": 1.2349479220574722e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43800 + }, + { + "epoch": 0.21247058281490785, + "grad_norm": 0.00029410808929242194, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 43810 + }, + { + "epoch": 0.21251908100774394, + "grad_norm": 0.0005426523275673389, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43820 + }, + { + "epoch": 0.21256757920058003, + "grad_norm": 0.0006244051037356257, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 43830 + }, + { + "epoch": 0.21261607739341612, + "grad_norm": 2.137666342605371e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43840 + }, + { + "epoch": 0.2126645755862522, + "grad_norm": 1.6570866137044504e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43850 + }, + { + "epoch": 0.2127130737790883, + "grad_norm": 0.00013003204367123544, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43860 + }, + { + "epoch": 0.21276157197192438, + "grad_norm": 8.036556391743943e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43870 + }, + { + "epoch": 0.21281007016476047, + "grad_norm": 5.282653364702128e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43880 + }, + { + "epoch": 0.21285856835759656, + "grad_norm": 8.821019946481101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43890 + }, + { + "epoch": 0.21290706655043268, + "grad_norm": 8.028933734749444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43900 + }, + { + "epoch": 0.21295556474326877, + "grad_norm": 3.1933690479490906e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43910 + }, + { + "epoch": 0.21300406293610485, + "grad_norm": 2.8740274501615204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43920 + }, + { + "epoch": 0.21305256112894094, + "grad_norm": 2.340229184483178e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43930 + }, + { + "epoch": 0.21310105932177703, + "grad_norm": 6.986516837059753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43940 + }, + { + "epoch": 0.21314955751461312, + "grad_norm": 6.503674285340821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43950 + }, + { + "epoch": 0.2131980557074492, + "grad_norm": 1.8739741790341213e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43960 + }, + { + "epoch": 0.2132465539002853, + "grad_norm": 1.7199536159751005e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43970 + }, + { + "epoch": 0.21329505209312138, + "grad_norm": 0.0004314961261115968, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43980 + }, + { + "epoch": 0.21334355028595747, + "grad_norm": 6.122402282926487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 43990 + }, + { + "epoch": 0.21339204847879356, + "grad_norm": 5.293768481351435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44000 + }, + { + "epoch": 0.21344054667162965, + "grad_norm": 1.418955889675999e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44010 + }, + { + "epoch": 0.21348904486446574, + "grad_norm": 1.2981986401428003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44020 + }, + { + "epoch": 0.21353754305730183, + "grad_norm": 1.2961494576302357e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44030 + }, + { + "epoch": 0.21358604125013791, + "grad_norm": 5.512875759450253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44040 + }, + { + "epoch": 0.213634539442974, + "grad_norm": 5.000739747629268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44050 + }, + { + "epoch": 0.2136830376358101, + "grad_norm": 1.183657059300458e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44060 + }, + { + "epoch": 0.21373153582864618, + "grad_norm": 1.1075645488745067e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44070 + }, + { + "epoch": 0.21378003402148227, + "grad_norm": 8.885132046998478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44080 + }, + { + "epoch": 0.21382853221431836, + "grad_norm": 4.999870725441724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44090 + }, + { + "epoch": 0.21387703040715444, + "grad_norm": 4.923457254335517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44100 + }, + { + "epoch": 0.21392552859999053, + "grad_norm": 8.779086783761159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44110 + }, + { + "epoch": 0.21397402679282662, + "grad_norm": 8.390079528908245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44120 + }, + { + "epoch": 0.2140225249856627, + "grad_norm": 8.674586752022151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44130 + }, + { + "epoch": 0.2140710231784988, + "grad_norm": 4.525832991930656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44140 + }, + { + "epoch": 0.21411952137133491, + "grad_norm": 5.47430772712687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44150 + }, + { + "epoch": 0.214168019564171, + "grad_norm": 7.374311280727852e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 44160 + }, + { + "epoch": 0.2142165177570071, + "grad_norm": 1.3980262338009197e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44170 + }, + { + "epoch": 0.21426501594984318, + "grad_norm": 2.8427832148736343e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 44180 + }, + { + "epoch": 0.21431351414267927, + "grad_norm": 8.373834862140939e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44190 + }, + { + "epoch": 0.21436201233551536, + "grad_norm": 3.1037372536957264e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44200 + }, + { + "epoch": 0.21441051052835144, + "grad_norm": 3.905515404767357e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44210 + }, + { + "epoch": 0.21445900872118753, + "grad_norm": 2.601702362881042e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44220 + }, + { + "epoch": 0.21450750691402362, + "grad_norm": 4.795237327925861e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44230 + }, + { + "epoch": 0.2145560051068597, + "grad_norm": 1.9137660274282098e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 44240 + }, + { + "epoch": 0.2146045032996958, + "grad_norm": 3.771477349800989e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44250 + }, + { + "epoch": 0.2146530014925319, + "grad_norm": 9.36249562073499e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44260 + }, + { + "epoch": 0.21470149968536797, + "grad_norm": 5.762685395893641e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44270 + }, + { + "epoch": 0.21474999787820406, + "grad_norm": 3.5511882742866874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44280 + }, + { + "epoch": 0.21479849607104015, + "grad_norm": 3.1406398193212226e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44290 + }, + { + "epoch": 0.21484699426387624, + "grad_norm": 1.7505744835943915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44300 + }, + { + "epoch": 0.21489549245671233, + "grad_norm": 1.9906599845853634e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44310 + }, + { + "epoch": 0.21494399064954842, + "grad_norm": 1.707895535218995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44320 + }, + { + "epoch": 0.2149924888423845, + "grad_norm": 1.630364204174839e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44330 + }, + { + "epoch": 0.2150409870352206, + "grad_norm": 1.4498530617856886e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44340 + }, + { + "epoch": 0.21508948522805668, + "grad_norm": 1.0402871339465491e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44350 + }, + { + "epoch": 0.21513798342089277, + "grad_norm": 1.3034205949224997e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44360 + }, + { + "epoch": 0.21518648161372886, + "grad_norm": 1.2199256161693484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44370 + }, + { + "epoch": 0.21523497980656495, + "grad_norm": 0.00023830799909774214, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44380 + }, + { + "epoch": 0.21528347799940104, + "grad_norm": 8.922660526877735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44390 + }, + { + "epoch": 0.21533197619223712, + "grad_norm": 8.581833753851242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44400 + }, + { + "epoch": 0.21538047438507324, + "grad_norm": 1.0663887223927304e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44410 + }, + { + "epoch": 0.21542897257790933, + "grad_norm": 9.154142389888875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44420 + }, + { + "epoch": 0.21547747077074542, + "grad_norm": 1.0452767128299456e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44430 + }, + { + "epoch": 0.2155259689635815, + "grad_norm": 6.680730621155817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44440 + }, + { + "epoch": 0.2155744671564176, + "grad_norm": 6.083582775318064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44450 + }, + { + "epoch": 0.21562296534925368, + "grad_norm": 8.606567462265957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44460 + }, + { + "epoch": 0.21567146354208977, + "grad_norm": 1.2893122402601875e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 44470 + }, + { + "epoch": 0.21571996173492586, + "grad_norm": 0.00011857396748382598, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44480 + }, + { + "epoch": 0.21576845992776195, + "grad_norm": 0.00021787721198052168, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44490 + }, + { + "epoch": 0.21581695812059803, + "grad_norm": 0.00010431894042994827, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44500 + }, + { + "epoch": 0.21586545631343412, + "grad_norm": 8.553420047974214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44510 + }, + { + "epoch": 0.2159139545062702, + "grad_norm": 4.37547241745051e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44520 + }, + { + "epoch": 0.2159624526991063, + "grad_norm": 2.286058952449821e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44530 + }, + { + "epoch": 0.2160109508919424, + "grad_norm": 2.4291159206768498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44540 + }, + { + "epoch": 0.21605944908477848, + "grad_norm": 1.833737587730866e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44550 + }, + { + "epoch": 0.21610794727761456, + "grad_norm": 1.9251141566201113e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44560 + }, + { + "epoch": 0.21615644547045065, + "grad_norm": 0.0002246522781206295, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44570 + }, + { + "epoch": 0.21620494366328674, + "grad_norm": 1.4937793821445666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44580 + }, + { + "epoch": 0.21625344185612283, + "grad_norm": 1.2362296729406808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44590 + }, + { + "epoch": 0.21630194004895892, + "grad_norm": 1.0417411431262735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44600 + }, + { + "epoch": 0.216350438241795, + "grad_norm": 1.6134748875629157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44610 + }, + { + "epoch": 0.2163989364346311, + "grad_norm": 0.011439718306064606, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 44620 + }, + { + "epoch": 0.21644743462746718, + "grad_norm": 0.00014642674068454653, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44630 + }, + { + "epoch": 0.21649593282030327, + "grad_norm": 3.182159343850799e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44640 + }, + { + "epoch": 0.21654443101313936, + "grad_norm": 2.731760650931392e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44650 + }, + { + "epoch": 0.21659292920597545, + "grad_norm": 0.00016663342830725014, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44660 + }, + { + "epoch": 0.21664142739881156, + "grad_norm": 8.225506462622434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44670 + }, + { + "epoch": 0.21668992559164765, + "grad_norm": 4.638740938389674e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44680 + }, + { + "epoch": 0.21673842378448374, + "grad_norm": 8.469110980513506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44690 + }, + { + "epoch": 0.21678692197731983, + "grad_norm": 8.667516340210568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44700 + }, + { + "epoch": 0.21683542017015592, + "grad_norm": 2.4072465748758987e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44710 + }, + { + "epoch": 0.216883918362992, + "grad_norm": 2.1399760953499936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44720 + }, + { + "epoch": 0.2169324165558281, + "grad_norm": 2.2357004127115943e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44730 + }, + { + "epoch": 0.21698091474866418, + "grad_norm": 6.2841540966473985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44740 + }, + { + "epoch": 0.21702941294150027, + "grad_norm": 6.314975962595781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44750 + }, + { + "epoch": 0.21707791113433636, + "grad_norm": 1.6169989976333454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44760 + }, + { + "epoch": 0.21712640932717245, + "grad_norm": 1.539216100354679e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44770 + }, + { + "epoch": 0.21717490752000854, + "grad_norm": 1.4625429685111158e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44780 + }, + { + "epoch": 0.21722340571284462, + "grad_norm": 4.94152618557564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44790 + }, + { + "epoch": 0.2172719039056807, + "grad_norm": 4.747114417114062e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44800 + }, + { + "epoch": 0.2173204020985168, + "grad_norm": 1.0975892109854612e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44810 + }, + { + "epoch": 0.2173689002913529, + "grad_norm": 1.3483715520123951e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44820 + }, + { + "epoch": 0.21741739848418898, + "grad_norm": 9.745318493514787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44830 + }, + { + "epoch": 0.21746589667702507, + "grad_norm": 4.4854177758679725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44840 + }, + { + "epoch": 0.21751439486986116, + "grad_norm": 4.222958978061797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44850 + }, + { + "epoch": 0.21756289306269724, + "grad_norm": 9.383594260725658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44860 + }, + { + "epoch": 0.21761139125553333, + "grad_norm": 9.481002962274943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44870 + }, + { + "epoch": 0.21765988944836942, + "grad_norm": 7.718102096987423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44880 + }, + { + "epoch": 0.2177083876412055, + "grad_norm": 3.975800154876197e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44890 + }, + { + "epoch": 0.2177568858340416, + "grad_norm": 3.847512289212318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44900 + }, + { + "epoch": 0.21780538402687769, + "grad_norm": 7.57360703573795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44910 + }, + { + "epoch": 0.2178538822197138, + "grad_norm": 7.301033747353358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44920 + }, + { + "epoch": 0.2179023804125499, + "grad_norm": 6.887350082251942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44930 + }, + { + "epoch": 0.21795087860538598, + "grad_norm": 3.326275191284367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44940 + }, + { + "epoch": 0.21799937679822207, + "grad_norm": 3.3402122880943352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44950 + }, + { + "epoch": 0.21804787499105815, + "grad_norm": 6.808717444073409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44960 + }, + { + "epoch": 0.21809637318389424, + "grad_norm": 6.124336323409807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44970 + }, + { + "epoch": 0.21814487137673033, + "grad_norm": 6.285391464189161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44980 + }, + { + "epoch": 0.21819336956956642, + "grad_norm": 3.4455324566806667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 44990 + }, + { + "epoch": 0.2182418677624025, + "grad_norm": 2.921975465142168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45000 + }, + { + "epoch": 0.2182903659552386, + "grad_norm": 6.863240741949994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45010 + }, + { + "epoch": 0.21833886414807469, + "grad_norm": 6.439491244236706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45020 + }, + { + "epoch": 0.21838736234091077, + "grad_norm": 5.375131877372041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45030 + }, + { + "epoch": 0.21843586053374686, + "grad_norm": 2.8406980163708795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45040 + }, + { + "epoch": 0.21848435872658295, + "grad_norm": 2.7596665859164204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45050 + }, + { + "epoch": 0.21853285691941904, + "grad_norm": 5.020009211875731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45060 + }, + { + "epoch": 0.21858135511225513, + "grad_norm": 5.576396688411478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45070 + }, + { + "epoch": 0.21862985330509122, + "grad_norm": 4.466080554266227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45080 + }, + { + "epoch": 0.2186783514979273, + "grad_norm": 2.497227796993684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45090 + }, + { + "epoch": 0.2187268496907634, + "grad_norm": 2.633130179674481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45100 + }, + { + "epoch": 0.21877534788359948, + "grad_norm": 4.323610028222902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45110 + }, + { + "epoch": 0.21882384607643557, + "grad_norm": 4.620638264896115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45120 + }, + { + "epoch": 0.21887234426927166, + "grad_norm": 7.395290595013648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45130 + }, + { + "epoch": 0.21892084246210775, + "grad_norm": 2.604977680675802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45140 + }, + { + "epoch": 0.21896934065494383, + "grad_norm": 2.5512279080430744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45150 + }, + { + "epoch": 0.21901783884777992, + "grad_norm": 5.8408213590155356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45160 + }, + { + "epoch": 0.219066337040616, + "grad_norm": 3.919387836504029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45170 + }, + { + "epoch": 0.21911483523345213, + "grad_norm": 4.053290467709303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45180 + }, + { + "epoch": 0.21916333342628821, + "grad_norm": 2.4296666651935084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45190 + }, + { + "epoch": 0.2192118316191243, + "grad_norm": 2.125053015333833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45200 + }, + { + "epoch": 0.2192603298119604, + "grad_norm": 3.97516078010085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45210 + }, + { + "epoch": 0.21930882800479648, + "grad_norm": 3.5210630358051276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45220 + }, + { + "epoch": 0.21935732619763257, + "grad_norm": 3.833971732092323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45230 + }, + { + "epoch": 0.21940582439046866, + "grad_norm": 2.0776656128873583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45240 + }, + { + "epoch": 0.21945432258330475, + "grad_norm": 1.9469650851533515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45250 + }, + { + "epoch": 0.21950282077614083, + "grad_norm": 3.4114107165805763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45260 + }, + { + "epoch": 0.21955131896897692, + "grad_norm": 3.41451118401892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45270 + }, + { + "epoch": 0.219599817161813, + "grad_norm": 3.2031421142164618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45280 + }, + { + "epoch": 0.2196483153546491, + "grad_norm": 1.96579617295356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45290 + }, + { + "epoch": 0.2196968135474852, + "grad_norm": 2.0573343135765754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45300 + }, + { + "epoch": 0.21974531174032128, + "grad_norm": 3.074367441513459e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45310 + }, + { + "epoch": 0.21979380993315736, + "grad_norm": 2.9495181479433086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45320 + }, + { + "epoch": 0.21984230812599345, + "grad_norm": 3.464932888164185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45330 + }, + { + "epoch": 0.21989080631882954, + "grad_norm": 1.9328615508129587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45340 + }, + { + "epoch": 0.21993930451166563, + "grad_norm": 1.8781336166284746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45350 + }, + { + "epoch": 0.21998780270450172, + "grad_norm": 2.6926547889161156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45360 + }, + { + "epoch": 0.2200363008973378, + "grad_norm": 2.967031377920648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45370 + }, + { + "epoch": 0.2200847990901739, + "grad_norm": 2.899075525419903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45380 + }, + { + "epoch": 0.22013329728300998, + "grad_norm": 1.6977218137981254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45390 + }, + { + "epoch": 0.22018179547584607, + "grad_norm": 1.7431229935027659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45400 + }, + { + "epoch": 0.22023029366868216, + "grad_norm": 2.603150960567291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45410 + }, + { + "epoch": 0.22027879186151825, + "grad_norm": 2.814032541209599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45420 + }, + { + "epoch": 0.22032729005435436, + "grad_norm": 2.6110255930689164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45430 + }, + { + "epoch": 0.22037578824719045, + "grad_norm": 2.4614269023004454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45440 + }, + { + "epoch": 0.22042428644002654, + "grad_norm": 1.6376179701182991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45450 + }, + { + "epoch": 0.22047278463286263, + "grad_norm": 3.1248903269442962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45460 + }, + { + "epoch": 0.22052128282569872, + "grad_norm": 2.3750876607664395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45470 + }, + { + "epoch": 0.2205697810185348, + "grad_norm": 2.407078000032925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45480 + }, + { + "epoch": 0.2206182792113709, + "grad_norm": 1.5733887721580686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45490 + }, + { + "epoch": 0.22066677740420698, + "grad_norm": 1.5889390851953067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45500 + }, + { + "epoch": 0.22071527559704307, + "grad_norm": 2.430323092994513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45510 + }, + { + "epoch": 0.22076377378987916, + "grad_norm": 2.3141644760471536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45520 + }, + { + "epoch": 0.22081227198271525, + "grad_norm": 2.260946985188639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45530 + }, + { + "epoch": 0.22086077017555134, + "grad_norm": 1.4393339142770856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45540 + }, + { + "epoch": 0.22090926836838742, + "grad_norm": 1.4006312767378404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45550 + }, + { + "epoch": 0.2209577665612235, + "grad_norm": 2.3781794880051166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45560 + }, + { + "epoch": 0.2210062647540596, + "grad_norm": 2.140781816706294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45570 + }, + { + "epoch": 0.2210547629468957, + "grad_norm": 2.200533344876021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45580 + }, + { + "epoch": 0.22110326113973178, + "grad_norm": 1.6798855995148188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45590 + }, + { + "epoch": 0.22115175933256787, + "grad_norm": 1.4065210507396841e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45600 + }, + { + "epoch": 0.22120025752540395, + "grad_norm": 2.101026439049747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45610 + }, + { + "epoch": 0.22124875571824004, + "grad_norm": 2.0513343770289794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45620 + }, + { + "epoch": 0.22129725391107613, + "grad_norm": 2.0097377273486927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45630 + }, + { + "epoch": 0.22134575210391222, + "grad_norm": 1.2894433893961832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45640 + }, + { + "epoch": 0.2213942502967483, + "grad_norm": 1.379494392494962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45650 + }, + { + "epoch": 0.2214427484895844, + "grad_norm": 1.9121637251373613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45660 + }, + { + "epoch": 0.22149124668242048, + "grad_norm": 2.020604597419151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45670 + }, + { + "epoch": 0.22153974487525657, + "grad_norm": 1.7906137372847297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45680 + }, + { + "epoch": 0.2215882430680927, + "grad_norm": 1.416249574504036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45690 + }, + { + "epoch": 0.22163674126092878, + "grad_norm": 1.2185527111796546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45700 + }, + { + "epoch": 0.22168523945376487, + "grad_norm": 1.817553766159108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45710 + }, + { + "epoch": 0.22173373764660095, + "grad_norm": 1.7733360664351494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45720 + }, + { + "epoch": 0.22178223583943704, + "grad_norm": 1.9156545931764413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45730 + }, + { + "epoch": 0.22183073403227313, + "grad_norm": 1.1507728459037025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45740 + }, + { + "epoch": 0.22187923222510922, + "grad_norm": 1.2856804687544354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45750 + }, + { + "epoch": 0.2219277304179453, + "grad_norm": 1.8569157873571385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45760 + }, + { + "epoch": 0.2219762286107814, + "grad_norm": 1.6326351897077984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45770 + }, + { + "epoch": 0.22202472680361748, + "grad_norm": 1.6426637330368976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45780 + }, + { + "epoch": 0.22207322499645357, + "grad_norm": 1.165776211564662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45790 + }, + { + "epoch": 0.22212172318928966, + "grad_norm": 1.1809303259724402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45800 + }, + { + "epoch": 0.22217022138212575, + "grad_norm": 1.710465085125179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45810 + }, + { + "epoch": 0.22221871957496184, + "grad_norm": 1.641877020119864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45820 + }, + { + "epoch": 0.22226721776779793, + "grad_norm": 1.7423000144844991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45830 + }, + { + "epoch": 0.222315715960634, + "grad_norm": 1.0728143706728588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45840 + }, + { + "epoch": 0.2223642141534701, + "grad_norm": 1.049766865435231e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45850 + }, + { + "epoch": 0.2224127123463062, + "grad_norm": 1.6503327060490847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45860 + }, + { + "epoch": 0.22246121053914228, + "grad_norm": 1.6443480035377434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45870 + }, + { + "epoch": 0.22250970873197837, + "grad_norm": 1.5709581475675805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45880 + }, + { + "epoch": 0.22255820692481446, + "grad_norm": 1.0142100563825807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45890 + }, + { + "epoch": 0.22260670511765054, + "grad_norm": 9.430660270481894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45900 + }, + { + "epoch": 0.22265520331048663, + "grad_norm": 1.659940153331263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45910 + }, + { + "epoch": 0.22270370150332272, + "grad_norm": 1.555709786771331e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45920 + }, + { + "epoch": 0.2227521996961588, + "grad_norm": 1.5154998891375726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45930 + }, + { + "epoch": 0.2228006978889949, + "grad_norm": 9.575009016771219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45940 + }, + { + "epoch": 0.222849196081831, + "grad_norm": 1.0670682968338951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45950 + }, + { + "epoch": 0.2228976942746671, + "grad_norm": 1.3684041277883807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45960 + }, + { + "epoch": 0.2229461924675032, + "grad_norm": 1.4004172044224106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45970 + }, + { + "epoch": 0.22299469066033928, + "grad_norm": 1.4533295598084806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45980 + }, + { + "epoch": 0.22304318885317537, + "grad_norm": 9.011251904667006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 45990 + }, + { + "epoch": 0.22309168704601146, + "grad_norm": 9.43434145028732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46000 + }, + { + "epoch": 0.22314018523884754, + "grad_norm": 1.3030524996793247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46010 + }, + { + "epoch": 0.22318868343168363, + "grad_norm": 1.4505853869195562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46020 + }, + { + "epoch": 0.22323718162451972, + "grad_norm": 1.429370854566514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46030 + }, + { + "epoch": 0.2232856798173558, + "grad_norm": 9.280363428842975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46040 + }, + { + "epoch": 0.2233341780101919, + "grad_norm": 8.819721415420645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46050 + }, + { + "epoch": 0.22338267620302799, + "grad_norm": 1.4191191439749673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46060 + }, + { + "epoch": 0.22343117439586407, + "grad_norm": 1.295005631618551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46070 + }, + { + "epoch": 0.22347967258870016, + "grad_norm": 1.4471181657427223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46080 + }, + { + "epoch": 0.22352817078153625, + "grad_norm": 7.962698873598129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46090 + }, + { + "epoch": 0.22357666897437234, + "grad_norm": 8.628183536529832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46100 + }, + { + "epoch": 0.22362516716720843, + "grad_norm": 1.2860773495049216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46110 + }, + { + "epoch": 0.22367366536004452, + "grad_norm": 1.2962196933585801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46120 + }, + { + "epoch": 0.2237221635528806, + "grad_norm": 1.542503582641075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46130 + }, + { + "epoch": 0.2237706617457167, + "grad_norm": 8.063063319241337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46140 + }, + { + "epoch": 0.22381915993855278, + "grad_norm": 8.316914090755745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46150 + }, + { + "epoch": 0.22386765813138887, + "grad_norm": 1.2339108934611431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46160 + }, + { + "epoch": 0.22391615632422496, + "grad_norm": 1.3119141613060492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46170 + }, + { + "epoch": 0.22396465451706105, + "grad_norm": 1.1961258223891491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46180 + }, + { + "epoch": 0.22401315270989713, + "grad_norm": 7.37215032131644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46190 + }, + { + "epoch": 0.22406165090273325, + "grad_norm": 7.501252525798918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46200 + }, + { + "epoch": 0.22411014909556934, + "grad_norm": 1.3794991673421464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46210 + }, + { + "epoch": 0.22415864728840543, + "grad_norm": 1.148086312241503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46220 + }, + { + "epoch": 0.22420714548124152, + "grad_norm": 1.2449849009499303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46230 + }, + { + "epoch": 0.2242556436740776, + "grad_norm": 7.234602321659622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46240 + }, + { + "epoch": 0.2243041418669137, + "grad_norm": 7.066804528221837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46250 + }, + { + "epoch": 0.22435264005974978, + "grad_norm": 1.021553430291533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46260 + }, + { + "epoch": 0.22440113825258587, + "grad_norm": 1.1370659649401205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46270 + }, + { + "epoch": 0.22444963644542196, + "grad_norm": 1.036697881318105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46280 + }, + { + "epoch": 0.22449813463825805, + "grad_norm": 6.457621566369198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46290 + }, + { + "epoch": 0.22454663283109413, + "grad_norm": 6.822698424002738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46300 + }, + { + "epoch": 0.22459513102393022, + "grad_norm": 1.0783654715851299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46310 + }, + { + "epoch": 0.2246436292167663, + "grad_norm": 1.0810531421157066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46320 + }, + { + "epoch": 0.2246921274096024, + "grad_norm": 1.1179421335327788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46330 + }, + { + "epoch": 0.2247406256024385, + "grad_norm": 7.080973887241271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46340 + }, + { + "epoch": 0.22478912379527458, + "grad_norm": 6.834494001850544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46350 + }, + { + "epoch": 0.22483762198811066, + "grad_norm": 9.86660097623826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46360 + }, + { + "epoch": 0.22488612018094675, + "grad_norm": 1.0090478781421552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46370 + }, + { + "epoch": 0.22493461837378284, + "grad_norm": 1.0173644113820046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46380 + }, + { + "epoch": 0.22498311656661893, + "grad_norm": 6.753836032658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46390 + }, + { + "epoch": 0.22503161475945502, + "grad_norm": 6.292310672506574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46400 + }, + { + "epoch": 0.2250801129522911, + "grad_norm": 9.787049748410936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46410 + }, + { + "epoch": 0.2251286111451272, + "grad_norm": 9.690022579889046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46420 + }, + { + "epoch": 0.22517710933796328, + "grad_norm": 9.354669714412012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46430 + }, + { + "epoch": 0.22522560753079937, + "grad_norm": 6.233655085452483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46440 + }, + { + "epoch": 0.22527410572363546, + "grad_norm": 6.47778676921007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46450 + }, + { + "epoch": 0.22532260391647158, + "grad_norm": 1.0619050954119302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46460 + }, + { + "epoch": 0.22537110210930766, + "grad_norm": 8.964826747615007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46470 + }, + { + "epoch": 0.22541960030214375, + "grad_norm": 8.973440230874985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46480 + }, + { + "epoch": 0.22546809849497984, + "grad_norm": 6.253574724723876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46490 + }, + { + "epoch": 0.22551659668781593, + "grad_norm": 6.170441793074133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46500 + }, + { + "epoch": 0.22556509488065202, + "grad_norm": 9.312890369983506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46510 + }, + { + "epoch": 0.2256135930734881, + "grad_norm": 9.454869882574712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46520 + }, + { + "epoch": 0.2256620912663242, + "grad_norm": 1.0617784482747084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46530 + }, + { + "epoch": 0.22571058945916028, + "grad_norm": 5.967881975266209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46540 + }, + { + "epoch": 0.22575908765199637, + "grad_norm": 5.697612550648046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46550 + }, + { + "epoch": 0.22580758584483246, + "grad_norm": 8.719536594981037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46560 + }, + { + "epoch": 0.22585608403766855, + "grad_norm": 8.296636337945529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46570 + }, + { + "epoch": 0.22590458223050464, + "grad_norm": 8.83028405951336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46580 + }, + { + "epoch": 0.22595308042334072, + "grad_norm": 5.091321781947045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46590 + }, + { + "epoch": 0.2260015786161768, + "grad_norm": 5.206881610320124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46600 + }, + { + "epoch": 0.2260500768090129, + "grad_norm": 7.28955512840912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46610 + }, + { + "epoch": 0.226098575001849, + "grad_norm": 7.941089847918192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46620 + }, + { + "epoch": 0.22614707319468508, + "grad_norm": 7.153328738240816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46630 + }, + { + "epoch": 0.22619557138752117, + "grad_norm": 5.316367719387927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46640 + }, + { + "epoch": 0.22624406958035725, + "grad_norm": 4.756593057209102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46650 + }, + { + "epoch": 0.22629256777319334, + "grad_norm": 6.943943731130275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46660 + }, + { + "epoch": 0.22634106596602943, + "grad_norm": 9.65463073043793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46670 + }, + { + "epoch": 0.22638956415886552, + "grad_norm": 1.5584546417812817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46680 + }, + { + "epoch": 0.2264380623517016, + "grad_norm": 4.87239447011234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46690 + }, + { + "epoch": 0.2264865605445377, + "grad_norm": 4.846941124014847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46700 + }, + { + "epoch": 0.2265350587373738, + "grad_norm": 7.194057616288774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46710 + }, + { + "epoch": 0.2265835569302099, + "grad_norm": 7.096890612956486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46720 + }, + { + "epoch": 0.226632055123046, + "grad_norm": 7.75923467699613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46730 + }, + { + "epoch": 0.22668055331588208, + "grad_norm": 4.45511034286028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46740 + }, + { + "epoch": 0.22672905150871817, + "grad_norm": 5.262034505904012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46750 + }, + { + "epoch": 0.22677754970155425, + "grad_norm": 7.22244180906273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46760 + }, + { + "epoch": 0.22682604789439034, + "grad_norm": 6.627849415963283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46770 + }, + { + "epoch": 0.22687454608722643, + "grad_norm": 7.047014491945447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46780 + }, + { + "epoch": 0.22692304428006252, + "grad_norm": 4.2961514168382564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46790 + }, + { + "epoch": 0.2269715424728986, + "grad_norm": 4.217500588765688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46800 + }, + { + "epoch": 0.2270200406657347, + "grad_norm": 7.21424555649719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46810 + }, + { + "epoch": 0.22706853885857078, + "grad_norm": 7.038458988972707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46820 + }, + { + "epoch": 0.22711703705140687, + "grad_norm": 8.317410902236588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46830 + }, + { + "epoch": 0.22716553524424296, + "grad_norm": 4.989040007785661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46840 + }, + { + "epoch": 0.22721403343707905, + "grad_norm": 4.3218415157753043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46850 + }, + { + "epoch": 0.22726253162991514, + "grad_norm": 6.530945029226132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46860 + }, + { + "epoch": 0.22731102982275123, + "grad_norm": 7.068926493047911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46870 + }, + { + "epoch": 0.22735952801558731, + "grad_norm": 6.130221095190791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46880 + }, + { + "epoch": 0.2274080262084234, + "grad_norm": 4.219151321649406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46890 + }, + { + "epoch": 0.2274565244012595, + "grad_norm": 4.359959859812079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46900 + }, + { + "epoch": 0.22750502259409558, + "grad_norm": 6.407441901501443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46910 + }, + { + "epoch": 0.22755352078693167, + "grad_norm": 8.518806566826242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46920 + }, + { + "epoch": 0.22760201897976776, + "grad_norm": 6.160036605251662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46930 + }, + { + "epoch": 0.22765051717260384, + "grad_norm": 4.293053166293248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46940 + }, + { + "epoch": 0.22769901536543993, + "grad_norm": 4.065442738010461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46950 + }, + { + "epoch": 0.22774751355827602, + "grad_norm": 6.504796488115971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46960 + }, + { + "epoch": 0.22779601175111214, + "grad_norm": 5.97985547301505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46970 + }, + { + "epoch": 0.22784450994394823, + "grad_norm": 6.285674771788763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46980 + }, + { + "epoch": 0.22789300813678431, + "grad_norm": 4.1580824472475797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 46990 + }, + { + "epoch": 0.2279415063296204, + "grad_norm": 4.4451084590946266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47000 + }, + { + "epoch": 0.2279900045224565, + "grad_norm": 5.822582238579344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47010 + }, + { + "epoch": 0.22803850271529258, + "grad_norm": 6.694526746287011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47020 + }, + { + "epoch": 0.22808700090812867, + "grad_norm": 5.998629717396398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47030 + }, + { + "epoch": 0.22813549910096476, + "grad_norm": 3.931782259769534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47040 + }, + { + "epoch": 0.22818399729380084, + "grad_norm": 3.9692326936346944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47050 + }, + { + "epoch": 0.22823249548663693, + "grad_norm": 5.475804982779664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47060 + }, + { + "epoch": 0.22828099367947302, + "grad_norm": 5.928845894231927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47070 + }, + { + "epoch": 0.2283294918723091, + "grad_norm": 5.699202461073583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47080 + }, + { + "epoch": 0.2283779900651452, + "grad_norm": 3.915655213404534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47090 + }, + { + "epoch": 0.22842648825798129, + "grad_norm": 3.8276326108643843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47100 + }, + { + "epoch": 0.22847498645081737, + "grad_norm": 5.797083417746762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47110 + }, + { + "epoch": 0.22852348464365346, + "grad_norm": 7.530542802669515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47120 + }, + { + "epoch": 0.22857198283648955, + "grad_norm": 6.062423381081317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47130 + }, + { + "epoch": 0.22862048102932564, + "grad_norm": 3.7878712078054377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47140 + }, + { + "epoch": 0.22866897922216173, + "grad_norm": 3.872022205086978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47150 + }, + { + "epoch": 0.22871747741499782, + "grad_norm": 5.224760002420226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47160 + }, + { + "epoch": 0.2287659756078339, + "grad_norm": 4.876950470134034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47170 + }, + { + "epoch": 0.22881447380067, + "grad_norm": 5.514536951523041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47180 + }, + { + "epoch": 0.22886297199350608, + "grad_norm": 3.544624860296608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47190 + }, + { + "epoch": 0.22891147018634217, + "grad_norm": 4.215687852138217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47200 + }, + { + "epoch": 0.22895996837917826, + "grad_norm": 5.412067594079417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47210 + }, + { + "epoch": 0.22900846657201437, + "grad_norm": 5.377580691856565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47220 + }, + { + "epoch": 0.22905696476485046, + "grad_norm": 5.303173225001956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47230 + }, + { + "epoch": 0.22910546295768655, + "grad_norm": 3.8145586245263985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47240 + }, + { + "epoch": 0.22915396115052264, + "grad_norm": 3.7781779838041984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47250 + }, + { + "epoch": 0.22920245934335873, + "grad_norm": 5.055280212218349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47260 + }, + { + "epoch": 0.22925095753619482, + "grad_norm": 5.018656565880519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47270 + }, + { + "epoch": 0.2292994557290309, + "grad_norm": 4.810922860087885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47280 + }, + { + "epoch": 0.229347953921867, + "grad_norm": 3.7660575458176027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47290 + }, + { + "epoch": 0.22939645211470308, + "grad_norm": 3.383771911558142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47300 + }, + { + "epoch": 0.22944495030753917, + "grad_norm": 4.717783781416074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47310 + }, + { + "epoch": 0.22949344850037526, + "grad_norm": 5.03749504332518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47320 + }, + { + "epoch": 0.22954194669321135, + "grad_norm": 4.251632788054849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47330 + }, + { + "epoch": 0.22959044488604743, + "grad_norm": 3.1721856430522166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47340 + }, + { + "epoch": 0.22963894307888352, + "grad_norm": 3.312579224257206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47350 + }, + { + "epoch": 0.2296874412717196, + "grad_norm": 5.531454121410206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47360 + }, + { + "epoch": 0.2297359394645557, + "grad_norm": 4.6958979282862856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47370 + }, + { + "epoch": 0.2297844376573918, + "grad_norm": 4.766655479215842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47380 + }, + { + "epoch": 0.22983293585022788, + "grad_norm": 3.535079713401501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47390 + }, + { + "epoch": 0.22988143404306396, + "grad_norm": 3.652123155006848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47400 + }, + { + "epoch": 0.22992993223590005, + "grad_norm": 4.7529169933113735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47410 + }, + { + "epoch": 0.22997843042873614, + "grad_norm": 4.732192735446006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47420 + }, + { + "epoch": 0.23002692862157223, + "grad_norm": 4.87831584905507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47430 + }, + { + "epoch": 0.23007542681440832, + "grad_norm": 3.359229481247894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47440 + }, + { + "epoch": 0.2301239250072444, + "grad_norm": 3.361919596045482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47450 + }, + { + "epoch": 0.2301724232000805, + "grad_norm": 5.104814704282035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47460 + }, + { + "epoch": 0.23022092139291658, + "grad_norm": 4.2973613290087087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47470 + }, + { + "epoch": 0.2302694195857527, + "grad_norm": 4.422564927608619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47480 + }, + { + "epoch": 0.2303179177785888, + "grad_norm": 3.1906009212434583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47490 + }, + { + "epoch": 0.23036641597142488, + "grad_norm": 3.040141507426597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47500 + }, + { + "epoch": 0.23041491416426096, + "grad_norm": 7.737234568594431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47510 + }, + { + "epoch": 0.23046341235709705, + "grad_norm": 4.393546930714365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47520 + }, + { + "epoch": 0.23051191054993314, + "grad_norm": 4.5484804900297604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47530 + }, + { + "epoch": 0.23056040874276923, + "grad_norm": 2.912997558723873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47540 + }, + { + "epoch": 0.23060890693560532, + "grad_norm": 3.3046262615243904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47550 + }, + { + "epoch": 0.2306574051284414, + "grad_norm": 5.359555075301614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47560 + }, + { + "epoch": 0.2307059033212775, + "grad_norm": 4.6504598572028044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47570 + }, + { + "epoch": 0.23075440151411358, + "grad_norm": 4.464185110464314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47580 + }, + { + "epoch": 0.23080289970694967, + "grad_norm": 3.1201648198475596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47590 + }, + { + "epoch": 0.23085139789978576, + "grad_norm": 3.0552249086213124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47600 + }, + { + "epoch": 0.23089989609262185, + "grad_norm": 4.385003933293774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47610 + }, + { + "epoch": 0.23094839428545794, + "grad_norm": 4.93383538469061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47620 + }, + { + "epoch": 0.23099689247829402, + "grad_norm": 4.4579252289622673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47630 + }, + { + "epoch": 0.2310453906711301, + "grad_norm": 3.2101058877742616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47640 + }, + { + "epoch": 0.2310938888639662, + "grad_norm": 2.931382994120213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47650 + }, + { + "epoch": 0.2311423870568023, + "grad_norm": 4.003274227670772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47660 + }, + { + "epoch": 0.23119088524963838, + "grad_norm": 4.355181886239734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47670 + }, + { + "epoch": 0.23123938344247447, + "grad_norm": 4.025890518732922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47680 + }, + { + "epoch": 0.23128788163531055, + "grad_norm": 2.904309326368093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47690 + }, + { + "epoch": 0.23133637982814664, + "grad_norm": 2.841889852334134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47700 + }, + { + "epoch": 0.23138487802098273, + "grad_norm": 3.9598052126166294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47710 + }, + { + "epoch": 0.23143337621381882, + "grad_norm": 3.7491813031920174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47720 + }, + { + "epoch": 0.2314818744066549, + "grad_norm": 3.6541410963764065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47730 + }, + { + "epoch": 0.23153037259949102, + "grad_norm": 2.849353109013464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47740 + }, + { + "epoch": 0.2315788707923271, + "grad_norm": 2.7296101734464173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47750 + }, + { + "epoch": 0.2316273689851632, + "grad_norm": 4.1672686279525806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47760 + }, + { + "epoch": 0.2316758671779993, + "grad_norm": 4.304309868530254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47770 + }, + { + "epoch": 0.23172436537083538, + "grad_norm": 5.354825134418206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47780 + }, + { + "epoch": 0.23177286356367147, + "grad_norm": 2.735597206537932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47790 + }, + { + "epoch": 0.23182136175650755, + "grad_norm": 2.688077813672862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47800 + }, + { + "epoch": 0.23186985994934364, + "grad_norm": 3.5277165011393663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47810 + }, + { + "epoch": 0.23191835814217973, + "grad_norm": 3.823568022198742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47820 + }, + { + "epoch": 0.23196685633501582, + "grad_norm": 4.2533415012258047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47830 + }, + { + "epoch": 0.2320153545278519, + "grad_norm": 4.377481559458829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47840 + }, + { + "epoch": 0.232063852720688, + "grad_norm": 2.79572390127214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47850 + }, + { + "epoch": 0.23211235091352408, + "grad_norm": 3.9053614386830304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47860 + }, + { + "epoch": 0.23216084910636017, + "grad_norm": 3.355796991399984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47870 + }, + { + "epoch": 0.23220934729919626, + "grad_norm": 3.9482688407588284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47880 + }, + { + "epoch": 0.23225784549203235, + "grad_norm": 2.81404396673679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47890 + }, + { + "epoch": 0.23230634368486844, + "grad_norm": 2.741161040376028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47900 + }, + { + "epoch": 0.23235484187770453, + "grad_norm": 3.4888196864812926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47910 + }, + { + "epoch": 0.23240334007054061, + "grad_norm": 4.188155173778796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47920 + }, + { + "epoch": 0.2324518382633767, + "grad_norm": 3.760046070055978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47930 + }, + { + "epoch": 0.2325003364562128, + "grad_norm": 2.624817909691046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47940 + }, + { + "epoch": 0.23254883464904888, + "grad_norm": 2.660643758645165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47950 + }, + { + "epoch": 0.23259733284188497, + "grad_norm": 3.571285560610704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47960 + }, + { + "epoch": 0.23264583103472106, + "grad_norm": 3.322715258491371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47970 + }, + { + "epoch": 0.23269432922755715, + "grad_norm": 3.568807471765467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47980 + }, + { + "epoch": 0.23274282742039326, + "grad_norm": 2.564481178524147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 47990 + }, + { + "epoch": 0.23279132561322935, + "grad_norm": 2.534048633151542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48000 + }, + { + "epoch": 0.23283982380606544, + "grad_norm": 3.3263361842728045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48010 + }, + { + "epoch": 0.23288832199890153, + "grad_norm": 3.8225903153943364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48020 + }, + { + "epoch": 0.23293682019173761, + "grad_norm": 3.4780609325935075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48030 + }, + { + "epoch": 0.2329853183845737, + "grad_norm": 2.683886748400255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48040 + }, + { + "epoch": 0.2330338165774098, + "grad_norm": 2.613304843634978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48050 + }, + { + "epoch": 0.23308231477024588, + "grad_norm": 3.627840214903699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48060 + }, + { + "epoch": 0.23313081296308197, + "grad_norm": 3.514620345868025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48070 + }, + { + "epoch": 0.23317931115591806, + "grad_norm": 3.1807408618078625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48080 + }, + { + "epoch": 0.23322780934875414, + "grad_norm": 2.4817066446303215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48090 + }, + { + "epoch": 0.23327630754159023, + "grad_norm": 2.350444958665321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48100 + }, + { + "epoch": 0.23332480573442632, + "grad_norm": 3.617943491462938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48110 + }, + { + "epoch": 0.2333733039272624, + "grad_norm": 3.2123989512911066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48120 + }, + { + "epoch": 0.2334218021200985, + "grad_norm": 3.3195112791872816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48130 + }, + { + "epoch": 0.2334703003129346, + "grad_norm": 2.5961423943954287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48140 + }, + { + "epoch": 0.23351879850577067, + "grad_norm": 2.3743346844184998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48150 + }, + { + "epoch": 0.23356729669860676, + "grad_norm": 3.27854309034592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48160 + }, + { + "epoch": 0.23361579489144285, + "grad_norm": 3.270105537467316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48170 + }, + { + "epoch": 0.23366429308427894, + "grad_norm": 3.5278384302728227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48180 + }, + { + "epoch": 0.23371279127711503, + "grad_norm": 2.4579503588029183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48190 + }, + { + "epoch": 0.23376128946995112, + "grad_norm": 2.3526816050889465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48200 + }, + { + "epoch": 0.2338097876627872, + "grad_norm": 3.955198053517961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48210 + }, + { + "epoch": 0.2338582858556233, + "grad_norm": 2.8577929356288223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48220 + }, + { + "epoch": 0.23390678404845938, + "grad_norm": 3.4342298249612213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48230 + }, + { + "epoch": 0.23395528224129547, + "grad_norm": 2.526910520828096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48240 + }, + { + "epoch": 0.2340037804341316, + "grad_norm": 2.392519604654808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48250 + }, + { + "epoch": 0.23405227862696767, + "grad_norm": 3.3941586252694833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48260 + }, + { + "epoch": 0.23410077681980376, + "grad_norm": 3.1752963991493743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48270 + }, + { + "epoch": 0.23414927501263985, + "grad_norm": 3.4321251973779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48280 + }, + { + "epoch": 0.23419777320547594, + "grad_norm": 2.4502634232703713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48290 + }, + { + "epoch": 0.23424627139831203, + "grad_norm": 2.8347960778773995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48300 + }, + { + "epoch": 0.23429476959114812, + "grad_norm": 2.931745939349639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48310 + }, + { + "epoch": 0.2343432677839842, + "grad_norm": 3.521186613397731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48320 + }, + { + "epoch": 0.2343917659768203, + "grad_norm": 3.1518422360932163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48330 + }, + { + "epoch": 0.23444026416965638, + "grad_norm": 2.376828831529565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48340 + }, + { + "epoch": 0.23448876236249247, + "grad_norm": 2.3673914029131993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48350 + }, + { + "epoch": 0.23453726055532856, + "grad_norm": 3.2573765906818153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48360 + }, + { + "epoch": 0.23458575874816465, + "grad_norm": 2.9459496886374836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48370 + }, + { + "epoch": 0.23463425694100074, + "grad_norm": 3.220509654511261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48380 + }, + { + "epoch": 0.23468275513383682, + "grad_norm": 2.3693593220741604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48390 + }, + { + "epoch": 0.2347312533266729, + "grad_norm": 2.29792135542084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48400 + }, + { + "epoch": 0.234779751519509, + "grad_norm": 3.11954806875292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48410 + }, + { + "epoch": 0.2348282497123451, + "grad_norm": 2.8807525609408913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48420 + }, + { + "epoch": 0.23487674790518118, + "grad_norm": 2.899955973134638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48430 + }, + { + "epoch": 0.23492524609801727, + "grad_norm": 2.2804145771715412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48440 + }, + { + "epoch": 0.23497374429085335, + "grad_norm": 2.2856944781324273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48450 + }, + { + "epoch": 0.23502224248368944, + "grad_norm": 2.880528882087674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48460 + }, + { + "epoch": 0.23507074067652553, + "grad_norm": 2.989370386785595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48470 + }, + { + "epoch": 0.23511923886936162, + "grad_norm": 2.838427519691322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48480 + }, + { + "epoch": 0.2351677370621977, + "grad_norm": 2.1949394124476385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48490 + }, + { + "epoch": 0.23521623525503382, + "grad_norm": 2.5712438400660176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48500 + }, + { + "epoch": 0.2352647334478699, + "grad_norm": 2.832934171692614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48510 + }, + { + "epoch": 0.235313231640706, + "grad_norm": 2.617356074097188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48520 + }, + { + "epoch": 0.2353617298335421, + "grad_norm": 2.776155554329307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48530 + }, + { + "epoch": 0.23541022802637818, + "grad_norm": 2.2578770142445137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48540 + }, + { + "epoch": 0.23545872621921426, + "grad_norm": 2.1512153125513578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48550 + }, + { + "epoch": 0.23550722441205035, + "grad_norm": 3.1964538038664614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48560 + }, + { + "epoch": 0.23555572260488644, + "grad_norm": 2.6392598329039174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48570 + }, + { + "epoch": 0.23560422079772253, + "grad_norm": 2.990802840940887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48580 + }, + { + "epoch": 0.23565271899055862, + "grad_norm": 2.334776922907622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48590 + }, + { + "epoch": 0.2357012171833947, + "grad_norm": 2.364445634839285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48600 + }, + { + "epoch": 0.2357497153762308, + "grad_norm": 6.265290153351089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48610 + }, + { + "epoch": 0.23579821356906688, + "grad_norm": 2.764297732937848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48620 + }, + { + "epoch": 0.23584671176190297, + "grad_norm": 2.6731007096714166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48630 + }, + { + "epoch": 0.23589520995473906, + "grad_norm": 2.1477391953794722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48640 + }, + { + "epoch": 0.23594370814757515, + "grad_norm": 2.1253104875995632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48650 + }, + { + "epoch": 0.23599220634041124, + "grad_norm": 2.7090089815828833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48660 + }, + { + "epoch": 0.23604070453324733, + "grad_norm": 2.725833212480211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48670 + }, + { + "epoch": 0.2360892027260834, + "grad_norm": 2.5163924988191866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48680 + }, + { + "epoch": 0.2361377009189195, + "grad_norm": 2.1383868897828506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48690 + }, + { + "epoch": 0.2361861991117556, + "grad_norm": 2.1915127490501618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48700 + }, + { + "epoch": 0.23623469730459168, + "grad_norm": 2.585722427284054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48710 + }, + { + "epoch": 0.23628319549742777, + "grad_norm": 2.504109488654649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48720 + }, + { + "epoch": 0.23633169369026386, + "grad_norm": 2.6648248763194715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48730 + }, + { + "epoch": 0.23638019188309994, + "grad_norm": 2.6800938712767675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48740 + }, + { + "epoch": 0.23642869007593603, + "grad_norm": 2.0787184951132076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48750 + }, + { + "epoch": 0.23647718826877215, + "grad_norm": 2.490204451532918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48760 + }, + { + "epoch": 0.23652568646160824, + "grad_norm": 2.9380544219748117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48770 + }, + { + "epoch": 0.23657418465444432, + "grad_norm": 2.5528206037961354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48780 + }, + { + "epoch": 0.2366226828472804, + "grad_norm": 2.1240050784854247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48790 + }, + { + "epoch": 0.2366711810401165, + "grad_norm": 2.0387923882481118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48800 + }, + { + "epoch": 0.2367196792329526, + "grad_norm": 2.439998922909581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48810 + }, + { + "epoch": 0.23676817742578868, + "grad_norm": 2.4022372713261575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48820 + }, + { + "epoch": 0.23681667561862477, + "grad_norm": 2.6069409386764164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48830 + }, + { + "epoch": 0.23686517381146086, + "grad_norm": 2.0965954661278374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48840 + }, + { + "epoch": 0.23691367200429694, + "grad_norm": 2.075931320177915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48850 + }, + { + "epoch": 0.23696217019713303, + "grad_norm": 2.569440766819753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48860 + }, + { + "epoch": 0.23701066838996912, + "grad_norm": 2.510998058369296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48870 + }, + { + "epoch": 0.2370591665828052, + "grad_norm": 2.436386239423882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48880 + }, + { + "epoch": 0.2371076647756413, + "grad_norm": 2.0751271279095818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48890 + }, + { + "epoch": 0.23715616296847739, + "grad_norm": 2.0772077391484345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48900 + }, + { + "epoch": 0.23720466116131347, + "grad_norm": 2.307507855903168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48910 + }, + { + "epoch": 0.23725315935414956, + "grad_norm": 4.0234644416159426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48920 + }, + { + "epoch": 0.23730165754698565, + "grad_norm": 2.3602282794854546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48930 + }, + { + "epoch": 0.23735015573982174, + "grad_norm": 1.998138969838692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48940 + }, + { + "epoch": 0.23739865393265783, + "grad_norm": 2.0076535633961612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48950 + }, + { + "epoch": 0.23744715212549392, + "grad_norm": 2.3609996446793957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48960 + }, + { + "epoch": 0.23749565031833, + "grad_norm": 2.2622930373472627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48970 + }, + { + "epoch": 0.2375441485111661, + "grad_norm": 2.3749184663302003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48980 + }, + { + "epoch": 0.23759264670400218, + "grad_norm": 1.9834313036426465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 48990 + }, + { + "epoch": 0.23764114489683827, + "grad_norm": 1.9465274192498327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49000 + }, + { + "epoch": 0.23768964308967436, + "grad_norm": 2.416691700091178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49010 + }, + { + "epoch": 0.23773814128251047, + "grad_norm": 2.2650141318081296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49020 + }, + { + "epoch": 0.23778663947534656, + "grad_norm": 2.2669669874630927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49030 + }, + { + "epoch": 0.23783513766818265, + "grad_norm": 1.997895822114515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49040 + }, + { + "epoch": 0.23788363586101874, + "grad_norm": 2.0219519569764088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49050 + }, + { + "epoch": 0.23793213405385483, + "grad_norm": 2.3097520340797928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49060 + }, + { + "epoch": 0.23798063224669092, + "grad_norm": 2.264962262188419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49070 + }, + { + "epoch": 0.238029130439527, + "grad_norm": 2.2000631361152045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49080 + }, + { + "epoch": 0.2380776286323631, + "grad_norm": 1.9757078462134814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49090 + }, + { + "epoch": 0.23812612682519918, + "grad_norm": 1.8599368445393338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49100 + }, + { + "epoch": 0.23817462501803527, + "grad_norm": 2.3549731054117728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49110 + }, + { + "epoch": 0.23822312321087136, + "grad_norm": 2.373413110490219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49120 + }, + { + "epoch": 0.23827162140370745, + "grad_norm": 2.2244013564431953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49130 + }, + { + "epoch": 0.23832011959654353, + "grad_norm": 1.89929409089018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49140 + }, + { + "epoch": 0.23836861778937962, + "grad_norm": 1.9468016887458361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49150 + }, + { + "epoch": 0.2384171159822157, + "grad_norm": 2.2572150726318796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49160 + }, + { + "epoch": 0.2384656141750518, + "grad_norm": 2.1516967763091088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49170 + }, + { + "epoch": 0.2385141123678879, + "grad_norm": 2.2423272127980454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49180 + }, + { + "epoch": 0.23856261056072398, + "grad_norm": 1.902776034512499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49190 + }, + { + "epoch": 0.23861110875356006, + "grad_norm": 2.0293704494633857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49200 + }, + { + "epoch": 0.23865960694639615, + "grad_norm": 2.216505663454882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49210 + }, + { + "epoch": 0.23870810513923224, + "grad_norm": 2.2269284727371996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49220 + }, + { + "epoch": 0.23875660333206833, + "grad_norm": 2.3425745609984006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49230 + }, + { + "epoch": 0.23880510152490442, + "grad_norm": 1.8860316686186707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49240 + }, + { + "epoch": 0.2388535997177405, + "grad_norm": 1.8714140992415196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49250 + }, + { + "epoch": 0.2389020979105766, + "grad_norm": 2.1842480180112034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49260 + }, + { + "epoch": 0.2389505961034127, + "grad_norm": 2.153907843194247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49270 + }, + { + "epoch": 0.2389990942962488, + "grad_norm": 2.1342668787838193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49280 + }, + { + "epoch": 0.2390475924890849, + "grad_norm": 1.8890844444285904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49290 + }, + { + "epoch": 0.23909609068192098, + "grad_norm": 1.9861498401496647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49300 + }, + { + "epoch": 0.23914458887475706, + "grad_norm": 2.0938209388532414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49310 + }, + { + "epoch": 0.23919308706759315, + "grad_norm": 2.40985258415094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49320 + }, + { + "epoch": 0.23924158526042924, + "grad_norm": 2.0978900749923923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49330 + }, + { + "epoch": 0.23929008345326533, + "grad_norm": 1.8507417109958624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49340 + }, + { + "epoch": 0.23933858164610142, + "grad_norm": 1.8462873185853823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49350 + }, + { + "epoch": 0.2393870798389375, + "grad_norm": 2.0941139666774689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49360 + }, + { + "epoch": 0.2394355780317736, + "grad_norm": 2.0235484043951146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49370 + }, + { + "epoch": 0.23948407622460968, + "grad_norm": 2.102693770211772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49380 + }, + { + "epoch": 0.23953257441744577, + "grad_norm": 1.8222085884644912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49390 + }, + { + "epoch": 0.23958107261028186, + "grad_norm": 1.807187146596334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49400 + }, + { + "epoch": 0.23962957080311795, + "grad_norm": 2.087585357912758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49410 + }, + { + "epoch": 0.23967806899595404, + "grad_norm": 2.0635229702747893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49420 + }, + { + "epoch": 0.23972656718879012, + "grad_norm": 2.1431991115150595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49430 + }, + { + "epoch": 0.2397750653816262, + "grad_norm": 1.7938387486537977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49440 + }, + { + "epoch": 0.2398235635744623, + "grad_norm": 1.7771129989796464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49450 + }, + { + "epoch": 0.2398720617672984, + "grad_norm": 2.065898172531888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49460 + }, + { + "epoch": 0.23992055996013448, + "grad_norm": 2.5016598215188424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49470 + }, + { + "epoch": 0.23996905815297057, + "grad_norm": 2.115996977636314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49480 + }, + { + "epoch": 0.24001755634580665, + "grad_norm": 1.8095032316978177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49490 + }, + { + "epoch": 0.24006605453864274, + "grad_norm": 1.8111583699464973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49500 + }, + { + "epoch": 0.24011455273147883, + "grad_norm": 1.9734073930521845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49510 + }, + { + "epoch": 0.24016305092431492, + "grad_norm": 2.0255194499441131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49520 + }, + { + "epoch": 0.24021154911715104, + "grad_norm": 1.9958959285304445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49530 + }, + { + "epoch": 0.24026004730998712, + "grad_norm": 1.809997058899171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49540 + }, + { + "epoch": 0.2403085455028232, + "grad_norm": 1.734884023107952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49550 + }, + { + "epoch": 0.2403570436956593, + "grad_norm": 2.0847804194090713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49560 + }, + { + "epoch": 0.2404055418884954, + "grad_norm": 2.0905336839405209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49570 + }, + { + "epoch": 0.24045404008133148, + "grad_norm": 1.9979458443231124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49580 + }, + { + "epoch": 0.24050253827416757, + "grad_norm": 1.8572828253127227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49590 + }, + { + "epoch": 0.24055103646700365, + "grad_norm": 1.7612606484362914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49600 + }, + { + "epoch": 0.24059953465983974, + "grad_norm": 1.97830061665627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49610 + }, + { + "epoch": 0.24064803285267583, + "grad_norm": 1.877586868204162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49620 + }, + { + "epoch": 0.24069653104551192, + "grad_norm": 1.9127132588891982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49630 + }, + { + "epoch": 0.240745029238348, + "grad_norm": 1.7700276089271938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49640 + }, + { + "epoch": 0.2407935274311841, + "grad_norm": 1.7728092416291474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49650 + }, + { + "epoch": 0.24084202562402018, + "grad_norm": 1.9119377725473896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49660 + }, + { + "epoch": 0.24089052381685627, + "grad_norm": 1.9360275871349586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49670 + }, + { + "epoch": 0.24093902200969236, + "grad_norm": 7.825407237760373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49680 + }, + { + "epoch": 0.24098752020252845, + "grad_norm": 1.819594643848177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49690 + }, + { + "epoch": 0.24103601839536454, + "grad_norm": 1.7375609218106547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49700 + }, + { + "epoch": 0.24108451658820063, + "grad_norm": 2.0458388405586447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49710 + }, + { + "epoch": 0.24113301478103671, + "grad_norm": 1.9711042398284917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49720 + }, + { + "epoch": 0.2411815129738728, + "grad_norm": 1.8766534992664674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49730 + }, + { + "epoch": 0.2412300111667089, + "grad_norm": 1.7324416035080503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49740 + }, + { + "epoch": 0.24127850935954498, + "grad_norm": 1.7363652204949176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49750 + }, + { + "epoch": 0.24132700755238107, + "grad_norm": 1.9019165620193235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49760 + }, + { + "epoch": 0.24137550574521716, + "grad_norm": 1.9088862757143943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49770 + }, + { + "epoch": 0.24142400393805327, + "grad_norm": 1.7976319099943794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49780 + }, + { + "epoch": 0.24147250213088936, + "grad_norm": 1.7239675287328282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49790 + }, + { + "epoch": 0.24152100032372545, + "grad_norm": 1.7337859503641084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49800 + }, + { + "epoch": 0.24156949851656154, + "grad_norm": 1.9434645537330653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49810 + }, + { + "epoch": 0.24161799670939763, + "grad_norm": 1.8413567204333958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49820 + }, + { + "epoch": 0.24166649490223371, + "grad_norm": 1.850690978244529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49830 + }, + { + "epoch": 0.2417149930950698, + "grad_norm": 1.7120484585575468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49840 + }, + { + "epoch": 0.2417634912879059, + "grad_norm": 1.7340477143079625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49850 + }, + { + "epoch": 0.24181198948074198, + "grad_norm": 1.9753535696054314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49860 + }, + { + "epoch": 0.24186048767357807, + "grad_norm": 1.866066554612189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49870 + }, + { + "epoch": 0.24190898586641416, + "grad_norm": 1.884096860749196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49880 + }, + { + "epoch": 0.24195748405925024, + "grad_norm": 1.7587208844815905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49890 + }, + { + "epoch": 0.24200598225208633, + "grad_norm": 1.7309753275185358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49900 + }, + { + "epoch": 0.24205448044492242, + "grad_norm": 1.8504181298339972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49910 + }, + { + "epoch": 0.2421029786377585, + "grad_norm": 1.9329951328472816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49920 + }, + { + "epoch": 0.2421514768305946, + "grad_norm": 1.8943001123261638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49930 + }, + { + "epoch": 0.24219997502343069, + "grad_norm": 1.6820226278468908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49940 + }, + { + "epoch": 0.24224847321626677, + "grad_norm": 1.6729511287394416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49950 + }, + { + "epoch": 0.24229697140910286, + "grad_norm": 1.8437323490161361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49960 + }, + { + "epoch": 0.24234546960193895, + "grad_norm": 1.8636282561601547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49970 + }, + { + "epoch": 0.24239396779477504, + "grad_norm": 1.850719826279601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49980 + }, + { + "epoch": 0.24244246598761113, + "grad_norm": 1.675720255889246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 49990 + }, + { + "epoch": 0.24249096418044722, + "grad_norm": 2.987940888488083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50000 + }, + { + "epoch": 0.2425394623732833, + "grad_norm": 1.7412357067314588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50010 + }, + { + "epoch": 0.2425879605661194, + "grad_norm": 1.7667605334281689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50020 + }, + { + "epoch": 0.24263645875895548, + "grad_norm": 1.8438710469581565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50030 + }, + { + "epoch": 0.2426849569517916, + "grad_norm": 1.636619515466009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50040 + }, + { + "epoch": 0.24273345514462769, + "grad_norm": 1.673514304911805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50050 + }, + { + "epoch": 0.24278195333746377, + "grad_norm": 1.849271029641386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50060 + }, + { + "epoch": 0.24283045153029986, + "grad_norm": 1.7072341051971307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50070 + }, + { + "epoch": 0.24287894972313595, + "grad_norm": 1.7432665799788083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50080 + }, + { + "epoch": 0.24292744791597204, + "grad_norm": 1.621430953946401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50090 + }, + { + "epoch": 0.24297594610880813, + "grad_norm": 1.6564771954108437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50100 + }, + { + "epoch": 0.24302444430164422, + "grad_norm": 1.9309962340230413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50110 + }, + { + "epoch": 0.2430729424944803, + "grad_norm": 1.6783282319465798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50120 + }, + { + "epoch": 0.2431214406873164, + "grad_norm": 1.7650604888785892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50130 + }, + { + "epoch": 0.24316993888015248, + "grad_norm": 1.6459708263028006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50140 + }, + { + "epoch": 0.24321843707298857, + "grad_norm": 1.6531798507912754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50150 + }, + { + "epoch": 0.24326693526582466, + "grad_norm": 1.6759537402322167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50160 + }, + { + "epoch": 0.24331543345866075, + "grad_norm": 1.645923504156599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50170 + }, + { + "epoch": 0.24336393165149683, + "grad_norm": 1.7020319376115367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50180 + }, + { + "epoch": 0.24341242984433292, + "grad_norm": 1.5699907862654072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50190 + }, + { + "epoch": 0.243460928037169, + "grad_norm": 1.611189190953155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50200 + }, + { + "epoch": 0.2435094262300051, + "grad_norm": 1.6687815218574542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50210 + }, + { + "epoch": 0.2435579244228412, + "grad_norm": 1.7368071780765604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50220 + }, + { + "epoch": 0.24360642261567728, + "grad_norm": 1.7089008963466767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50230 + }, + { + "epoch": 0.24365492080851336, + "grad_norm": 1.5880328874118277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50240 + }, + { + "epoch": 0.24370341900134945, + "grad_norm": 1.6282952230994852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50250 + }, + { + "epoch": 0.24375191719418554, + "grad_norm": 1.720870415056197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50260 + }, + { + "epoch": 0.24380041538702163, + "grad_norm": 1.7442289390601218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50270 + }, + { + "epoch": 0.24384891357985772, + "grad_norm": 9.286865747526463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50280 + }, + { + "epoch": 0.2438974117726938, + "grad_norm": 1.621113909777705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50290 + }, + { + "epoch": 0.24394590996552992, + "grad_norm": 1.5612131676334684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50300 + }, + { + "epoch": 0.243994408158366, + "grad_norm": 1.705084429204362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50310 + }, + { + "epoch": 0.2440429063512021, + "grad_norm": 1.718307061082669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50320 + }, + { + "epoch": 0.2440914045440382, + "grad_norm": 1.706289225467117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50330 + }, + { + "epoch": 0.24413990273687428, + "grad_norm": 1.579677899599119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50340 + }, + { + "epoch": 0.24418840092971036, + "grad_norm": 1.561424909368725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50350 + }, + { + "epoch": 0.24423689912254645, + "grad_norm": 1.6560542803745193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50360 + }, + { + "epoch": 0.24428539731538254, + "grad_norm": 1.6700802518698765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50370 + }, + { + "epoch": 0.24433389550821863, + "grad_norm": 1.6463108920561353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50380 + }, + { + "epoch": 0.24438239370105472, + "grad_norm": 1.5774944017721282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50390 + }, + { + "epoch": 0.2444308918938908, + "grad_norm": 1.5534158137597842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50400 + }, + { + "epoch": 0.2444793900867269, + "grad_norm": 1.6500398203334044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50410 + }, + { + "epoch": 0.24452788827956298, + "grad_norm": 1.6443954109490733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50420 + }, + { + "epoch": 0.24457638647239907, + "grad_norm": 1.639586599821996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50430 + }, + { + "epoch": 0.24462488466523516, + "grad_norm": 1.5649206375201175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50440 + }, + { + "epoch": 0.24467338285807125, + "grad_norm": 1.54122943740731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50450 + }, + { + "epoch": 0.24472188105090734, + "grad_norm": 1.743262600939488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50460 + }, + { + "epoch": 0.24477037924374342, + "grad_norm": 1.618316076701376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50470 + }, + { + "epoch": 0.2448188774365795, + "grad_norm": 1.6806583857942314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50480 + }, + { + "epoch": 0.2448673756294156, + "grad_norm": 1.516876011464774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50490 + }, + { + "epoch": 0.2449158738222517, + "grad_norm": 1.5678625686632586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50500 + }, + { + "epoch": 0.24496437201508778, + "grad_norm": 1.600227648168584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50510 + }, + { + "epoch": 0.24501287020792387, + "grad_norm": 1.6032737448767875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50520 + }, + { + "epoch": 0.24506136840075995, + "grad_norm": 1.640070763642143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50530 + }, + { + "epoch": 0.24510986659359604, + "grad_norm": 1.5147161036566104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50540 + }, + { + "epoch": 0.24515836478643216, + "grad_norm": 1.5559440669221658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50550 + }, + { + "epoch": 0.24520686297926825, + "grad_norm": 1.5779338013999222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50560 + }, + { + "epoch": 0.24525536117210434, + "grad_norm": 1.5785062146278506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50570 + }, + { + "epoch": 0.24530385936494042, + "grad_norm": 1.5518999418873136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50580 + }, + { + "epoch": 0.2453523575577765, + "grad_norm": 1.4956340521621314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50590 + }, + { + "epoch": 0.2454008557506126, + "grad_norm": 1.5447626822151506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50600 + }, + { + "epoch": 0.2454493539434487, + "grad_norm": 1.5757409244088194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50610 + }, + { + "epoch": 0.24549785213628478, + "grad_norm": 1.6351921772184141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50620 + }, + { + "epoch": 0.24554635032912087, + "grad_norm": 1.5308395973079314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50630 + }, + { + "epoch": 0.24559484852195695, + "grad_norm": 1.5200971859030687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50640 + }, + { + "epoch": 0.24564334671479304, + "grad_norm": 1.4749252841284033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50650 + }, + { + "epoch": 0.24569184490762913, + "grad_norm": 1.6253237333785364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50660 + }, + { + "epoch": 0.24574034310046522, + "grad_norm": 1.5700867095347348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50670 + }, + { + "epoch": 0.2457888412933013, + "grad_norm": 1.4866384390188614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50680 + }, + { + "epoch": 0.2458373394861374, + "grad_norm": 1.476020656809851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50690 + }, + { + "epoch": 0.24588583767897348, + "grad_norm": 1.4973373652082955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50700 + }, + { + "epoch": 0.24593433587180957, + "grad_norm": 1.5843635026158154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50710 + }, + { + "epoch": 0.24598283406464566, + "grad_norm": 1.563439582241699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50720 + }, + { + "epoch": 0.24603133225748175, + "grad_norm": 1.5518700990924117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50730 + }, + { + "epoch": 0.24607983045031784, + "grad_norm": 1.478279898492474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50740 + }, + { + "epoch": 0.24612832864315393, + "grad_norm": 1.5702862299349363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50750 + }, + { + "epoch": 0.24617682683599001, + "grad_norm": 1.544721044410835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50760 + }, + { + "epoch": 0.2462253250288261, + "grad_norm": 1.7743781199897057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50770 + }, + { + "epoch": 0.2462738232216622, + "grad_norm": 1.536549945058141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50780 + }, + { + "epoch": 0.24632232141449828, + "grad_norm": 1.4730655095718248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50790 + }, + { + "epoch": 0.24637081960733437, + "grad_norm": 1.860656908547753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50800 + }, + { + "epoch": 0.24641931780017048, + "grad_norm": 1.5096271965830965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50810 + }, + { + "epoch": 0.24646781599300657, + "grad_norm": 1.52067244130194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50820 + }, + { + "epoch": 0.24651631418584266, + "grad_norm": 1.645345975020973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50830 + }, + { + "epoch": 0.24656481237867875, + "grad_norm": 1.473091799653048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50840 + }, + { + "epoch": 0.24661331057151484, + "grad_norm": 1.4897747746545065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50850 + }, + { + "epoch": 0.24666180876435093, + "grad_norm": 1.496374011367152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50860 + }, + { + "epoch": 0.24671030695718701, + "grad_norm": 1.544834020705821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50870 + }, + { + "epoch": 0.2467588051500231, + "grad_norm": 1.5537018782652012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50880 + }, + { + "epoch": 0.2468073033428592, + "grad_norm": 1.4540988502176333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50890 + }, + { + "epoch": 0.24685580153569528, + "grad_norm": 1.4710339257817395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50900 + }, + { + "epoch": 0.24690429972853137, + "grad_norm": 1.5228907557229832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50910 + }, + { + "epoch": 0.24695279792136746, + "grad_norm": 1.5876749159815517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50920 + }, + { + "epoch": 0.24700129611420354, + "grad_norm": 1.558239119958671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50930 + }, + { + "epoch": 0.24704979430703963, + "grad_norm": 1.4818604654465162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50940 + }, + { + "epoch": 0.24709829249987572, + "grad_norm": 1.4273859960667323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50950 + }, + { + "epoch": 0.2471467906927118, + "grad_norm": 1.5146534337873163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50960 + }, + { + "epoch": 0.2471952888855479, + "grad_norm": 1.5060459190863185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50970 + }, + { + "epoch": 0.247243787078384, + "grad_norm": 1.519292567309094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50980 + }, + { + "epoch": 0.24729228527122007, + "grad_norm": 1.4651047308689158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 50990 + }, + { + "epoch": 0.24734078346405616, + "grad_norm": 1.451503521820996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51000 + }, + { + "epoch": 0.24738928165689225, + "grad_norm": 1.563447966645981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51010 + }, + { + "epoch": 0.24743777984972834, + "grad_norm": 1.441253516532015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51020 + }, + { + "epoch": 0.24748627804256443, + "grad_norm": 1.5593769830957172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51030 + }, + { + "epoch": 0.24753477623540052, + "grad_norm": 1.452281281899559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51040 + }, + { + "epoch": 0.2475832744282366, + "grad_norm": 1.4175181206610432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51050 + }, + { + "epoch": 0.24763177262107272, + "grad_norm": 1.3954661426396342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51060 + }, + { + "epoch": 0.2476802708139088, + "grad_norm": 1.456082117101687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51070 + }, + { + "epoch": 0.2477287690067449, + "grad_norm": 1.5203234227101348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51080 + }, + { + "epoch": 0.24777726719958099, + "grad_norm": 1.4304194451142394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51090 + }, + { + "epoch": 0.24782576539241707, + "grad_norm": 1.428798128699782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51100 + }, + { + "epoch": 0.24787426358525316, + "grad_norm": 1.4896180289269978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51110 + }, + { + "epoch": 0.24792276177808925, + "grad_norm": 1.3978986146412353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51120 + }, + { + "epoch": 0.24797125997092534, + "grad_norm": 1.4741674192464416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51130 + }, + { + "epoch": 0.24801975816376143, + "grad_norm": 1.3870810278149293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51140 + }, + { + "epoch": 0.24806825635659752, + "grad_norm": 1.4168951167903288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51150 + }, + { + "epoch": 0.2481167545494336, + "grad_norm": 1.4249086177642312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51160 + }, + { + "epoch": 0.2481652527422697, + "grad_norm": 1.4788815860811155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51170 + }, + { + "epoch": 0.24821375093510578, + "grad_norm": 1.46645476206686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51180 + }, + { + "epoch": 0.24826224912794187, + "grad_norm": 1.404891634138039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51190 + }, + { + "epoch": 0.24831074732077796, + "grad_norm": 1.371994073906535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51200 + }, + { + "epoch": 0.24835924551361405, + "grad_norm": 1.412601875472319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51210 + }, + { + "epoch": 0.24840774370645013, + "grad_norm": 1.416658932384962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51220 + }, + { + "epoch": 0.24845624189928622, + "grad_norm": 1.3804778120629635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51230 + }, + { + "epoch": 0.2485047400921223, + "grad_norm": 1.3882737448511762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51240 + }, + { + "epoch": 0.2485532382849584, + "grad_norm": 1.40665648018512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51250 + }, + { + "epoch": 0.2486017364777945, + "grad_norm": 1.414001218336125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51260 + }, + { + "epoch": 0.24865023467063058, + "grad_norm": 1.4598721520542313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51270 + }, + { + "epoch": 0.24869873286346666, + "grad_norm": 1.4039208906524436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51280 + }, + { + "epoch": 0.24874723105630275, + "grad_norm": 1.4247066815187281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51290 + }, + { + "epoch": 0.24879572924913884, + "grad_norm": 1.4097079770181153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51300 + }, + { + "epoch": 0.24884422744197493, + "grad_norm": 1.3994018388530094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51310 + }, + { + "epoch": 0.24889272563481105, + "grad_norm": 1.376668024022365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51320 + }, + { + "epoch": 0.24894122382764713, + "grad_norm": 1.3710345569961646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51330 + }, + { + "epoch": 0.24898972202048322, + "grad_norm": 1.3408413224169635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51340 + }, + { + "epoch": 0.2490382202133193, + "grad_norm": 1.3809837184908247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51350 + }, + { + "epoch": 0.2490867184061554, + "grad_norm": 1.3756100258888182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51360 + }, + { + "epoch": 0.2491352165989915, + "grad_norm": 1.4201978615346889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51370 + }, + { + "epoch": 0.24918371479182758, + "grad_norm": 1.4159041938910377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51380 + }, + { + "epoch": 0.24923221298466366, + "grad_norm": 1.3505793106105557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51390 + }, + { + "epoch": 0.24928071117749975, + "grad_norm": 1.405773133456023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51400 + }, + { + "epoch": 0.24932920937033584, + "grad_norm": 1.3926509723205527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51410 + }, + { + "epoch": 0.24937770756317193, + "grad_norm": 1.337982240556812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51420 + }, + { + "epoch": 0.24942620575600802, + "grad_norm": 1.3504407547770825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51430 + }, + { + "epoch": 0.2494747039488441, + "grad_norm": 1.340976325536758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51440 + }, + { + "epoch": 0.2495232021416802, + "grad_norm": 1.347086282521559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51450 + }, + { + "epoch": 0.24957170033451628, + "grad_norm": 1.3370591034345125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51460 + }, + { + "epoch": 0.24962019852735237, + "grad_norm": 1.3370649298849457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51470 + }, + { + "epoch": 0.24966869672018846, + "grad_norm": 1.377240153033199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51480 + }, + { + "epoch": 0.24971719491302455, + "grad_norm": 1.3121089637024852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51490 + }, + { + "epoch": 0.24976569310586064, + "grad_norm": 1.370863742522488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51500 + }, + { + "epoch": 0.24981419129869673, + "grad_norm": 1.3865626158349187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51510 + }, + { + "epoch": 0.2498626894915328, + "grad_norm": 1.3973853185689222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51520 + }, + { + "epoch": 0.2499111876843689, + "grad_norm": 1.3456654812671331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51530 + }, + { + "epoch": 0.249959685877205, + "grad_norm": 1.447812110200175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51540 + }, + { + "epoch": 0.2500081840700411, + "grad_norm": 1.3281122335229156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51550 + }, + { + "epoch": 0.2500566822628772, + "grad_norm": 1.3258473074984067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51560 + }, + { + "epoch": 0.2501051804557133, + "grad_norm": 1.3333819026684068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51570 + }, + { + "epoch": 0.25015367864854937, + "grad_norm": 1.3122688358180312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51580 + }, + { + "epoch": 0.25020217684138546, + "grad_norm": 1.3859063585641707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51590 + }, + { + "epoch": 0.25025067503422155, + "grad_norm": 1.333490473598431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51600 + }, + { + "epoch": 0.25029917322705764, + "grad_norm": 1.2942095395374054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51610 + }, + { + "epoch": 0.2503476714198937, + "grad_norm": 1.2893282530512806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51620 + }, + { + "epoch": 0.2503961696127298, + "grad_norm": 1.3001169918425148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51630 + }, + { + "epoch": 0.2504446678055659, + "grad_norm": 1.3491293771039636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51640 + }, + { + "epoch": 0.250493165998402, + "grad_norm": 1.3548023503062723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51650 + }, + { + "epoch": 0.2505416641912381, + "grad_norm": 1.386073904541263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51660 + }, + { + "epoch": 0.25059016238407417, + "grad_norm": 1.3146073740699649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51670 + }, + { + "epoch": 0.25063866057691025, + "grad_norm": 1.3018487265981094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51680 + }, + { + "epoch": 0.25068715876974634, + "grad_norm": 1.2867779730640905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51690 + }, + { + "epoch": 0.25073565696258243, + "grad_norm": 1.2986824060590152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51700 + }, + { + "epoch": 0.2507841551554185, + "grad_norm": 1.2750848554787808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51710 + }, + { + "epoch": 0.2508326533482546, + "grad_norm": 1.327918255356053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51720 + }, + { + "epoch": 0.2508811515410907, + "grad_norm": 1.2826332351778547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51730 + }, + { + "epoch": 0.2509296497339268, + "grad_norm": 1.3028291334649111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51740 + }, + { + "epoch": 0.2509781479267629, + "grad_norm": 1.2590770381848415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51750 + }, + { + "epoch": 0.25102664611959896, + "grad_norm": 1.295982485771674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51760 + }, + { + "epoch": 0.25107514431243505, + "grad_norm": 1.2749136146794626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51770 + }, + { + "epoch": 0.25112364250527114, + "grad_norm": 1.3271635168621287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51780 + }, + { + "epoch": 0.2511721406981072, + "grad_norm": 1.282543706793149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51790 + }, + { + "epoch": 0.2512206388909433, + "grad_norm": 1.2976467189673713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51800 + }, + { + "epoch": 0.2512691370837794, + "grad_norm": 1.2729627485441597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51810 + }, + { + "epoch": 0.2513176352766155, + "grad_norm": 1.3679652965947753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51820 + }, + { + "epoch": 0.2513661334694516, + "grad_norm": 1.2991399955808447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51830 + }, + { + "epoch": 0.25141463166228767, + "grad_norm": 1.303130972019062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51840 + }, + { + "epoch": 0.25146312985512376, + "grad_norm": 1.2688337847066578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51850 + }, + { + "epoch": 0.25151162804795985, + "grad_norm": 1.2572812124744814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51860 + }, + { + "epoch": 0.25156012624079593, + "grad_norm": 1.2692376571976638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51870 + }, + { + "epoch": 0.251608624433632, + "grad_norm": 1.283032702303899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51880 + }, + { + "epoch": 0.2516571226264681, + "grad_norm": 1.2547668859497207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51890 + }, + { + "epoch": 0.2517056208193042, + "grad_norm": 1.2518025016561296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51900 + }, + { + "epoch": 0.2517541190121403, + "grad_norm": 1.2942301452767424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51910 + }, + { + "epoch": 0.2518026172049764, + "grad_norm": 1.2599350895925454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51920 + }, + { + "epoch": 0.25185111539781246, + "grad_norm": 1.411188463862345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51930 + }, + { + "epoch": 0.25189961359064855, + "grad_norm": 1.2343130606495833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51940 + }, + { + "epoch": 0.25194811178348464, + "grad_norm": 1.2305550001201482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51950 + }, + { + "epoch": 0.2519966099763208, + "grad_norm": 1.2463182486044388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51960 + }, + { + "epoch": 0.2520451081691569, + "grad_norm": 1.2609285704456852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51970 + }, + { + "epoch": 0.25209360636199296, + "grad_norm": 1.2500609614107816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51980 + }, + { + "epoch": 0.25214210455482905, + "grad_norm": 1.2087227219126362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 51990 + }, + { + "epoch": 0.25219060274766514, + "grad_norm": 1.2313856245782517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52000 + }, + { + "epoch": 0.2522391009405012, + "grad_norm": 1.237473412629697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52010 + }, + { + "epoch": 0.2522875991333373, + "grad_norm": 1.254572623565764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52020 + }, + { + "epoch": 0.2523360973261734, + "grad_norm": 1.2230395896040136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52030 + }, + { + "epoch": 0.2523845955190095, + "grad_norm": 1.1915713571397646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52040 + }, + { + "epoch": 0.2524330937118456, + "grad_norm": 1.3100262208354252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52050 + }, + { + "epoch": 0.25248159190468167, + "grad_norm": 1.2002855953596736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52060 + }, + { + "epoch": 0.25253009009751776, + "grad_norm": 1.225054546694082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52070 + }, + { + "epoch": 0.25257858829035384, + "grad_norm": 1.1957256162986596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52080 + }, + { + "epoch": 0.25262708648318993, + "grad_norm": 1.2206470501041622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52090 + }, + { + "epoch": 0.252675584676026, + "grad_norm": 1.2198792376238998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52100 + }, + { + "epoch": 0.2527240828688621, + "grad_norm": 1.2473211086216907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52110 + }, + { + "epoch": 0.2527725810616982, + "grad_norm": 1.2164883855803055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52120 + }, + { + "epoch": 0.2528210792545343, + "grad_norm": 1.242674585455461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52130 + }, + { + "epoch": 0.2528695774473704, + "grad_norm": 1.189919913713311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52140 + }, + { + "epoch": 0.25291807564020646, + "grad_norm": 1.1965454405071796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52150 + }, + { + "epoch": 0.25296657383304255, + "grad_norm": 1.1912137409808565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52160 + }, + { + "epoch": 0.25301507202587864, + "grad_norm": 1.1904386099104158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52170 + }, + { + "epoch": 0.25306357021871473, + "grad_norm": 1.2553627470879292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52180 + }, + { + "epoch": 0.2531120684115508, + "grad_norm": 1.1862810822549363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52190 + }, + { + "epoch": 0.2531605666043869, + "grad_norm": 1.1915616937585582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52200 + }, + { + "epoch": 0.253209064797223, + "grad_norm": 1.2140552030359686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52210 + }, + { + "epoch": 0.2532575629900591, + "grad_norm": 1.2310715646890458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52220 + }, + { + "epoch": 0.25330606118289517, + "grad_norm": 1.2214120204134815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52230 + }, + { + "epoch": 0.25335455937573126, + "grad_norm": 1.1899852125907273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52240 + }, + { + "epoch": 0.25340305756856735, + "grad_norm": 2.838982936737011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52250 + }, + { + "epoch": 0.25345155576140344, + "grad_norm": 1.1475960803863927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52260 + }, + { + "epoch": 0.2535000539542395, + "grad_norm": 1.1979383884863637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52270 + }, + { + "epoch": 0.2535485521470756, + "grad_norm": 1.1662905308185145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52280 + }, + { + "epoch": 0.2535970503399117, + "grad_norm": 1.1603040661611885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52290 + }, + { + "epoch": 0.2536455485327478, + "grad_norm": 1.1879441785822564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52300 + }, + { + "epoch": 0.2536940467255839, + "grad_norm": 1.2142034222506481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52310 + }, + { + "epoch": 0.25374254491841997, + "grad_norm": 1.1802931965121388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52320 + }, + { + "epoch": 0.25379104311125605, + "grad_norm": 1.1378835296227408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52330 + }, + { + "epoch": 0.25383954130409214, + "grad_norm": 1.1817029132998869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52340 + }, + { + "epoch": 0.25388803949692823, + "grad_norm": 1.2262843540611357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52350 + }, + { + "epoch": 0.2539365376897643, + "grad_norm": 1.1658616472232097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52360 + }, + { + "epoch": 0.2539850358826004, + "grad_norm": 1.1420019774277534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52370 + }, + { + "epoch": 0.2540335340754365, + "grad_norm": 1.1216068429575898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52380 + }, + { + "epoch": 0.2540820322682726, + "grad_norm": 1.169490815300378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52390 + }, + { + "epoch": 0.2541305304611087, + "grad_norm": 1.1191747972816302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52400 + }, + { + "epoch": 0.25417902865394476, + "grad_norm": 1.1957089895986428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52410 + }, + { + "epoch": 0.25422752684678085, + "grad_norm": 1.1746805483880962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52420 + }, + { + "epoch": 0.25427602503961694, + "grad_norm": 1.134310920747339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52430 + }, + { + "epoch": 0.254324523232453, + "grad_norm": 1.1663371424219804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52440 + }, + { + "epoch": 0.2543730214252891, + "grad_norm": 1.1892098683574659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52450 + }, + { + "epoch": 0.2544215196181252, + "grad_norm": 1.211631683872838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52460 + }, + { + "epoch": 0.25447001781096135, + "grad_norm": 1.1322372017730231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52470 + }, + { + "epoch": 0.25451851600379743, + "grad_norm": 1.3738190318690613e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 52480 + }, + { + "epoch": 0.2545670141966335, + "grad_norm": 0.017446836456656456, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 52490 + }, + { + "epoch": 0.2546155123894696, + "grad_norm": 4.395764335640706e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52500 + }, + { + "epoch": 0.2546640105823057, + "grad_norm": 0.016377871856093407, + "learning_rate": 0.0002, + "loss": 0.0355, + "step": 52510 + }, + { + "epoch": 0.2547125087751418, + "grad_norm": 0.0013395919231697917, + "learning_rate": 0.0002, + "loss": 0.0068, + "step": 52520 + }, + { + "epoch": 0.2547610069679779, + "grad_norm": 0.00014820579963270575, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 52530 + }, + { + "epoch": 0.25480950516081396, + "grad_norm": 0.0013960471842437983, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 52540 + }, + { + "epoch": 0.25485800335365005, + "grad_norm": 0.0002763495431281626, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 52550 + }, + { + "epoch": 0.25490650154648614, + "grad_norm": 5.8474870456848294e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 52560 + }, + { + "epoch": 0.25495499973932223, + "grad_norm": 0.16243892908096313, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 52570 + }, + { + "epoch": 0.2550034979321583, + "grad_norm": 0.0002142062585335225, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 52580 + }, + { + "epoch": 0.2550519961249944, + "grad_norm": 0.00012943819456268102, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 52590 + }, + { + "epoch": 0.2551004943178305, + "grad_norm": 5.930638508289121e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52600 + }, + { + "epoch": 0.2551489925106666, + "grad_norm": 4.1876053728628904e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52610 + }, + { + "epoch": 0.25519749070350267, + "grad_norm": 2.7317113563185558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52620 + }, + { + "epoch": 0.25524598889633876, + "grad_norm": 2.759516610240098e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52630 + }, + { + "epoch": 0.25529448708917485, + "grad_norm": 2.5454615752096288e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52640 + }, + { + "epoch": 0.25534298528201094, + "grad_norm": 2.216774555563461e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52650 + }, + { + "epoch": 0.255391483474847, + "grad_norm": 2.8491147531894967e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 52660 + }, + { + "epoch": 0.2554399816676831, + "grad_norm": 0.07163127511739731, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 52670 + }, + { + "epoch": 0.2554884798605192, + "grad_norm": 0.002892472315579653, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52680 + }, + { + "epoch": 0.2555369780533553, + "grad_norm": 3.806352833635174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52690 + }, + { + "epoch": 0.2555854762461914, + "grad_norm": 1.9570177755667828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52700 + }, + { + "epoch": 0.25563397443902747, + "grad_norm": 2.3211036022985354e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 52710 + }, + { + "epoch": 0.25568247263186356, + "grad_norm": 0.0007266989559866488, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52720 + }, + { + "epoch": 0.25573097082469964, + "grad_norm": 2.187145764764864e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52730 + }, + { + "epoch": 0.25577946901753573, + "grad_norm": 6.967235094634816e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52740 + }, + { + "epoch": 0.2558279672103718, + "grad_norm": 5.081079507363029e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52750 + }, + { + "epoch": 0.2558764654032079, + "grad_norm": 1.5217680811474565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52760 + }, + { + "epoch": 0.255924963596044, + "grad_norm": 1.4680783351650462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52770 + }, + { + "epoch": 0.2559734617888801, + "grad_norm": 1.3874857359041926e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52780 + }, + { + "epoch": 0.2560219599817162, + "grad_norm": 1.3069982742308639e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52790 + }, + { + "epoch": 0.25607045817455226, + "grad_norm": 1.2914361832372379e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52800 + }, + { + "epoch": 0.25611895636738835, + "grad_norm": 1.21520697575761e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52810 + }, + { + "epoch": 0.25616745456022444, + "grad_norm": 1.2813571629521903e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52820 + }, + { + "epoch": 0.2562159527530605, + "grad_norm": 1.1050953617086634e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52830 + }, + { + "epoch": 0.2562644509458966, + "grad_norm": 1.119702028518077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52840 + }, + { + "epoch": 0.2563129491387327, + "grad_norm": 1.2123426131438464e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52850 + }, + { + "epoch": 0.2563614473315688, + "grad_norm": 1.0776981980598066e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52860 + }, + { + "epoch": 0.2564099455244049, + "grad_norm": 1.0348322575737257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52870 + }, + { + "epoch": 0.25645844371724097, + "grad_norm": 9.476864761381876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52880 + }, + { + "epoch": 0.25650694191007706, + "grad_norm": 1.0114592441823334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52890 + }, + { + "epoch": 0.25655544010291315, + "grad_norm": 8.92493608262157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52900 + }, + { + "epoch": 0.25660393829574923, + "grad_norm": 9.086540558200795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52910 + }, + { + "epoch": 0.2566524364885853, + "grad_norm": 8.31666238809703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52920 + }, + { + "epoch": 0.2567009346814214, + "grad_norm": 8.77639922691742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52930 + }, + { + "epoch": 0.2567494328742575, + "grad_norm": 7.875392839196138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52940 + }, + { + "epoch": 0.2567979310670936, + "grad_norm": 7.304962309717666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52950 + }, + { + "epoch": 0.2568464292599297, + "grad_norm": 7.853972419979982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52960 + }, + { + "epoch": 0.25689492745276576, + "grad_norm": 7.0318069447239395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52970 + }, + { + "epoch": 0.2569434256456019, + "grad_norm": 7.186214588728035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52980 + }, + { + "epoch": 0.256991923838438, + "grad_norm": 6.149127784738084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 52990 + }, + { + "epoch": 0.2570404220312741, + "grad_norm": 6.526067863887874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53000 + }, + { + "epoch": 0.2570889202241102, + "grad_norm": 7.409710633510258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53010 + }, + { + "epoch": 0.25713741841694626, + "grad_norm": 6.269047844398301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53020 + }, + { + "epoch": 0.25718591660978235, + "grad_norm": 6.147537987999385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53030 + }, + { + "epoch": 0.25723441480261844, + "grad_norm": 6.1625410125998314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53040 + }, + { + "epoch": 0.2572829129954545, + "grad_norm": 5.746527676819824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53050 + }, + { + "epoch": 0.2573314111882906, + "grad_norm": 5.957061148365028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53060 + }, + { + "epoch": 0.2573799093811267, + "grad_norm": 5.631310159515124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53070 + }, + { + "epoch": 0.2574284075739628, + "grad_norm": 5.6169596973632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53080 + }, + { + "epoch": 0.2574769057667989, + "grad_norm": 5.659108865074813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53090 + }, + { + "epoch": 0.25752540395963497, + "grad_norm": 5.524228527065134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53100 + }, + { + "epoch": 0.25757390215247106, + "grad_norm": 5.60717717235093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53110 + }, + { + "epoch": 0.25762240034530715, + "grad_norm": 5.105740456201602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53120 + }, + { + "epoch": 0.25767089853814323, + "grad_norm": 5.216395493334858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53130 + }, + { + "epoch": 0.2577193967309793, + "grad_norm": 4.546160653262632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53140 + }, + { + "epoch": 0.2577678949238154, + "grad_norm": 4.658969828597037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53150 + }, + { + "epoch": 0.2578163931166515, + "grad_norm": 4.909170002065366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53160 + }, + { + "epoch": 0.2578648913094876, + "grad_norm": 5.484921985043911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53170 + }, + { + "epoch": 0.2579133895023237, + "grad_norm": 4.972299848304829e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53180 + }, + { + "epoch": 0.25796188769515976, + "grad_norm": 4.738314146379707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53190 + }, + { + "epoch": 0.25801038588799585, + "grad_norm": 4.9855220822792035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53200 + }, + { + "epoch": 0.25805888408083194, + "grad_norm": 5.038458766648546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53210 + }, + { + "epoch": 0.25810738227366803, + "grad_norm": 4.436582457856275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53220 + }, + { + "epoch": 0.2581558804665041, + "grad_norm": 4.6478212425427046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53230 + }, + { + "epoch": 0.2582043786593402, + "grad_norm": 3.994262897322187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53240 + }, + { + "epoch": 0.2582528768521763, + "grad_norm": 4.928812813886907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53250 + }, + { + "epoch": 0.2583013750450124, + "grad_norm": 4.694793915405171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53260 + }, + { + "epoch": 0.25834987323784847, + "grad_norm": 4.180422820354579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53270 + }, + { + "epoch": 0.25839837143068456, + "grad_norm": 4.154710040893406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53280 + }, + { + "epoch": 0.25844686962352065, + "grad_norm": 3.7070542475703405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53290 + }, + { + "epoch": 0.25849536781635674, + "grad_norm": 4.363745119917439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53300 + }, + { + "epoch": 0.2585438660091928, + "grad_norm": 3.857499450532487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53310 + }, + { + "epoch": 0.2585923642020289, + "grad_norm": 3.7812183109053876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53320 + }, + { + "epoch": 0.258640862394865, + "grad_norm": 3.935753738915082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53330 + }, + { + "epoch": 0.2586893605877011, + "grad_norm": 9.926899110723753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53340 + }, + { + "epoch": 0.2587378587805372, + "grad_norm": 3.4298470836802153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53350 + }, + { + "epoch": 0.25878635697337327, + "grad_norm": 3.651924998848699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53360 + }, + { + "epoch": 0.25883485516620935, + "grad_norm": 3.6172564250591677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53370 + }, + { + "epoch": 0.25888335335904544, + "grad_norm": 3.4830361528292997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53380 + }, + { + "epoch": 0.25893185155188153, + "grad_norm": 3.4730742299871054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53390 + }, + { + "epoch": 0.2589803497447176, + "grad_norm": 3.5307793950778432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53400 + }, + { + "epoch": 0.2590288479375537, + "grad_norm": 3.369776777617517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53410 + }, + { + "epoch": 0.2590773461303898, + "grad_norm": 3.4598490401549498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53420 + }, + { + "epoch": 0.2591258443232259, + "grad_norm": 3.360212531333673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53430 + }, + { + "epoch": 0.259174342516062, + "grad_norm": 3.0645139759144513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53440 + }, + { + "epoch": 0.25922284070889806, + "grad_norm": 3.2197172004089225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53450 + }, + { + "epoch": 0.25927133890173415, + "grad_norm": 3.629450475273188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53460 + }, + { + "epoch": 0.25931983709457024, + "grad_norm": 3.083206138398964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53470 + }, + { + "epoch": 0.2593683352874063, + "grad_norm": 3.023516683242633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53480 + }, + { + "epoch": 0.2594168334802424, + "grad_norm": 3.075897666349192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53490 + }, + { + "epoch": 0.25946533167307856, + "grad_norm": 2.812126695062034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53500 + }, + { + "epoch": 0.25951382986591465, + "grad_norm": 3.0384576348296832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53510 + }, + { + "epoch": 0.25956232805875074, + "grad_norm": 2.9956831895106006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53520 + }, + { + "epoch": 0.2596108262515868, + "grad_norm": 2.9554173579526832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53530 + }, + { + "epoch": 0.2596593244444229, + "grad_norm": 2.917401161539601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53540 + }, + { + "epoch": 0.259707822637259, + "grad_norm": 2.9223435831227107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53550 + }, + { + "epoch": 0.2597563208300951, + "grad_norm": 2.9305308544280706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53560 + }, + { + "epoch": 0.2598048190229312, + "grad_norm": 2.9226368951640325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53570 + }, + { + "epoch": 0.25985331721576727, + "grad_norm": 2.8220570129633415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53580 + }, + { + "epoch": 0.25990181540860335, + "grad_norm": 2.7022363155992934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53590 + }, + { + "epoch": 0.25995031360143944, + "grad_norm": 2.643537754920544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53600 + }, + { + "epoch": 0.25999881179427553, + "grad_norm": 2.8241131531103747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53610 + }, + { + "epoch": 0.2600473099871116, + "grad_norm": 2.6854590942093637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53620 + }, + { + "epoch": 0.2600958081799477, + "grad_norm": 2.726978209466324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53630 + }, + { + "epoch": 0.2601443063727838, + "grad_norm": 2.6470124794286676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53640 + }, + { + "epoch": 0.2601928045656199, + "grad_norm": 3.3275985060754465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53650 + }, + { + "epoch": 0.260241302758456, + "grad_norm": 2.716285962378606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53660 + }, + { + "epoch": 0.26028980095129206, + "grad_norm": 2.600904053906561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53670 + }, + { + "epoch": 0.26033829914412815, + "grad_norm": 2.693579290280468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53680 + }, + { + "epoch": 0.26038679733696424, + "grad_norm": 2.4721982754272176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53690 + }, + { + "epoch": 0.2604352955298003, + "grad_norm": 2.492487283234368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53700 + }, + { + "epoch": 0.2604837937226364, + "grad_norm": 2.5431595531699713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53710 + }, + { + "epoch": 0.2605322919154725, + "grad_norm": 2.5865451789286453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53720 + }, + { + "epoch": 0.2605807901083086, + "grad_norm": 2.4281482637888985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53730 + }, + { + "epoch": 0.2606292883011447, + "grad_norm": 2.464354338371777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53740 + }, + { + "epoch": 0.26067778649398077, + "grad_norm": 2.2474291654361878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53750 + }, + { + "epoch": 0.26072628468681686, + "grad_norm": 2.432605469948612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53760 + }, + { + "epoch": 0.26077478287965294, + "grad_norm": 2.5685972104838584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53770 + }, + { + "epoch": 0.26082328107248903, + "grad_norm": 3.817886863544118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53780 + }, + { + "epoch": 0.2608717792653251, + "grad_norm": 2.1783837382827187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53790 + }, + { + "epoch": 0.2609202774581612, + "grad_norm": 2.1509531507035717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53800 + }, + { + "epoch": 0.2609687756509973, + "grad_norm": 2.2475521745946025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53810 + }, + { + "epoch": 0.2610172738438334, + "grad_norm": 2.314525772817433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53820 + }, + { + "epoch": 0.2610657720366695, + "grad_norm": 2.317846337973606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53830 + }, + { + "epoch": 0.26111427022950556, + "grad_norm": 2.1872233446629252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53840 + }, + { + "epoch": 0.26116276842234165, + "grad_norm": 2.4538808247598354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53850 + }, + { + "epoch": 0.26121126661517774, + "grad_norm": 2.2671595161227742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53860 + }, + { + "epoch": 0.26125976480801383, + "grad_norm": 2.0935806333000073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53870 + }, + { + "epoch": 0.2613082630008499, + "grad_norm": 2.120977114827838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53880 + }, + { + "epoch": 0.261356761193686, + "grad_norm": 2.1793850919493707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53890 + }, + { + "epoch": 0.2614052593865221, + "grad_norm": 2.0969150682503823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53900 + }, + { + "epoch": 0.2614537575793582, + "grad_norm": 2.16776038541866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53910 + }, + { + "epoch": 0.26150225577219427, + "grad_norm": 2.1685568754037376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53920 + }, + { + "epoch": 0.26155075396503036, + "grad_norm": 2.0829838831559755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53930 + }, + { + "epoch": 0.26159925215786645, + "grad_norm": 2.279863565490814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53940 + }, + { + "epoch": 0.26164775035070253, + "grad_norm": 2.0910526927764295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53950 + }, + { + "epoch": 0.2616962485435386, + "grad_norm": 2.031381427514134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53960 + }, + { + "epoch": 0.2617447467363747, + "grad_norm": 2.013898665609304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53970 + }, + { + "epoch": 0.2617932449292108, + "grad_norm": 2.0660295376728754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53980 + }, + { + "epoch": 0.2618417431220469, + "grad_norm": 2.0989753011235734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 53990 + }, + { + "epoch": 0.261890241314883, + "grad_norm": 2.1142650439287536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54000 + }, + { + "epoch": 0.2619387395077191, + "grad_norm": 2.095392346745939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54010 + }, + { + "epoch": 0.2619872377005552, + "grad_norm": 2.0100105757592246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54020 + }, + { + "epoch": 0.2620357358933913, + "grad_norm": 1.986213192139985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54030 + }, + { + "epoch": 0.2620842340862274, + "grad_norm": 2.021536147367442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54040 + }, + { + "epoch": 0.2621327322790635, + "grad_norm": 1.9897365746146534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54050 + }, + { + "epoch": 0.26218123047189956, + "grad_norm": 1.9170024643244687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54060 + }, + { + "epoch": 0.26222972866473565, + "grad_norm": 1.9301201064081397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54070 + }, + { + "epoch": 0.26227822685757174, + "grad_norm": 1.9882807009707903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54080 + }, + { + "epoch": 0.2623267250504078, + "grad_norm": 1.8576978391138255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54090 + }, + { + "epoch": 0.2623752232432439, + "grad_norm": 1.8543614714872092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54100 + }, + { + "epoch": 0.26242372143608, + "grad_norm": 1.91818298844737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54110 + }, + { + "epoch": 0.2624722196289161, + "grad_norm": 1.92825041267497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54120 + }, + { + "epoch": 0.2625207178217522, + "grad_norm": 1.8117111721949186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54130 + }, + { + "epoch": 0.26256921601458827, + "grad_norm": 1.808340243769635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54140 + }, + { + "epoch": 0.26261771420742436, + "grad_norm": 1.7530832110423944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54150 + }, + { + "epoch": 0.26266621240026045, + "grad_norm": 1.8390769582765643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54160 + }, + { + "epoch": 0.26271471059309653, + "grad_norm": 1.7818343849285156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54170 + }, + { + "epoch": 0.2627632087859326, + "grad_norm": 1.803637701414118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54180 + }, + { + "epoch": 0.2628117069787687, + "grad_norm": 1.7976080926018767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54190 + }, + { + "epoch": 0.2628602051716048, + "grad_norm": 1.776235308170726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54200 + }, + { + "epoch": 0.2629087033644409, + "grad_norm": 1.8320889694223297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54210 + }, + { + "epoch": 0.262957201557277, + "grad_norm": 1.8532970216256217e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54220 + }, + { + "epoch": 0.26300569975011306, + "grad_norm": 1.8311960729988641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54230 + }, + { + "epoch": 0.26305419794294915, + "grad_norm": 1.814849042602873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54240 + }, + { + "epoch": 0.26310269613578524, + "grad_norm": 1.705149998088018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54250 + }, + { + "epoch": 0.26315119432862133, + "grad_norm": 1.7852406699603307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54260 + }, + { + "epoch": 0.2631996925214574, + "grad_norm": 1.8023366692432319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54270 + }, + { + "epoch": 0.2632481907142935, + "grad_norm": 1.7592195717952563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54280 + }, + { + "epoch": 0.2632966889071296, + "grad_norm": 2.2540530153492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54290 + }, + { + "epoch": 0.2633451870999657, + "grad_norm": 1.6818354424685822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54300 + }, + { + "epoch": 0.26339368529280177, + "grad_norm": 1.6760276366767357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54310 + }, + { + "epoch": 0.26344218348563786, + "grad_norm": 1.7922080814969377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54320 + }, + { + "epoch": 0.26349068167847395, + "grad_norm": 1.6868493730726186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54330 + }, + { + "epoch": 0.26353917987131004, + "grad_norm": 1.6267333649011562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54340 + }, + { + "epoch": 0.2635876780641461, + "grad_norm": 1.6823669284349307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54350 + }, + { + "epoch": 0.2636361762569822, + "grad_norm": 1.6390998780480004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54360 + }, + { + "epoch": 0.2636846744498183, + "grad_norm": 1.5934730299704825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54370 + }, + { + "epoch": 0.2637331726426544, + "grad_norm": 1.6768156001489842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54380 + }, + { + "epoch": 0.2637816708354905, + "grad_norm": 1.61405807830306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54390 + }, + { + "epoch": 0.26383016902832657, + "grad_norm": 1.8264088339492446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54400 + }, + { + "epoch": 0.26387866722116265, + "grad_norm": 1.5957780306052882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54410 + }, + { + "epoch": 0.26392716541399874, + "grad_norm": 1.5509970126004191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54420 + }, + { + "epoch": 0.26397566360683483, + "grad_norm": 1.6453788020953652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54430 + }, + { + "epoch": 0.2640241617996709, + "grad_norm": 1.6585273669988965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54440 + }, + { + "epoch": 0.264072659992507, + "grad_norm": 1.5811081084393663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54450 + }, + { + "epoch": 0.2641211581853431, + "grad_norm": 1.5357919664893416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54460 + }, + { + "epoch": 0.2641696563781792, + "grad_norm": 1.5336182741521043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54470 + }, + { + "epoch": 0.2642181545710153, + "grad_norm": 1.5438364471265231e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54480 + }, + { + "epoch": 0.26426665276385136, + "grad_norm": 1.495728383815731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54490 + }, + { + "epoch": 0.26431515095668745, + "grad_norm": 1.52984171108983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54500 + }, + { + "epoch": 0.26436364914952354, + "grad_norm": 1.5498277434744523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54510 + }, + { + "epoch": 0.2644121473423597, + "grad_norm": 1.530497570456646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54520 + }, + { + "epoch": 0.26446064553519577, + "grad_norm": 1.491227976657683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54530 + }, + { + "epoch": 0.26450914372803186, + "grad_norm": 1.4399842029888532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54540 + }, + { + "epoch": 0.26455764192086795, + "grad_norm": 1.4533683270201436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54550 + }, + { + "epoch": 0.26460614011370404, + "grad_norm": 1.4254843563321629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54560 + }, + { + "epoch": 0.2646546383065401, + "grad_norm": 1.4449614127443056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54570 + }, + { + "epoch": 0.2647031364993762, + "grad_norm": 1.441182462258439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54580 + }, + { + "epoch": 0.2647516346922123, + "grad_norm": 1.4847091733827256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54590 + }, + { + "epoch": 0.2648001328850484, + "grad_norm": 1.3707218613490113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54600 + }, + { + "epoch": 0.2648486310778845, + "grad_norm": 1.4437003983402974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54610 + }, + { + "epoch": 0.26489712927072057, + "grad_norm": 1.3876104958399083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54620 + }, + { + "epoch": 0.26494562746355665, + "grad_norm": 1.5100141581569915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54630 + }, + { + "epoch": 0.26499412565639274, + "grad_norm": 1.327119889538153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54640 + }, + { + "epoch": 0.26504262384922883, + "grad_norm": 1.3296402130436036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54650 + }, + { + "epoch": 0.2650911220420649, + "grad_norm": 1.306960598412843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54660 + }, + { + "epoch": 0.265139620234901, + "grad_norm": 1.3228235502538155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54670 + }, + { + "epoch": 0.2651881184277371, + "grad_norm": 1.3760276260654791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54680 + }, + { + "epoch": 0.2652366166205732, + "grad_norm": 1.3250272559162113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54690 + }, + { + "epoch": 0.2652851148134093, + "grad_norm": 1.2923665053676814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54700 + }, + { + "epoch": 0.26533361300624536, + "grad_norm": 1.3029346064286074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54710 + }, + { + "epoch": 0.26538211119908145, + "grad_norm": 1.2872282013631775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54720 + }, + { + "epoch": 0.26543060939191754, + "grad_norm": 1.2777934443874983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54730 + }, + { + "epoch": 0.2654791075847536, + "grad_norm": 1.2110557463529403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54740 + }, + { + "epoch": 0.2655276057775897, + "grad_norm": 1.3391306765697664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54750 + }, + { + "epoch": 0.2655761039704258, + "grad_norm": 1.2788809726771433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54760 + }, + { + "epoch": 0.2656246021632619, + "grad_norm": 1.1949030067626154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54770 + }, + { + "epoch": 0.265673100356098, + "grad_norm": 1.342805830972793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54780 + }, + { + "epoch": 0.26572159854893407, + "grad_norm": 1.2225600585225038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54790 + }, + { + "epoch": 0.26577009674177016, + "grad_norm": 2.894214503612602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54800 + }, + { + "epoch": 0.26581859493460624, + "grad_norm": 1.343174176327011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54810 + }, + { + "epoch": 0.26586709312744233, + "grad_norm": 1.1905361816388904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54820 + }, + { + "epoch": 0.2659155913202784, + "grad_norm": 2.228611265309155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54830 + }, + { + "epoch": 0.2659640895131145, + "grad_norm": 1.3796502571494784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54840 + }, + { + "epoch": 0.2660125877059506, + "grad_norm": 1.1716991821231204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54850 + }, + { + "epoch": 0.2660610858987867, + "grad_norm": 1.1123014473923831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54860 + }, + { + "epoch": 0.2661095840916228, + "grad_norm": 1.1221424074392417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54870 + }, + { + "epoch": 0.26615808228445886, + "grad_norm": 1.1422013130868436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54880 + }, + { + "epoch": 0.26620658047729495, + "grad_norm": 1.158050849880965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54890 + }, + { + "epoch": 0.26625507867013104, + "grad_norm": 1.059786882251501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54900 + }, + { + "epoch": 0.26630357686296713, + "grad_norm": 1.102773921957123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54910 + }, + { + "epoch": 0.2663520750558032, + "grad_norm": 1.0598091648716945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54920 + }, + { + "epoch": 0.2664005732486393, + "grad_norm": 2.426088940410409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54930 + }, + { + "epoch": 0.2664490714414754, + "grad_norm": 1.03258503258985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54940 + }, + { + "epoch": 0.2664975696343115, + "grad_norm": 1.0088145927511505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54950 + }, + { + "epoch": 0.26654606782714757, + "grad_norm": 1.035177888297767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54960 + }, + { + "epoch": 0.26659456601998366, + "grad_norm": 1.0248310218230472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54970 + }, + { + "epoch": 0.26664306421281975, + "grad_norm": 9.819847264225245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54980 + }, + { + "epoch": 0.26669156240565584, + "grad_norm": 9.944758403435117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 54990 + }, + { + "epoch": 0.2667400605984919, + "grad_norm": 1.0211872449872317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55000 + }, + { + "epoch": 0.266788558791328, + "grad_norm": 9.210131679537881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55010 + }, + { + "epoch": 0.2668370569841641, + "grad_norm": 9.384099257658818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55020 + }, + { + "epoch": 0.26688555517700024, + "grad_norm": 8.573057925786998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55030 + }, + { + "epoch": 0.26693405336983633, + "grad_norm": 1.0006858701672172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55040 + }, + { + "epoch": 0.2669825515626724, + "grad_norm": 9.475198794461903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55050 + }, + { + "epoch": 0.2670310497555085, + "grad_norm": 9.053194389707642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55060 + }, + { + "epoch": 0.2670795479483446, + "grad_norm": 9.160715990219614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55070 + }, + { + "epoch": 0.2671280461411807, + "grad_norm": 8.849121400089643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55080 + }, + { + "epoch": 0.2671765443340168, + "grad_norm": 8.840046916702704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55090 + }, + { + "epoch": 0.26722504252685286, + "grad_norm": 9.573850547894835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55100 + }, + { + "epoch": 0.26727354071968895, + "grad_norm": 8.089430139079923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55110 + }, + { + "epoch": 0.26732203891252504, + "grad_norm": 8.04058061021351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55120 + }, + { + "epoch": 0.26737053710536113, + "grad_norm": 8.895136147657468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55130 + }, + { + "epoch": 0.2674190352981972, + "grad_norm": 8.148901429194666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55140 + }, + { + "epoch": 0.2674675334910333, + "grad_norm": 9.069328257282905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55150 + }, + { + "epoch": 0.2675160316838694, + "grad_norm": 8.724476856514229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55160 + }, + { + "epoch": 0.2675645298767055, + "grad_norm": 8.699300906300778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55170 + }, + { + "epoch": 0.26761302806954157, + "grad_norm": 8.286352226605231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55180 + }, + { + "epoch": 0.26766152626237766, + "grad_norm": 7.803211587997794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55190 + }, + { + "epoch": 0.26771002445521375, + "grad_norm": 7.753765771667531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55200 + }, + { + "epoch": 0.26775852264804983, + "grad_norm": 8.028822549022152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55210 + }, + { + "epoch": 0.2678070208408859, + "grad_norm": 7.300110951291572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55220 + }, + { + "epoch": 0.267855519033722, + "grad_norm": 8.135307325574104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55230 + }, + { + "epoch": 0.2679040172265581, + "grad_norm": 8.502160540047043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55240 + }, + { + "epoch": 0.2679525154193942, + "grad_norm": 8.050142810134275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55250 + }, + { + "epoch": 0.2680010136122303, + "grad_norm": 7.588528774249426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55260 + }, + { + "epoch": 0.26804951180506636, + "grad_norm": 7.704314270995383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55270 + }, + { + "epoch": 0.26809800999790245, + "grad_norm": 7.790267773088999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55280 + }, + { + "epoch": 0.26814650819073854, + "grad_norm": 7.780259352330177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55290 + }, + { + "epoch": 0.26819500638357463, + "grad_norm": 7.85482029641571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55300 + }, + { + "epoch": 0.2682435045764107, + "grad_norm": 6.943455446162261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55310 + }, + { + "epoch": 0.2682920027692468, + "grad_norm": 7.360318932114751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55320 + }, + { + "epoch": 0.2683405009620829, + "grad_norm": 1.0536746231082361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55330 + }, + { + "epoch": 0.268388999154919, + "grad_norm": 7.316151027225715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55340 + }, + { + "epoch": 0.26843749734775507, + "grad_norm": 7.036625220280257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55350 + }, + { + "epoch": 0.26848599554059116, + "grad_norm": 6.32428850622091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55360 + }, + { + "epoch": 0.26853449373342725, + "grad_norm": 7.419527605634357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55370 + }, + { + "epoch": 0.26858299192626334, + "grad_norm": 6.338041202980094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55380 + }, + { + "epoch": 0.2686314901190994, + "grad_norm": 7.203245218079246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55390 + }, + { + "epoch": 0.2686799883119355, + "grad_norm": 6.608226499338343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55400 + }, + { + "epoch": 0.2687284865047716, + "grad_norm": 6.789776421101124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55410 + }, + { + "epoch": 0.2687769846976077, + "grad_norm": 7.073613801367173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55420 + }, + { + "epoch": 0.2688254828904438, + "grad_norm": 0.3772336542606354, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 55430 + }, + { + "epoch": 0.26887398108327987, + "grad_norm": 8.413747991653509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55440 + }, + { + "epoch": 0.26892247927611596, + "grad_norm": 1.127896780417359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55450 + }, + { + "epoch": 0.26897097746895204, + "grad_norm": 0.0007611913024447858, + "learning_rate": 0.0002, + "loss": 0.007, + "step": 55460 + }, + { + "epoch": 0.26901947566178813, + "grad_norm": 0.004979088436812162, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 55470 + }, + { + "epoch": 0.2690679738546242, + "grad_norm": 0.04727620631456375, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 55480 + }, + { + "epoch": 0.2691164720474603, + "grad_norm": 8.04009468993172e-05, + "learning_rate": 0.0002, + "loss": 0.0158, + "step": 55490 + }, + { + "epoch": 0.2691649702402964, + "grad_norm": 3.8308931834762916e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55500 + }, + { + "epoch": 0.2692134684331325, + "grad_norm": 0.031480852514505386, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 55510 + }, + { + "epoch": 0.2692619666259686, + "grad_norm": 7.482436194550246e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55520 + }, + { + "epoch": 0.26931046481880466, + "grad_norm": 4.0994367736857384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55530 + }, + { + "epoch": 0.2693589630116408, + "grad_norm": 3.0498360501951538e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55540 + }, + { + "epoch": 0.2694074612044769, + "grad_norm": 2.608899558254052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55550 + }, + { + "epoch": 0.269455959397313, + "grad_norm": 2.0888668586849235e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55560 + }, + { + "epoch": 0.26950445759014907, + "grad_norm": 2.0738189050462097e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55570 + }, + { + "epoch": 0.26955295578298516, + "grad_norm": 1.8181353880208917e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55580 + }, + { + "epoch": 0.26960145397582125, + "grad_norm": 8.847138815326616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55590 + }, + { + "epoch": 0.26964995216865734, + "grad_norm": 9.105096978601068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55600 + }, + { + "epoch": 0.2696984503614934, + "grad_norm": 1.4740410733793397e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55610 + }, + { + "epoch": 0.2697469485543295, + "grad_norm": 1.515012263553217e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55620 + }, + { + "epoch": 0.2697954467471656, + "grad_norm": 2.203964140790049e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55630 + }, + { + "epoch": 0.2698439449400017, + "grad_norm": 6.478686373156961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55640 + }, + { + "epoch": 0.2698924431328378, + "grad_norm": 6.1191954046080355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55650 + }, + { + "epoch": 0.26994094132567387, + "grad_norm": 1.346081535302801e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55660 + }, + { + "epoch": 0.26998943951850995, + "grad_norm": 1.1389718565624207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55670 + }, + { + "epoch": 0.27003793771134604, + "grad_norm": 1.1004804946423974e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55680 + }, + { + "epoch": 0.27008643590418213, + "grad_norm": 5.537616289075231e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55690 + }, + { + "epoch": 0.2701349340970182, + "grad_norm": 5.169516953174025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55700 + }, + { + "epoch": 0.2701834322898543, + "grad_norm": 1.7102653146139346e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55710 + }, + { + "epoch": 0.2702319304826904, + "grad_norm": 1.1290451766399201e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55720 + }, + { + "epoch": 0.2702804286755265, + "grad_norm": 1.0794180525408592e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55730 + }, + { + "epoch": 0.2703289268683626, + "grad_norm": 5.367902303987648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55740 + }, + { + "epoch": 0.27037742506119866, + "grad_norm": 4.4490334403235465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55750 + }, + { + "epoch": 0.27042592325403475, + "grad_norm": 1.2171064554422628e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55760 + }, + { + "epoch": 0.27047442144687084, + "grad_norm": 9.31470549403457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55770 + }, + { + "epoch": 0.2705229196397069, + "grad_norm": 2.711165143409744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55780 + }, + { + "epoch": 0.270571417832543, + "grad_norm": 4.048262780997902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55790 + }, + { + "epoch": 0.2706199160253791, + "grad_norm": 3.6999986150476616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55800 + }, + { + "epoch": 0.2706684142182152, + "grad_norm": 8.326713214046322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55810 + }, + { + "epoch": 0.2707169124110513, + "grad_norm": 9.159947694570292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55820 + }, + { + "epoch": 0.27076541060388737, + "grad_norm": 8.806634468783159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55830 + }, + { + "epoch": 0.27081390879672346, + "grad_norm": 8.735214578337036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55840 + }, + { + "epoch": 0.27086240698955955, + "grad_norm": 3.0246808364609024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55850 + }, + { + "epoch": 0.27091090518239563, + "grad_norm": 8.289192919619381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55860 + }, + { + "epoch": 0.2709594033752317, + "grad_norm": 7.5671177910408005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55870 + }, + { + "epoch": 0.2710079015680678, + "grad_norm": 6.564812792930752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55880 + }, + { + "epoch": 0.2710563997609039, + "grad_norm": 2.9763132260995917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55890 + }, + { + "epoch": 0.27110489795374, + "grad_norm": 2.907328052970115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55900 + }, + { + "epoch": 0.2711533961465761, + "grad_norm": 8.255572538473643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55910 + }, + { + "epoch": 0.27120189433941216, + "grad_norm": 0.001946752774529159, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55920 + }, + { + "epoch": 0.27125039253224825, + "grad_norm": 8.175715265679173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55930 + }, + { + "epoch": 0.27129889072508434, + "grad_norm": 3.1693784876551945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55940 + }, + { + "epoch": 0.27134738891792043, + "grad_norm": 3.5319994822202716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55950 + }, + { + "epoch": 0.2713958871107565, + "grad_norm": 6.765413672837894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55960 + }, + { + "epoch": 0.2714443853035926, + "grad_norm": 5.705839612346608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55970 + }, + { + "epoch": 0.2714928834964287, + "grad_norm": 6.191582997416845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55980 + }, + { + "epoch": 0.2715413816892648, + "grad_norm": 3.581487362680491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 55990 + }, + { + "epoch": 0.27158987988210087, + "grad_norm": 2.7874066290678456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56000 + }, + { + "epoch": 0.27163837807493696, + "grad_norm": 6.3888610384310596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56010 + }, + { + "epoch": 0.27168687626777305, + "grad_norm": 5.88077546126442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56020 + }, + { + "epoch": 0.27173537446060914, + "grad_norm": 6.735756414855132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56030 + }, + { + "epoch": 0.2717838726534452, + "grad_norm": 7.327782441279851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56040 + }, + { + "epoch": 0.27183237084628137, + "grad_norm": 2.6038821943075163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56050 + }, + { + "epoch": 0.27188086903911746, + "grad_norm": 5.388598765421193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56060 + }, + { + "epoch": 0.27192936723195354, + "grad_norm": 5.635878551402129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56070 + }, + { + "epoch": 0.27197786542478963, + "grad_norm": 4.6827558435325045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56080 + }, + { + "epoch": 0.2720263636176257, + "grad_norm": 2.7152750590175856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56090 + }, + { + "epoch": 0.2720748618104618, + "grad_norm": 2.5282813567173434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56100 + }, + { + "epoch": 0.2721233600032979, + "grad_norm": 5.060473995399661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56110 + }, + { + "epoch": 0.272171858196134, + "grad_norm": 5.138988854014315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56120 + }, + { + "epoch": 0.2722203563889701, + "grad_norm": 4.848192475037649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56130 + }, + { + "epoch": 0.27226885458180616, + "grad_norm": 2.3452387267752783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56140 + }, + { + "epoch": 0.27231735277464225, + "grad_norm": 2.343691448913887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56150 + }, + { + "epoch": 0.27236585096747834, + "grad_norm": 4.334896402724553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56160 + }, + { + "epoch": 0.27241434916031443, + "grad_norm": 4.766722668136936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56170 + }, + { + "epoch": 0.2724628473531505, + "grad_norm": 4.864487891609315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56180 + }, + { + "epoch": 0.2725113455459866, + "grad_norm": 1.9430090105743147e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 56190 + }, + { + "epoch": 0.2725598437388227, + "grad_norm": 6.757371920684818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56200 + }, + { + "epoch": 0.2726083419316588, + "grad_norm": 9.498855433776043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56210 + }, + { + "epoch": 0.27265684012449487, + "grad_norm": 8.36003073345637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56220 + }, + { + "epoch": 0.27270533831733096, + "grad_norm": 5.768396931671305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56230 + }, + { + "epoch": 0.27275383651016705, + "grad_norm": 2.1235741769487504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56240 + }, + { + "epoch": 0.27280233470300314, + "grad_norm": 1.91290178008785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56250 + }, + { + "epoch": 0.2728508328958392, + "grad_norm": 5.045636498834938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56260 + }, + { + "epoch": 0.2728993310886753, + "grad_norm": 4.951194114255486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56270 + }, + { + "epoch": 0.2729478292815114, + "grad_norm": 5.3452595238923095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56280 + }, + { + "epoch": 0.2729963274743475, + "grad_norm": 1.929346126416931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56290 + }, + { + "epoch": 0.2730448256671836, + "grad_norm": 1.899043354569585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56300 + }, + { + "epoch": 0.27309332386001967, + "grad_norm": 4.14407304560882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56310 + }, + { + "epoch": 0.27314182205285575, + "grad_norm": 4.268473276169971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56320 + }, + { + "epoch": 0.27319032024569184, + "grad_norm": 6.475990085164085e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56330 + }, + { + "epoch": 0.27323881843852793, + "grad_norm": 1.882435753941536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56340 + }, + { + "epoch": 0.273287316631364, + "grad_norm": 1.657709958635678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56350 + }, + { + "epoch": 0.2733358148242001, + "grad_norm": 4.305652055336395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56360 + }, + { + "epoch": 0.2733843130170362, + "grad_norm": 4.457831892068498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56370 + }, + { + "epoch": 0.2734328112098723, + "grad_norm": 3.5488387766235974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56380 + }, + { + "epoch": 0.2734813094027084, + "grad_norm": 1.706229340925347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56390 + }, + { + "epoch": 0.27352980759554446, + "grad_norm": 1.6902977222343907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56400 + }, + { + "epoch": 0.27357830578838055, + "grad_norm": 3.66516837857489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56410 + }, + { + "epoch": 0.27362680398121664, + "grad_norm": 3.8800499169155955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56420 + }, + { + "epoch": 0.2736753021740527, + "grad_norm": 4.104414074390661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56430 + }, + { + "epoch": 0.2737238003668888, + "grad_norm": 1.5354938796008355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56440 + }, + { + "epoch": 0.2737722985597249, + "grad_norm": 1.7440008832636522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56450 + }, + { + "epoch": 0.273820796752561, + "grad_norm": 3.7829915982001694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56460 + }, + { + "epoch": 0.2738692949453971, + "grad_norm": 3.7235515719657997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56470 + }, + { + "epoch": 0.27391779313823317, + "grad_norm": 3.8848766052979045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56480 + }, + { + "epoch": 0.27396629133106926, + "grad_norm": 1.5328870404118788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56490 + }, + { + "epoch": 0.27401478952390534, + "grad_norm": 1.7662741811363958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56500 + }, + { + "epoch": 0.27406328771674143, + "grad_norm": 3.4365268675173866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56510 + }, + { + "epoch": 0.2741117859095775, + "grad_norm": 3.531806896717171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56520 + }, + { + "epoch": 0.2741602841024136, + "grad_norm": 3.088514631599537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56530 + }, + { + "epoch": 0.2742087822952497, + "grad_norm": 1.6338926798198372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56540 + }, + { + "epoch": 0.2742572804880858, + "grad_norm": 1.5044729479996022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56550 + }, + { + "epoch": 0.2743057786809219, + "grad_norm": 2.965394969578483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56560 + }, + { + "epoch": 0.274354276873758, + "grad_norm": 2.990759185195202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56570 + }, + { + "epoch": 0.2744027750665941, + "grad_norm": 3.2919460863922723e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56580 + }, + { + "epoch": 0.2744512732594302, + "grad_norm": 1.4081401786825154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56590 + }, + { + "epoch": 0.2744997714522663, + "grad_norm": 1.605566239959444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56600 + }, + { + "epoch": 0.27454826964510237, + "grad_norm": 2.7760977445723256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56610 + }, + { + "epoch": 0.27459676783793846, + "grad_norm": 3.0093906389083713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56620 + }, + { + "epoch": 0.27464526603077455, + "grad_norm": 2.847271844075294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56630 + }, + { + "epoch": 0.27469376422361064, + "grad_norm": 1.3548528841056395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56640 + }, + { + "epoch": 0.2747422624164467, + "grad_norm": 1.4215150940799504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56650 + }, + { + "epoch": 0.2747907606092828, + "grad_norm": 2.7863488867296837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56660 + }, + { + "epoch": 0.2748392588021189, + "grad_norm": 7.819536222086754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56670 + }, + { + "epoch": 0.274887756994955, + "grad_norm": 8.177187737601344e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 56680 + }, + { + "epoch": 0.2749362551877911, + "grad_norm": 0.0016768660861998796, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 56690 + }, + { + "epoch": 0.27498475338062717, + "grad_norm": 9.485494229011238e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 56700 + }, + { + "epoch": 0.27503325157346326, + "grad_norm": 5.63705871172715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56710 + }, + { + "epoch": 0.27508174976629934, + "grad_norm": 1.1791595170507208e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56720 + }, + { + "epoch": 0.27513024795913543, + "grad_norm": 6.346921054500854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56730 + }, + { + "epoch": 0.2751787461519715, + "grad_norm": 1.4474749150394928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56740 + }, + { + "epoch": 0.2752272443448076, + "grad_norm": 1.3171645605325466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56750 + }, + { + "epoch": 0.2752757425376437, + "grad_norm": 5.179932941246079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56760 + }, + { + "epoch": 0.2753242407304798, + "grad_norm": 4.617085323843639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56770 + }, + { + "epoch": 0.2753727389233159, + "grad_norm": 5.0957200983248185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56780 + }, + { + "epoch": 0.27542123711615196, + "grad_norm": 1.1822961596408277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56790 + }, + { + "epoch": 0.27546973530898805, + "grad_norm": 1.3273726153784082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56800 + }, + { + "epoch": 0.27551823350182414, + "grad_norm": 4.96361053592409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56810 + }, + { + "epoch": 0.2755667316946602, + "grad_norm": 4.847469881497091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56820 + }, + { + "epoch": 0.2756152298874963, + "grad_norm": 5.051570497016655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56830 + }, + { + "epoch": 0.2756637280803324, + "grad_norm": 1.2125198054491193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56840 + }, + { + "epoch": 0.2757122262731685, + "grad_norm": 1.2698494629148627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56850 + }, + { + "epoch": 0.2757607244660046, + "grad_norm": 3.7623576645273715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56860 + }, + { + "epoch": 0.27580922265884067, + "grad_norm": 4.069308943144279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56870 + }, + { + "epoch": 0.27585772085167676, + "grad_norm": 4.060231731273234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56880 + }, + { + "epoch": 0.27590621904451285, + "grad_norm": 1.1180861747561721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56890 + }, + { + "epoch": 0.27595471723734893, + "grad_norm": 1.1067770628869766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56900 + }, + { + "epoch": 0.276003215430185, + "grad_norm": 3.93139225707273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56910 + }, + { + "epoch": 0.2760517136230211, + "grad_norm": 4.288106083549792e-06, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 56920 + }, + { + "epoch": 0.2761002118158572, + "grad_norm": 0.00012692995369434357, + "learning_rate": 0.0002, + "loss": 0.0041, + "step": 56930 + }, + { + "epoch": 0.2761487100086933, + "grad_norm": 0.0001688401389401406, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 56940 + }, + { + "epoch": 0.2761972082015294, + "grad_norm": 3.388931509107351e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56950 + }, + { + "epoch": 0.27624570639436546, + "grad_norm": 0.0024601200129836798, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 56960 + }, + { + "epoch": 0.27629420458720155, + "grad_norm": 4.2002055124612525e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56970 + }, + { + "epoch": 0.27634270278003764, + "grad_norm": 1.683063237578608e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56980 + }, + { + "epoch": 0.27639120097287373, + "grad_norm": 1.0640961590979714e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 56990 + }, + { + "epoch": 0.2764396991657098, + "grad_norm": 9.925511221808847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57000 + }, + { + "epoch": 0.2764881973585459, + "grad_norm": 1.6403735571657307e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57010 + }, + { + "epoch": 0.276536695551382, + "grad_norm": 1.4280657524068374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57020 + }, + { + "epoch": 0.2765851937442181, + "grad_norm": 0.00011577830446185544, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 57030 + }, + { + "epoch": 0.27663369193705417, + "grad_norm": 2.0525538275251165e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57040 + }, + { + "epoch": 0.27668219012989026, + "grad_norm": 2.0624112949008122e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57050 + }, + { + "epoch": 0.27673068832272635, + "grad_norm": 7.760830339975655e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57060 + }, + { + "epoch": 0.27677918651556244, + "grad_norm": 6.147813110146672e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57070 + }, + { + "epoch": 0.2768276847083986, + "grad_norm": 3.6064055166207254e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57080 + }, + { + "epoch": 0.27687618290123467, + "grad_norm": 8.752860594540834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57090 + }, + { + "epoch": 0.27692468109407076, + "grad_norm": 7.667830686841626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57100 + }, + { + "epoch": 0.27697317928690685, + "grad_norm": 2.154021422029473e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57110 + }, + { + "epoch": 0.27702167747974293, + "grad_norm": 0.00020712010154966265, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 57120 + }, + { + "epoch": 0.277070175672579, + "grad_norm": 0.00021012318029534072, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57130 + }, + { + "epoch": 0.2771186738654151, + "grad_norm": 6.138550088508055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57140 + }, + { + "epoch": 0.2771671720582512, + "grad_norm": 3.946850119973533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57150 + }, + { + "epoch": 0.2772156702510873, + "grad_norm": 5.453067569760606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57160 + }, + { + "epoch": 0.2772641684439234, + "grad_norm": 4.319191430113278e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57170 + }, + { + "epoch": 0.27731266663675946, + "grad_norm": 3.4128235711250454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57180 + }, + { + "epoch": 0.27736116482959555, + "grad_norm": 1.7735788787831552e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57190 + }, + { + "epoch": 0.27740966302243164, + "grad_norm": 3.381228816579096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57200 + }, + { + "epoch": 0.27745816121526773, + "grad_norm": 2.5120401915046386e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57210 + }, + { + "epoch": 0.2775066594081038, + "grad_norm": 2.743861659837421e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57220 + }, + { + "epoch": 0.2775551576009399, + "grad_norm": 0.5584098100662231, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 57230 + }, + { + "epoch": 0.277603655793776, + "grad_norm": 1.6426807633251883e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57240 + }, + { + "epoch": 0.2776521539866121, + "grad_norm": 0.00019156644702889025, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57250 + }, + { + "epoch": 0.27770065217944817, + "grad_norm": 6.861313158879057e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57260 + }, + { + "epoch": 0.27774915037228426, + "grad_norm": 0.015476185828447342, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 57270 + }, + { + "epoch": 0.27779764856512035, + "grad_norm": 0.0038169275503605604, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57280 + }, + { + "epoch": 0.27784614675795644, + "grad_norm": 0.00010095578181790188, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57290 + }, + { + "epoch": 0.2778946449507925, + "grad_norm": 2.7936166588915512e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57300 + }, + { + "epoch": 0.2779431431436286, + "grad_norm": 3.0464756491710432e-05, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 57310 + }, + { + "epoch": 0.2779916413364647, + "grad_norm": 0.01809871196746826, + "learning_rate": 0.0002, + "loss": 0.0064, + "step": 57320 + }, + { + "epoch": 0.2780401395293008, + "grad_norm": 0.0005478117382153869, + "learning_rate": 0.0002, + "loss": 0.0063, + "step": 57330 + }, + { + "epoch": 0.2780886377221369, + "grad_norm": 0.005725848022848368, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 57340 + }, + { + "epoch": 0.27813713591497297, + "grad_norm": 0.0002498992544133216, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57350 + }, + { + "epoch": 0.27818563410780905, + "grad_norm": 8.213253750000149e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57360 + }, + { + "epoch": 0.27823413230064514, + "grad_norm": 5.75210724491626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57370 + }, + { + "epoch": 0.27828263049348123, + "grad_norm": 4.125719715375453e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57380 + }, + { + "epoch": 0.2783311286863173, + "grad_norm": 9.798995597520843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57390 + }, + { + "epoch": 0.2783796268791534, + "grad_norm": 2.3049578885547817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57400 + }, + { + "epoch": 0.2784281250719895, + "grad_norm": 2.2134063328849152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57410 + }, + { + "epoch": 0.2784766232648256, + "grad_norm": 1.7847421986516565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57420 + }, + { + "epoch": 0.2785251214576617, + "grad_norm": 1.9806286218226887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57430 + }, + { + "epoch": 0.27857361965049776, + "grad_norm": 2.1031080905231647e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57440 + }, + { + "epoch": 0.27862211784333385, + "grad_norm": 2.0430055883480236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57450 + }, + { + "epoch": 0.27867061603616994, + "grad_norm": 0.00016611849423497915, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57460 + }, + { + "epoch": 0.278719114229006, + "grad_norm": 1.9147024431731552e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57470 + }, + { + "epoch": 0.2787676124218421, + "grad_norm": 3.363339419593103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57480 + }, + { + "epoch": 0.2788161106146782, + "grad_norm": 1.6834901543916203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57490 + }, + { + "epoch": 0.2788646088075143, + "grad_norm": 1.6221971236518584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57500 + }, + { + "epoch": 0.2789131070003504, + "grad_norm": 1.2882805094704963e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57510 + }, + { + "epoch": 0.27896160519318647, + "grad_norm": 1.829303801059723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57520 + }, + { + "epoch": 0.27901010338602256, + "grad_norm": 3.380851921974681e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57530 + }, + { + "epoch": 0.27905860157885864, + "grad_norm": 1.4249079868022818e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57540 + }, + { + "epoch": 0.27910709977169473, + "grad_norm": 1.5296012861654162e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57550 + }, + { + "epoch": 0.2791555979645308, + "grad_norm": 1.286001679545734e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57560 + }, + { + "epoch": 0.2792040961573669, + "grad_norm": 1.1768194781325292e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57570 + }, + { + "epoch": 0.279252594350203, + "grad_norm": 1.4699120583827607e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 57580 + }, + { + "epoch": 0.27930109254303914, + "grad_norm": 8.816415356704965e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57590 + }, + { + "epoch": 0.27934959073587523, + "grad_norm": 0.00018001010175794363, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57600 + }, + { + "epoch": 0.2793980889287113, + "grad_norm": 0.00032803392969071865, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57610 + }, + { + "epoch": 0.2794465871215474, + "grad_norm": 5.6098095228662714e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 57620 + }, + { + "epoch": 0.2794950853143835, + "grad_norm": 8.327257091877982e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 57630 + }, + { + "epoch": 0.2795435835072196, + "grad_norm": 0.0001582786935614422, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 57640 + }, + { + "epoch": 0.2795920817000557, + "grad_norm": 0.00011425010598031804, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57650 + }, + { + "epoch": 0.27964057989289176, + "grad_norm": 4.998792428523302e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57660 + }, + { + "epoch": 0.27968907808572785, + "grad_norm": 5.775947283837013e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57670 + }, + { + "epoch": 0.27973757627856394, + "grad_norm": 3.3361571695422754e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57680 + }, + { + "epoch": 0.2797860744714, + "grad_norm": 5.403482646215707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57690 + }, + { + "epoch": 0.2798345726642361, + "grad_norm": 4.3445870687719434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57700 + }, + { + "epoch": 0.2798830708570722, + "grad_norm": 2.4769622541498393e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57710 + }, + { + "epoch": 0.2799315690499083, + "grad_norm": 2.0975840016035363e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57720 + }, + { + "epoch": 0.2799800672427444, + "grad_norm": 2.008835508604534e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57730 + }, + { + "epoch": 0.28002856543558047, + "grad_norm": 2.918304380727932e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57740 + }, + { + "epoch": 0.28007706362841656, + "grad_norm": 2.637126272020396e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57750 + }, + { + "epoch": 0.28012556182125264, + "grad_norm": 1.8209711925010197e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57760 + }, + { + "epoch": 0.28017406001408873, + "grad_norm": 1.6527663319720887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57770 + }, + { + "epoch": 0.2802225582069248, + "grad_norm": 1.4164214917400386e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57780 + }, + { + "epoch": 0.2802710563997609, + "grad_norm": 2.2381058442988433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57790 + }, + { + "epoch": 0.280319554592597, + "grad_norm": 2.0669955119956285e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57800 + }, + { + "epoch": 0.2803680527854331, + "grad_norm": 1.3386029422690626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57810 + }, + { + "epoch": 0.2804165509782692, + "grad_norm": 1.4296960216597654e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57820 + }, + { + "epoch": 0.28046504917110526, + "grad_norm": 1.387357224302832e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57830 + }, + { + "epoch": 0.28051354736394135, + "grad_norm": 2.0104660507058725e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57840 + }, + { + "epoch": 0.28056204555677744, + "grad_norm": 1.8032415027846582e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57850 + }, + { + "epoch": 0.28061054374961353, + "grad_norm": 1.4107215974945575e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57860 + }, + { + "epoch": 0.2806590419424496, + "grad_norm": 1.0335051229048986e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57870 + }, + { + "epoch": 0.2807075401352857, + "grad_norm": 9.986162694985978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57880 + }, + { + "epoch": 0.2807560383281218, + "grad_norm": 1.5703850294812582e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57890 + }, + { + "epoch": 0.2808045365209579, + "grad_norm": 1.445426187274279e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57900 + }, + { + "epoch": 0.28085303471379397, + "grad_norm": 1.0548132195253856e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57910 + }, + { + "epoch": 0.28090153290663006, + "grad_norm": 9.272057468479034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57920 + }, + { + "epoch": 0.28095003109946615, + "grad_norm": 8.83606026036432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57930 + }, + { + "epoch": 0.28099852929230223, + "grad_norm": 1.2665394024224952e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57940 + }, + { + "epoch": 0.2810470274851383, + "grad_norm": 1.2294827683945186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57950 + }, + { + "epoch": 0.2810955256779744, + "grad_norm": 9.23742845770903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57960 + }, + { + "epoch": 0.2811440238708105, + "grad_norm": 7.340102911257418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57970 + }, + { + "epoch": 0.2811925220636466, + "grad_norm": 7.825547072570771e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57980 + }, + { + "epoch": 0.2812410202564827, + "grad_norm": 1.1971885214734357e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 57990 + }, + { + "epoch": 0.28128951844931877, + "grad_norm": 1.136597893491853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58000 + }, + { + "epoch": 0.28133801664215485, + "grad_norm": 7.194551926659187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58010 + }, + { + "epoch": 0.28138651483499094, + "grad_norm": 7.200317213573726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58020 + }, + { + "epoch": 0.28143501302782703, + "grad_norm": 7.109960733941989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58030 + }, + { + "epoch": 0.2814835112206631, + "grad_norm": 1.0398824088042602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58040 + }, + { + "epoch": 0.2815320094134992, + "grad_norm": 1.0285163625667337e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58050 + }, + { + "epoch": 0.2815805076063353, + "grad_norm": 7.481794455088675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58060 + }, + { + "epoch": 0.2816290057991714, + "grad_norm": 6.852699698356446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58070 + }, + { + "epoch": 0.28167750399200747, + "grad_norm": 6.3135294112726115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58080 + }, + { + "epoch": 0.28172600218484356, + "grad_norm": 9.549467904435005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58090 + }, + { + "epoch": 0.2817745003776797, + "grad_norm": 9.63476031756727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58100 + }, + { + "epoch": 0.2818229985705158, + "grad_norm": 6.508917067549191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58110 + }, + { + "epoch": 0.2818714967633519, + "grad_norm": 6.11673522143974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58120 + }, + { + "epoch": 0.28191999495618797, + "grad_norm": 5.86213855058304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58130 + }, + { + "epoch": 0.28196849314902406, + "grad_norm": 8.120783604681492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58140 + }, + { + "epoch": 0.28201699134186015, + "grad_norm": 7.990162885107566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58150 + }, + { + "epoch": 0.28206548953469623, + "grad_norm": 5.35223625774961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58160 + }, + { + "epoch": 0.2821139877275323, + "grad_norm": 6.043604571459582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58170 + }, + { + "epoch": 0.2821624859203684, + "grad_norm": 6.172546818561386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58180 + }, + { + "epoch": 0.2822109841132045, + "grad_norm": 7.673627806070726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58190 + }, + { + "epoch": 0.2822594823060406, + "grad_norm": 7.703869414399378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58200 + }, + { + "epoch": 0.2823079804988767, + "grad_norm": 5.338011760613881e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58210 + }, + { + "epoch": 0.28235647869171276, + "grad_norm": 5.886470717086922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58220 + }, + { + "epoch": 0.28240497688454885, + "grad_norm": 7.27120959709282e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 58230 + }, + { + "epoch": 0.28245347507738494, + "grad_norm": 8.822037671052385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58240 + }, + { + "epoch": 0.28250197327022103, + "grad_norm": 8.122513463604264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58250 + }, + { + "epoch": 0.2825504714630571, + "grad_norm": 8.434220944764093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58260 + }, + { + "epoch": 0.2825989696558932, + "grad_norm": 0.041958555579185486, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 58270 + }, + { + "epoch": 0.2826474678487293, + "grad_norm": 0.0005966138560324907, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58280 + }, + { + "epoch": 0.2826959660415654, + "grad_norm": 8.335246093338355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58290 + }, + { + "epoch": 0.28274446423440147, + "grad_norm": 8.226174031733535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58300 + }, + { + "epoch": 0.28279296242723756, + "grad_norm": 1.3840521205565892e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58310 + }, + { + "epoch": 0.28284146062007365, + "grad_norm": 1.0670167284843046e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58320 + }, + { + "epoch": 0.28288995881290974, + "grad_norm": 9.959168892237358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58330 + }, + { + "epoch": 0.2829384570057458, + "grad_norm": 8.953748874773737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58340 + }, + { + "epoch": 0.2829869551985819, + "grad_norm": 7.643508070032112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58350 + }, + { + "epoch": 0.283035453391418, + "grad_norm": 9.202813998854253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58360 + }, + { + "epoch": 0.2830839515842541, + "grad_norm": 8.818349670036696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58370 + }, + { + "epoch": 0.2831324497770902, + "grad_norm": 8.353283192263916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58380 + }, + { + "epoch": 0.28318094796992627, + "grad_norm": 6.652007414231775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58390 + }, + { + "epoch": 0.28322944616276235, + "grad_norm": 4.534962135949172e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58400 + }, + { + "epoch": 0.28327794435559844, + "grad_norm": 1.2320235327933915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58410 + }, + { + "epoch": 0.28332644254843453, + "grad_norm": 6.844211384304799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58420 + }, + { + "epoch": 0.2833749407412706, + "grad_norm": 7.1867207225295715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58430 + }, + { + "epoch": 0.2834234389341067, + "grad_norm": 7.0059045356174465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58440 + }, + { + "epoch": 0.2834719371269428, + "grad_norm": 6.449468855862506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58450 + }, + { + "epoch": 0.2835204353197789, + "grad_norm": 6.2010481087781955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58460 + }, + { + "epoch": 0.283568933512615, + "grad_norm": 6.933327313163318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58470 + }, + { + "epoch": 0.28361743170545106, + "grad_norm": 6.324441528704483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58480 + }, + { + "epoch": 0.28366592989828715, + "grad_norm": 6.186145583342295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58490 + }, + { + "epoch": 0.28371442809112324, + "grad_norm": 6.119735644460889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58500 + }, + { + "epoch": 0.2837629262839593, + "grad_norm": 0.0003146995441056788, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58510 + }, + { + "epoch": 0.2838114244767954, + "grad_norm": 5.650923867506208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58520 + }, + { + "epoch": 0.2838599226696315, + "grad_norm": 4.883716883341549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58530 + }, + { + "epoch": 0.2839084208624676, + "grad_norm": 5.684047664544778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58540 + }, + { + "epoch": 0.2839569190553037, + "grad_norm": 5.706119281967403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58550 + }, + { + "epoch": 0.28400541724813977, + "grad_norm": 6.343692803056911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58560 + }, + { + "epoch": 0.28405391544097586, + "grad_norm": 5.133314971317304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58570 + }, + { + "epoch": 0.28410241363381195, + "grad_norm": 4.887946033704793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58580 + }, + { + "epoch": 0.28415091182664803, + "grad_norm": 5.537545803235844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58590 + }, + { + "epoch": 0.2841994100194841, + "grad_norm": 5.202497504797066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58600 + }, + { + "epoch": 0.28424790821232027, + "grad_norm": 5.661397153744474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58610 + }, + { + "epoch": 0.28429640640515635, + "grad_norm": 1.5496925698244013e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58620 + }, + { + "epoch": 0.28434490459799244, + "grad_norm": 4.243469447828829e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58630 + }, + { + "epoch": 0.28439340279082853, + "grad_norm": 5.330602562025888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58640 + }, + { + "epoch": 0.2844419009836646, + "grad_norm": 4.853828613704536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58650 + }, + { + "epoch": 0.2844903991765007, + "grad_norm": 4.523159532254795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58660 + }, + { + "epoch": 0.2845388973693368, + "grad_norm": 4.923688720737118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58670 + }, + { + "epoch": 0.2845873955621729, + "grad_norm": 4.272093519830378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58680 + }, + { + "epoch": 0.284635893755009, + "grad_norm": 4.879461812379304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58690 + }, + { + "epoch": 0.28468439194784506, + "grad_norm": 4.7210064622049686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58700 + }, + { + "epoch": 0.28473289014068115, + "grad_norm": 4.488598733587423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58710 + }, + { + "epoch": 0.28478138833351724, + "grad_norm": 3.971776550315553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58720 + }, + { + "epoch": 0.2848298865263533, + "grad_norm": 3.8100949950603535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58730 + }, + { + "epoch": 0.2848783847191894, + "grad_norm": 4.4478961171989795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58740 + }, + { + "epoch": 0.2849268829120255, + "grad_norm": 4.394520146888681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58750 + }, + { + "epoch": 0.2849753811048616, + "grad_norm": 4.140586042922223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58760 + }, + { + "epoch": 0.2850238792976977, + "grad_norm": 3.741128921319614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58770 + }, + { + "epoch": 0.28507237749053377, + "grad_norm": 3.4263398447365034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58780 + }, + { + "epoch": 0.28512087568336986, + "grad_norm": 4.33531022281386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58790 + }, + { + "epoch": 0.28516937387620594, + "grad_norm": 0.0004444182850420475, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58800 + }, + { + "epoch": 0.28521787206904203, + "grad_norm": 3.844511866191169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58810 + }, + { + "epoch": 0.2852663702618781, + "grad_norm": 3.773247726712725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58820 + }, + { + "epoch": 0.2853148684547142, + "grad_norm": 3.1665092592447763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58830 + }, + { + "epoch": 0.2853633666475503, + "grad_norm": 4.1106040953309275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58840 + }, + { + "epoch": 0.2854118648403864, + "grad_norm": 3.9702981666778214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58850 + }, + { + "epoch": 0.2854603630332225, + "grad_norm": 3.277168161730515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58860 + }, + { + "epoch": 0.28550886122605856, + "grad_norm": 3.358033382028225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58870 + }, + { + "epoch": 0.28555735941889465, + "grad_norm": 3.2108896448335145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58880 + }, + { + "epoch": 0.28560585761173074, + "grad_norm": 5.2744294407602865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58890 + }, + { + "epoch": 0.28565435580456683, + "grad_norm": 3.660959237095085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58900 + }, + { + "epoch": 0.2857028539974029, + "grad_norm": 3.126013098153635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58910 + }, + { + "epoch": 0.285751352190239, + "grad_norm": 3.5690470667759655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58920 + }, + { + "epoch": 0.2857998503830751, + "grad_norm": 3.1716683679405833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58930 + }, + { + "epoch": 0.2858483485759112, + "grad_norm": 3.9117840060498565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58940 + }, + { + "epoch": 0.28589684676874727, + "grad_norm": 4.970340341969859e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 58950 + }, + { + "epoch": 0.28594534496158336, + "grad_norm": 7.158461812650785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58960 + }, + { + "epoch": 0.28599384315441945, + "grad_norm": 0.00010388827649876475, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58970 + }, + { + "epoch": 0.28604234134725554, + "grad_norm": 4.861714842263609e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58980 + }, + { + "epoch": 0.2860908395400916, + "grad_norm": 3.729989111889154e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 58990 + }, + { + "epoch": 0.2861393377329277, + "grad_norm": 3.096045838901773e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59000 + }, + { + "epoch": 0.2861878359257638, + "grad_norm": 1.650739250180777e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59010 + }, + { + "epoch": 0.2862363341185999, + "grad_norm": 2.039025457634125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59020 + }, + { + "epoch": 0.286284832311436, + "grad_norm": 1.819032695493661e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59030 + }, + { + "epoch": 0.28633333050427207, + "grad_norm": 2.0819117708015256e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59040 + }, + { + "epoch": 0.28638182869710815, + "grad_norm": 1.429566509614233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59050 + }, + { + "epoch": 0.28643032688994424, + "grad_norm": 9.914802831190173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59060 + }, + { + "epoch": 0.28647882508278033, + "grad_norm": 8.454147064185236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59070 + }, + { + "epoch": 0.2865273232756164, + "grad_norm": 8.168925887730438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59080 + }, + { + "epoch": 0.2865758214684525, + "grad_norm": 1.2732334653264843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59090 + }, + { + "epoch": 0.2866243196612886, + "grad_norm": 1.140466338256374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59100 + }, + { + "epoch": 0.2866728178541247, + "grad_norm": 6.945956556592137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59110 + }, + { + "epoch": 0.2867213160469608, + "grad_norm": 6.513293101306772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59120 + }, + { + "epoch": 0.2867698142397969, + "grad_norm": 6.0499151004478335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59130 + }, + { + "epoch": 0.286818312432633, + "grad_norm": 9.951138054020703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59140 + }, + { + "epoch": 0.2868668106254691, + "grad_norm": 9.184468581224792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59150 + }, + { + "epoch": 0.2869153088183052, + "grad_norm": 5.879229775018757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59160 + }, + { + "epoch": 0.28696380701114127, + "grad_norm": 5.440711447590729e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59170 + }, + { + "epoch": 0.28701230520397736, + "grad_norm": 5.666445304086665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59180 + }, + { + "epoch": 0.28706080339681345, + "grad_norm": 8.121621249301825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59190 + }, + { + "epoch": 0.28710930158964953, + "grad_norm": 7.5948069024889264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59200 + }, + { + "epoch": 0.2871577997824856, + "grad_norm": 4.631585852621356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59210 + }, + { + "epoch": 0.2872062979753217, + "grad_norm": 6.010692231939174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59220 + }, + { + "epoch": 0.2872547961681578, + "grad_norm": 4.450851520232391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59230 + }, + { + "epoch": 0.2873032943609939, + "grad_norm": 6.6552911448525265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59240 + }, + { + "epoch": 0.28735179255383, + "grad_norm": 7.094739885360468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59250 + }, + { + "epoch": 0.28740029074666607, + "grad_norm": 3.959213699999964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59260 + }, + { + "epoch": 0.28744878893950215, + "grad_norm": 4.4194143811182585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59270 + }, + { + "epoch": 0.28749728713233824, + "grad_norm": 4.073489435540978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59280 + }, + { + "epoch": 0.28754578532517433, + "grad_norm": 7.046002792776562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59290 + }, + { + "epoch": 0.2875942835180104, + "grad_norm": 6.021928584232228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59300 + }, + { + "epoch": 0.2876427817108465, + "grad_norm": 4.058063041156856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59310 + }, + { + "epoch": 0.2876912799036826, + "grad_norm": 3.932088930014288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59320 + }, + { + "epoch": 0.2877397780965187, + "grad_norm": 4.079457994521363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59330 + }, + { + "epoch": 0.28778827628935477, + "grad_norm": 5.260830675979378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59340 + }, + { + "epoch": 0.28783677448219086, + "grad_norm": 5.230545866652392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59350 + }, + { + "epoch": 0.28788527267502695, + "grad_norm": 3.6207286484568613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59360 + }, + { + "epoch": 0.28793377086786304, + "grad_norm": 3.738142822840018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59370 + }, + { + "epoch": 0.2879822690606991, + "grad_norm": 3.1407582810061285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59380 + }, + { + "epoch": 0.2880307672535352, + "grad_norm": 5.2209747991582844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59390 + }, + { + "epoch": 0.2880792654463713, + "grad_norm": 4.58865042674006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59400 + }, + { + "epoch": 0.2881277636392074, + "grad_norm": 3.138708052574657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59410 + }, + { + "epoch": 0.2881762618320435, + "grad_norm": 3.1710189887235174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59420 + }, + { + "epoch": 0.28822476002487957, + "grad_norm": 3.1400709303852636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59430 + }, + { + "epoch": 0.28827325821771566, + "grad_norm": 4.513063231570413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59440 + }, + { + "epoch": 0.28832175641055174, + "grad_norm": 4.472222826734651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59450 + }, + { + "epoch": 0.28837025460338783, + "grad_norm": 0.008578493259847164, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 59460 + }, + { + "epoch": 0.2884187527962239, + "grad_norm": 9.86284067039378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59470 + }, + { + "epoch": 0.28846725098906, + "grad_norm": 1.8191900380770676e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59480 + }, + { + "epoch": 0.2885157491818961, + "grad_norm": 3.484353510430083e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59490 + }, + { + "epoch": 0.2885642473747322, + "grad_norm": 3.344931246829219e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59500 + }, + { + "epoch": 0.2886127455675683, + "grad_norm": 2.0942583432770334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59510 + }, + { + "epoch": 0.28866124376040436, + "grad_norm": 2.0478291844483465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59520 + }, + { + "epoch": 0.28870974195324045, + "grad_norm": 1.494413663749583e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59530 + }, + { + "epoch": 0.28875824014607654, + "grad_norm": 2.2867727238917723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59540 + }, + { + "epoch": 0.2888067383389126, + "grad_norm": 1.9970735593233258e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59550 + }, + { + "epoch": 0.2888552365317487, + "grad_norm": 1.2517299182945862e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59560 + }, + { + "epoch": 0.2889037347245848, + "grad_norm": 1.2309073099459056e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59570 + }, + { + "epoch": 0.2889522329174209, + "grad_norm": 1.117517695092829e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59580 + }, + { + "epoch": 0.289000731110257, + "grad_norm": 1.5638819604646415e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59590 + }, + { + "epoch": 0.28904922930309307, + "grad_norm": 1.4079557331569958e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59600 + }, + { + "epoch": 0.28909772749592916, + "grad_norm": 9.132588274951559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59610 + }, + { + "epoch": 0.28914622568876525, + "grad_norm": 8.268979399872478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59620 + }, + { + "epoch": 0.28919472388160133, + "grad_norm": 8.929246178013273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59630 + }, + { + "epoch": 0.2892432220744375, + "grad_norm": 1.185303699458018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59640 + }, + { + "epoch": 0.28929172026727357, + "grad_norm": 1.1900694516953081e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59650 + }, + { + "epoch": 0.28934021846010965, + "grad_norm": 7.243748768814839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59660 + }, + { + "epoch": 0.28938871665294574, + "grad_norm": 6.946667781448923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59670 + }, + { + "epoch": 0.28943721484578183, + "grad_norm": 7.958540663821623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59680 + }, + { + "epoch": 0.2894857130386179, + "grad_norm": 9.987573321268428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59690 + }, + { + "epoch": 0.289534211231454, + "grad_norm": 9.77805848378921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59700 + }, + { + "epoch": 0.2895827094242901, + "grad_norm": 6.173764177219709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59710 + }, + { + "epoch": 0.2896312076171262, + "grad_norm": 5.319546744431136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59720 + }, + { + "epoch": 0.2896797058099623, + "grad_norm": 5.483492714120075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59730 + }, + { + "epoch": 0.28972820400279836, + "grad_norm": 8.36699200590374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59740 + }, + { + "epoch": 0.28977670219563445, + "grad_norm": 7.777811333653517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59750 + }, + { + "epoch": 0.28982520038847054, + "grad_norm": 5.125706138642272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59760 + }, + { + "epoch": 0.2898736985813066, + "grad_norm": 5.232294824963901e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59770 + }, + { + "epoch": 0.2899221967741427, + "grad_norm": 4.673639068641933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59780 + }, + { + "epoch": 0.2899706949669788, + "grad_norm": 7.445058145094663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59790 + }, + { + "epoch": 0.2900191931598149, + "grad_norm": 6.949034741410287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59800 + }, + { + "epoch": 0.290067691352651, + "grad_norm": 4.547041044133948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59810 + }, + { + "epoch": 0.29011618954548707, + "grad_norm": 4.819550667889416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59820 + }, + { + "epoch": 0.29016468773832316, + "grad_norm": 4.248509867466055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59830 + }, + { + "epoch": 0.29021318593115925, + "grad_norm": 6.404242412827443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59840 + }, + { + "epoch": 0.29026168412399533, + "grad_norm": 6.200484676810447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59850 + }, + { + "epoch": 0.2903101823168314, + "grad_norm": 3.818610366579378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59860 + }, + { + "epoch": 0.2903586805096675, + "grad_norm": 3.65132268598245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59870 + }, + { + "epoch": 0.2904071787025036, + "grad_norm": 3.506844677758636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59880 + }, + { + "epoch": 0.2904556768953397, + "grad_norm": 5.709352535632206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59890 + }, + { + "epoch": 0.2905041750881758, + "grad_norm": 5.1997503760503605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59900 + }, + { + "epoch": 0.29055267328101186, + "grad_norm": 3.5408597796049435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59910 + }, + { + "epoch": 0.29060117147384795, + "grad_norm": 3.5342525279702386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59920 + }, + { + "epoch": 0.29064966966668404, + "grad_norm": 3.5098362332064426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59930 + }, + { + "epoch": 0.29069816785952013, + "grad_norm": 5.027512543165358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59940 + }, + { + "epoch": 0.2907466660523562, + "grad_norm": 4.930903287458932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59950 + }, + { + "epoch": 0.2907951642451923, + "grad_norm": 3.118723498118925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59960 + }, + { + "epoch": 0.2908436624380284, + "grad_norm": 2.9798538889735937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59970 + }, + { + "epoch": 0.2908921606308645, + "grad_norm": 3.320740006529377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59980 + }, + { + "epoch": 0.29094065882370057, + "grad_norm": 4.508045094553381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 59990 + }, + { + "epoch": 0.29098915701653666, + "grad_norm": 4.231903403706383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60000 + }, + { + "epoch": 0.29103765520937275, + "grad_norm": 3.0254313969635405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60010 + }, + { + "epoch": 0.29108615340220884, + "grad_norm": 2.644327196321683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60020 + }, + { + "epoch": 0.2911346515950449, + "grad_norm": 2.72549095825525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60030 + }, + { + "epoch": 0.291183149787881, + "grad_norm": 4.1748658077267464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60040 + }, + { + "epoch": 0.2912316479807171, + "grad_norm": 3.87577392757521e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60050 + }, + { + "epoch": 0.2912801461735532, + "grad_norm": 2.6154225452046376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60060 + }, + { + "epoch": 0.2913286443663893, + "grad_norm": 2.5093695512623526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60070 + }, + { + "epoch": 0.29137714255922537, + "grad_norm": 2.485877985236584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60080 + }, + { + "epoch": 0.29142564075206145, + "grad_norm": 3.7923480249446584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60090 + }, + { + "epoch": 0.29147413894489754, + "grad_norm": 3.548035692801932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60100 + }, + { + "epoch": 0.29152263713773363, + "grad_norm": 2.257816277051461e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60110 + }, + { + "epoch": 0.2915711353305697, + "grad_norm": 2.4447476789646316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60120 + }, + { + "epoch": 0.2916196335234058, + "grad_norm": 2.3285849692911142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60130 + }, + { + "epoch": 0.2916681317162419, + "grad_norm": 3.6446449485083576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60140 + }, + { + "epoch": 0.29171662990907804, + "grad_norm": 3.4242484616697766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60150 + }, + { + "epoch": 0.29176512810191413, + "grad_norm": 2.0022175704070833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60160 + }, + { + "epoch": 0.2918136262947502, + "grad_norm": 1.945351868926082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60170 + }, + { + "epoch": 0.2918621244875863, + "grad_norm": 2.1391338123066816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60180 + }, + { + "epoch": 0.2919106226804224, + "grad_norm": 3.216122877347516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60190 + }, + { + "epoch": 0.2919591208732585, + "grad_norm": 3.221818587917369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60200 + }, + { + "epoch": 0.29200761906609457, + "grad_norm": 2.020987722062273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60210 + }, + { + "epoch": 0.29205611725893066, + "grad_norm": 2.02212504518684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60220 + }, + { + "epoch": 0.29210461545176675, + "grad_norm": 2.0686550215032184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60230 + }, + { + "epoch": 0.29215311364460284, + "grad_norm": 3.128281605313532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60240 + }, + { + "epoch": 0.2922016118374389, + "grad_norm": 2.9290979455254273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60250 + }, + { + "epoch": 0.292250110030275, + "grad_norm": 2.0089123609068338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60260 + }, + { + "epoch": 0.2922986082231111, + "grad_norm": 1.9077258457400603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60270 + }, + { + "epoch": 0.2923471064159472, + "grad_norm": 1.9588551367633045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60280 + }, + { + "epoch": 0.2923956046087833, + "grad_norm": 2.816377900671796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60290 + }, + { + "epoch": 0.29244410280161937, + "grad_norm": 2.892100837925682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60300 + }, + { + "epoch": 0.29249260099445545, + "grad_norm": 1.909456614157534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60310 + }, + { + "epoch": 0.29254109918729154, + "grad_norm": 1.6939277429628419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60320 + }, + { + "epoch": 0.29258959738012763, + "grad_norm": 1.7338276165901334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60330 + }, + { + "epoch": 0.2926380955729637, + "grad_norm": 2.585648871900048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60340 + }, + { + "epoch": 0.2926865937657998, + "grad_norm": 2.95685208584473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60350 + }, + { + "epoch": 0.2927350919586359, + "grad_norm": 1.6241069715761114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60360 + }, + { + "epoch": 0.292783590151472, + "grad_norm": 0.009374492801725864, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 60370 + }, + { + "epoch": 0.2928320883443081, + "grad_norm": 0.00011540666309883818, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60380 + }, + { + "epoch": 0.29288058653714416, + "grad_norm": 0.00018477815319783986, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60390 + }, + { + "epoch": 0.29292908472998025, + "grad_norm": 8.828443242236972e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60400 + }, + { + "epoch": 0.29297758292281634, + "grad_norm": 6.454324466176331e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60410 + }, + { + "epoch": 0.2930260811156524, + "grad_norm": 3.3780772355385125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60420 + }, + { + "epoch": 0.2930745793084885, + "grad_norm": 2.3352678908850066e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60430 + }, + { + "epoch": 0.2931230775013246, + "grad_norm": 1.4076404113438912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60440 + }, + { + "epoch": 0.2931715756941607, + "grad_norm": 1.278767467738362e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60450 + }, + { + "epoch": 0.2932200738869968, + "grad_norm": 1.4026853023096919e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60460 + }, + { + "epoch": 0.29326857207983287, + "grad_norm": 1.3138112080923747e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60470 + }, + { + "epoch": 0.29331707027266896, + "grad_norm": 1.17389581646421e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60480 + }, + { + "epoch": 0.29336556846550504, + "grad_norm": 8.540078852092847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60490 + }, + { + "epoch": 0.29341406665834113, + "grad_norm": 8.394397809752263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60500 + }, + { + "epoch": 0.2934625648511772, + "grad_norm": 9.571482223691419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60510 + }, + { + "epoch": 0.2935110630440133, + "grad_norm": 9.24829419091111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60520 + }, + { + "epoch": 0.2935595612368494, + "grad_norm": 8.671941941429395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60530 + }, + { + "epoch": 0.2936080594296855, + "grad_norm": 6.2138728935678955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60540 + }, + { + "epoch": 0.2936565576225216, + "grad_norm": 6.011709501763107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60550 + }, + { + "epoch": 0.29370505581535766, + "grad_norm": 7.267222372320248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60560 + }, + { + "epoch": 0.29375355400819375, + "grad_norm": 6.792836302338401e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60570 + }, + { + "epoch": 0.29380205220102984, + "grad_norm": 6.485145604528952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60580 + }, + { + "epoch": 0.29385055039386593, + "grad_norm": 5.186482667340897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60590 + }, + { + "epoch": 0.293899048586702, + "grad_norm": 4.793704192707082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60600 + }, + { + "epoch": 0.2939475467795381, + "grad_norm": 5.394361323851626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60610 + }, + { + "epoch": 0.2939960449723742, + "grad_norm": 5.316396254784195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60620 + }, + { + "epoch": 0.2940445431652103, + "grad_norm": 5.623345714411698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60630 + }, + { + "epoch": 0.29409304135804637, + "grad_norm": 4.03433932660846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60640 + }, + { + "epoch": 0.29414153955088246, + "grad_norm": 3.984377144661266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60650 + }, + { + "epoch": 0.2941900377437186, + "grad_norm": 4.748242190544261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60660 + }, + { + "epoch": 0.2942385359365547, + "grad_norm": 4.090767561137909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60670 + }, + { + "epoch": 0.2942870341293908, + "grad_norm": 4.158957835898036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60680 + }, + { + "epoch": 0.29433553232222687, + "grad_norm": 3.4142144613724668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60690 + }, + { + "epoch": 0.29438403051506296, + "grad_norm": 3.343089019836043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60700 + }, + { + "epoch": 0.29443252870789904, + "grad_norm": 3.834670678770635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60710 + }, + { + "epoch": 0.29448102690073513, + "grad_norm": 3.758630100492155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60720 + }, + { + "epoch": 0.2945295250935712, + "grad_norm": 3.63937260772218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60730 + }, + { + "epoch": 0.2945780232864073, + "grad_norm": 3.0049041015445255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60740 + }, + { + "epoch": 0.2946265214792434, + "grad_norm": 2.936283181043109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60750 + }, + { + "epoch": 0.2946750196720795, + "grad_norm": 3.185857622156618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60760 + }, + { + "epoch": 0.2947235178649156, + "grad_norm": 3.2903319606703008e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60770 + }, + { + "epoch": 0.29477201605775166, + "grad_norm": 3.343916887388332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60780 + }, + { + "epoch": 0.29482051425058775, + "grad_norm": 2.8404806471371558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60790 + }, + { + "epoch": 0.29486901244342384, + "grad_norm": 2.584414460216067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60800 + }, + { + "epoch": 0.2949175106362599, + "grad_norm": 2.9243590233818395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60810 + }, + { + "epoch": 0.294966008829096, + "grad_norm": 2.817299673552043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60820 + }, + { + "epoch": 0.2950145070219321, + "grad_norm": 2.700917093534372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60830 + }, + { + "epoch": 0.2950630052147682, + "grad_norm": 2.4853411559888627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60840 + }, + { + "epoch": 0.2951115034076043, + "grad_norm": 2.364615738770226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60850 + }, + { + "epoch": 0.29516000160044037, + "grad_norm": 2.7239887003815966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60860 + }, + { + "epoch": 0.29520849979327646, + "grad_norm": 2.694722297746921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60870 + }, + { + "epoch": 0.29525699798611255, + "grad_norm": 2.5773813376872567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60880 + }, + { + "epoch": 0.29530549617894863, + "grad_norm": 2.1776804715045728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60890 + }, + { + "epoch": 0.2953539943717847, + "grad_norm": 2.0391494217619766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60900 + }, + { + "epoch": 0.2954024925646208, + "grad_norm": 2.281160959682893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60910 + }, + { + "epoch": 0.2954509907574569, + "grad_norm": 2.1866514998691855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60920 + }, + { + "epoch": 0.295499488950293, + "grad_norm": 2.3473401142837247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60930 + }, + { + "epoch": 0.2955479871431291, + "grad_norm": 1.910881564981537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60940 + }, + { + "epoch": 0.29559648533596516, + "grad_norm": 1.9366104879736667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60950 + }, + { + "epoch": 0.29564498352880125, + "grad_norm": 2.1769683371530846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60960 + }, + { + "epoch": 0.29569348172163734, + "grad_norm": 2.1807675238960655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60970 + }, + { + "epoch": 0.29574197991447343, + "grad_norm": 2.1209498299867846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60980 + }, + { + "epoch": 0.2957904781073095, + "grad_norm": 1.943490815392579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 60990 + }, + { + "epoch": 0.2958389763001456, + "grad_norm": 1.865898298092361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61000 + }, + { + "epoch": 0.2958874744929817, + "grad_norm": 2.0002228211524198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61010 + }, + { + "epoch": 0.2959359726858178, + "grad_norm": 2.0337506612122525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61020 + }, + { + "epoch": 0.29598447087865387, + "grad_norm": 1.986604047488072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61030 + }, + { + "epoch": 0.29603296907148996, + "grad_norm": 1.688848669800791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61040 + }, + { + "epoch": 0.29608146726432605, + "grad_norm": 1.8091303672918002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61050 + }, + { + "epoch": 0.29612996545716214, + "grad_norm": 1.9582175809773616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61060 + }, + { + "epoch": 0.2961784636499982, + "grad_norm": 1.7956780311578768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61070 + }, + { + "epoch": 0.2962269618428343, + "grad_norm": 1.7732829746819334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61080 + }, + { + "epoch": 0.2962754600356704, + "grad_norm": 1.6284332104987698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61090 + }, + { + "epoch": 0.2963239582285065, + "grad_norm": 1.6739207922000787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61100 + }, + { + "epoch": 0.2963724564213426, + "grad_norm": 1.6948179109022021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61110 + }, + { + "epoch": 0.29642095461417867, + "grad_norm": 1.7364042150802561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61120 + }, + { + "epoch": 0.29646945280701476, + "grad_norm": 1.6093517842818983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61130 + }, + { + "epoch": 0.29651795099985084, + "grad_norm": 1.5359318012997392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61140 + }, + { + "epoch": 0.29656644919268693, + "grad_norm": 1.4434654076467268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61150 + }, + { + "epoch": 0.296614947385523, + "grad_norm": 1.6657226069582975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61160 + }, + { + "epoch": 0.29666344557835916, + "grad_norm": 1.6594640328548849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61170 + }, + { + "epoch": 0.29671194377119525, + "grad_norm": 1.61982279678341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61180 + }, + { + "epoch": 0.29676044196403134, + "grad_norm": 1.3712492545892019e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61190 + }, + { + "epoch": 0.29680894015686743, + "grad_norm": 1.3874323485651985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61200 + }, + { + "epoch": 0.2968574383497035, + "grad_norm": 1.4725945902682724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61210 + }, + { + "epoch": 0.2969059365425396, + "grad_norm": 1.4433778687816812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61220 + }, + { + "epoch": 0.2969544347353757, + "grad_norm": 1.4383949746843427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61230 + }, + { + "epoch": 0.2970029329282118, + "grad_norm": 1.4025919199411874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61240 + }, + { + "epoch": 0.29705143112104787, + "grad_norm": 1.3677434935743804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61250 + }, + { + "epoch": 0.29709992931388396, + "grad_norm": 1.3574342574429465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61260 + }, + { + "epoch": 0.29714842750672005, + "grad_norm": 1.7465863493271172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61270 + }, + { + "epoch": 0.29719692569955614, + "grad_norm": 1.3401876231000642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61280 + }, + { + "epoch": 0.2972454238923922, + "grad_norm": 1.2461307505873265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61290 + }, + { + "epoch": 0.2972939220852283, + "grad_norm": 1.3571475392382126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61300 + }, + { + "epoch": 0.2973424202780644, + "grad_norm": 1.3202936770539964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61310 + }, + { + "epoch": 0.2973909184709005, + "grad_norm": 1.294668322771031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61320 + }, + { + "epoch": 0.2974394166637366, + "grad_norm": 1.3526986322176526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61330 + }, + { + "epoch": 0.29748791485657267, + "grad_norm": 1.215244651575631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61340 + }, + { + "epoch": 0.29753641304940875, + "grad_norm": 1.1934081385334139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61350 + }, + { + "epoch": 0.29758491124224484, + "grad_norm": 1.2323940836722613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61360 + }, + { + "epoch": 0.29763340943508093, + "grad_norm": 1.1959001540162717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61370 + }, + { + "epoch": 0.297681907627917, + "grad_norm": 1.3046784488324192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61380 + }, + { + "epoch": 0.2977304058207531, + "grad_norm": 1.145960254689271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61390 + }, + { + "epoch": 0.2977789040135892, + "grad_norm": 1.050078481057426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61400 + }, + { + "epoch": 0.2978274022064253, + "grad_norm": 1.191654519061558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61410 + }, + { + "epoch": 0.2978759003992614, + "grad_norm": 1.1819776091215317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61420 + }, + { + "epoch": 0.29792439859209746, + "grad_norm": 1.16684611839446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61430 + }, + { + "epoch": 0.29797289678493355, + "grad_norm": 1.140762378781801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61440 + }, + { + "epoch": 0.29802139497776964, + "grad_norm": 1.031167585097137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61450 + }, + { + "epoch": 0.2980698931706057, + "grad_norm": 1.2341456567810383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61460 + }, + { + "epoch": 0.2981183913634418, + "grad_norm": 1.0027514463217813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61470 + }, + { + "epoch": 0.2981668895562779, + "grad_norm": 1.0932934628726798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61480 + }, + { + "epoch": 0.298215387749114, + "grad_norm": 9.668815437180456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61490 + }, + { + "epoch": 0.2982638859419501, + "grad_norm": 1.0283266647093114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61500 + }, + { + "epoch": 0.29831238413478617, + "grad_norm": 1.1046464578612358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61510 + }, + { + "epoch": 0.29836088232762226, + "grad_norm": 1.500146595390106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61520 + }, + { + "epoch": 0.29840938052045834, + "grad_norm": 1.0455800065756193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61530 + }, + { + "epoch": 0.29845787871329443, + "grad_norm": 1.1094264209532412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61540 + }, + { + "epoch": 0.2985063769061305, + "grad_norm": 9.83245172392344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61550 + }, + { + "epoch": 0.2985548750989666, + "grad_norm": 1.0141906159333303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61560 + }, + { + "epoch": 0.2986033732918027, + "grad_norm": 1.0036271760327509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61570 + }, + { + "epoch": 0.2986518714846388, + "grad_norm": 9.739916322359932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61580 + }, + { + "epoch": 0.2987003696774749, + "grad_norm": 9.278602988160856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61590 + }, + { + "epoch": 0.29874886787031096, + "grad_norm": 8.957979389379034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61600 + }, + { + "epoch": 0.29879736606314705, + "grad_norm": 9.270883651879558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61610 + }, + { + "epoch": 0.29884586425598314, + "grad_norm": 9.340815836367256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61620 + }, + { + "epoch": 0.29889436244881923, + "grad_norm": 9.660792557042441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61630 + }, + { + "epoch": 0.2989428606416553, + "grad_norm": 9.236335358764336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61640 + }, + { + "epoch": 0.2989913588344914, + "grad_norm": 9.083614713745192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61650 + }, + { + "epoch": 0.2990398570273275, + "grad_norm": 9.527507245365996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61660 + }, + { + "epoch": 0.2990883552201636, + "grad_norm": 8.94592972144892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61670 + }, + { + "epoch": 0.29913685341299967, + "grad_norm": 9.235722586709016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61680 + }, + { + "epoch": 0.2991853516058358, + "grad_norm": 8.463607059638889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61690 + }, + { + "epoch": 0.2992338497986719, + "grad_norm": 8.217730282922275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61700 + }, + { + "epoch": 0.299282347991508, + "grad_norm": 8.628895216133969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61710 + }, + { + "epoch": 0.2993308461843441, + "grad_norm": 8.399849207307852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61720 + }, + { + "epoch": 0.29937934437718017, + "grad_norm": 9.138791483565001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61730 + }, + { + "epoch": 0.29942784257001626, + "grad_norm": 6.854267553535465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61740 + }, + { + "epoch": 0.29947634076285234, + "grad_norm": 7.562831001450832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61750 + }, + { + "epoch": 0.29952483895568843, + "grad_norm": 8.413393857154006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61760 + }, + { + "epoch": 0.2995733371485245, + "grad_norm": 8.216954938689014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61770 + }, + { + "epoch": 0.2996218353413606, + "grad_norm": 7.945785682750284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61780 + }, + { + "epoch": 0.2996703335341967, + "grad_norm": 7.523123031205614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61790 + }, + { + "epoch": 0.2997188317270328, + "grad_norm": 7.927525871309626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61800 + }, + { + "epoch": 0.2997673299198689, + "grad_norm": 7.760215225971479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61810 + }, + { + "epoch": 0.29981582811270496, + "grad_norm": 8.17406032638246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61820 + }, + { + "epoch": 0.29986432630554105, + "grad_norm": 7.607086445204914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61830 + }, + { + "epoch": 0.29991282449837714, + "grad_norm": 7.352448392339284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61840 + }, + { + "epoch": 0.29996132269121323, + "grad_norm": 7.854529826545331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61850 + }, + { + "epoch": 0.3000098208840493, + "grad_norm": 7.212752279883716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61860 + }, + { + "epoch": 0.3000583190768854, + "grad_norm": 7.255634386638121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61870 + }, + { + "epoch": 0.3001068172697215, + "grad_norm": 7.220450584100035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61880 + }, + { + "epoch": 0.3001553154625576, + "grad_norm": 6.454260415011959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61890 + }, + { + "epoch": 0.30020381365539367, + "grad_norm": 6.782807986382977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61900 + }, + { + "epoch": 0.30025231184822976, + "grad_norm": 8.185954811779084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61910 + }, + { + "epoch": 0.30030081004106585, + "grad_norm": 7.09394043951761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61920 + }, + { + "epoch": 0.30034930823390193, + "grad_norm": 8.216746323341795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61930 + }, + { + "epoch": 0.300397806426738, + "grad_norm": 6.688486564598861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61940 + }, + { + "epoch": 0.3004463046195741, + "grad_norm": 6.225820357030898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61950 + }, + { + "epoch": 0.3004948028124102, + "grad_norm": 9.096846156353422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61960 + }, + { + "epoch": 0.3005433010052463, + "grad_norm": 7.557358685517102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61970 + }, + { + "epoch": 0.3005917991980824, + "grad_norm": 6.448831300076563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61980 + }, + { + "epoch": 0.30064029739091847, + "grad_norm": 6.097202458477113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 61990 + }, + { + "epoch": 0.30068879558375455, + "grad_norm": 6.031312977938796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62000 + }, + { + "epoch": 0.30073729377659064, + "grad_norm": 6.904275551278261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62010 + }, + { + "epoch": 0.30078579196942673, + "grad_norm": 6.544810844388849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62020 + }, + { + "epoch": 0.3008342901622628, + "grad_norm": 7.545355629190453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62030 + }, + { + "epoch": 0.3008827883550989, + "grad_norm": 7.077036343616783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62040 + }, + { + "epoch": 0.300931286547935, + "grad_norm": 6.314299980658689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62050 + }, + { + "epoch": 0.3009797847407711, + "grad_norm": 5.99144129864726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62060 + }, + { + "epoch": 0.30102828293360717, + "grad_norm": 6.204488727235002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62070 + }, + { + "epoch": 0.30107678112644326, + "grad_norm": 5.697497726941947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62080 + }, + { + "epoch": 0.30112527931927935, + "grad_norm": 5.696677476407785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62090 + }, + { + "epoch": 0.30117377751211544, + "grad_norm": 5.86046098760562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62100 + }, + { + "epoch": 0.3012222757049515, + "grad_norm": 5.528119686459831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62110 + }, + { + "epoch": 0.3012707738977876, + "grad_norm": 5.957960524938244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62120 + }, + { + "epoch": 0.3013192720906237, + "grad_norm": 5.882171763005317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62130 + }, + { + "epoch": 0.3013677702834598, + "grad_norm": 5.378043397286092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62140 + }, + { + "epoch": 0.3014162684762959, + "grad_norm": 5.147197157384653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62150 + }, + { + "epoch": 0.30146476666913197, + "grad_norm": 5.785270218439109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62160 + }, + { + "epoch": 0.30151326486196806, + "grad_norm": 5.62749164600973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62170 + }, + { + "epoch": 0.30156176305480414, + "grad_norm": 5.403531986303278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62180 + }, + { + "epoch": 0.30161026124764023, + "grad_norm": 5.577127808464866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62190 + }, + { + "epoch": 0.3016587594404764, + "grad_norm": 5.004150125387241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62200 + }, + { + "epoch": 0.30170725763331246, + "grad_norm": 6.036447643964493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62210 + }, + { + "epoch": 0.30175575582614855, + "grad_norm": 5.466645802698622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62220 + }, + { + "epoch": 0.30180425401898464, + "grad_norm": 6.240245511435205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62230 + }, + { + "epoch": 0.30185275221182073, + "grad_norm": 5.093638719699811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62240 + }, + { + "epoch": 0.3019012504046568, + "grad_norm": 5.055526912656205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62250 + }, + { + "epoch": 0.3019497485974929, + "grad_norm": 5.522197739082912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62260 + }, + { + "epoch": 0.301998246790329, + "grad_norm": 5.328527663550631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62270 + }, + { + "epoch": 0.3020467449831651, + "grad_norm": 5.03162254972267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62280 + }, + { + "epoch": 0.30209524317600117, + "grad_norm": 4.5770133283440373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62290 + }, + { + "epoch": 0.30214374136883726, + "grad_norm": 5.514090162250795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62300 + }, + { + "epoch": 0.30219223956167335, + "grad_norm": 4.621527125436842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62310 + }, + { + "epoch": 0.30224073775450944, + "grad_norm": 5.230799615674186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62320 + }, + { + "epoch": 0.3022892359473455, + "grad_norm": 5.285572228785895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62330 + }, + { + "epoch": 0.3023377341401816, + "grad_norm": 5.172300916456152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62340 + }, + { + "epoch": 0.3023862323330177, + "grad_norm": 4.5460723185897223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62350 + }, + { + "epoch": 0.3024347305258538, + "grad_norm": 5.011964390178036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62360 + }, + { + "epoch": 0.3024832287186899, + "grad_norm": 5.321068670127715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62370 + }, + { + "epoch": 0.30253172691152597, + "grad_norm": 5.809122853861481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62380 + }, + { + "epoch": 0.30258022510436205, + "grad_norm": 4.470921624033508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62390 + }, + { + "epoch": 0.30262872329719814, + "grad_norm": 4.3398955540396855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62400 + }, + { + "epoch": 0.30267722149003423, + "grad_norm": 4.689637194132956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62410 + }, + { + "epoch": 0.3027257196828703, + "grad_norm": 4.892215770269104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62420 + }, + { + "epoch": 0.3027742178757064, + "grad_norm": 5.072168960396084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62430 + }, + { + "epoch": 0.3028227160685425, + "grad_norm": 4.4944258092982636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62440 + }, + { + "epoch": 0.3028712142613786, + "grad_norm": 4.568126144022244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62450 + }, + { + "epoch": 0.3029197124542147, + "grad_norm": 4.456611577552394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62460 + }, + { + "epoch": 0.30296821064705076, + "grad_norm": 5.24303743532073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62470 + }, + { + "epoch": 0.30301670883988685, + "grad_norm": 4.7048692408679926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62480 + }, + { + "epoch": 0.30306520703272294, + "grad_norm": 4.014593741885619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62490 + }, + { + "epoch": 0.303113705225559, + "grad_norm": 4.866915332968347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62500 + }, + { + "epoch": 0.3031622034183951, + "grad_norm": 4.5615891508532513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62510 + }, + { + "epoch": 0.3032107016112312, + "grad_norm": 5.235625621935469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62520 + }, + { + "epoch": 0.3032591998040673, + "grad_norm": 4.777579079018324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62530 + }, + { + "epoch": 0.3033076979969034, + "grad_norm": 3.7406346109492006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62540 + }, + { + "epoch": 0.30335619618973947, + "grad_norm": 3.745339824945404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62550 + }, + { + "epoch": 0.30340469438257556, + "grad_norm": 4.5487954025702493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62560 + }, + { + "epoch": 0.30345319257541165, + "grad_norm": 4.5400392423289304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62570 + }, + { + "epoch": 0.30350169076824773, + "grad_norm": 4.285895158773201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62580 + }, + { + "epoch": 0.3035501889610838, + "grad_norm": 4.0411310919807875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62590 + }, + { + "epoch": 0.3035986871539199, + "grad_norm": 3.819979781383154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62600 + }, + { + "epoch": 0.303647185346756, + "grad_norm": 4.0941117163129093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62610 + }, + { + "epoch": 0.3036956835395921, + "grad_norm": 4.283560031126399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62620 + }, + { + "epoch": 0.3037441817324282, + "grad_norm": 4.2113072140637087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62630 + }, + { + "epoch": 0.30379267992526426, + "grad_norm": 3.566182442682475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62640 + }, + { + "epoch": 0.30384117811810035, + "grad_norm": 3.719852372796595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62650 + }, + { + "epoch": 0.30388967631093644, + "grad_norm": 4.424642270350887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62660 + }, + { + "epoch": 0.30393817450377253, + "grad_norm": 3.947773734580551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62670 + }, + { + "epoch": 0.3039866726966086, + "grad_norm": 4.0801722889227676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62680 + }, + { + "epoch": 0.3040351708894447, + "grad_norm": 3.639675583144708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62690 + }, + { + "epoch": 0.3040836690822808, + "grad_norm": 3.7552106846305833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62700 + }, + { + "epoch": 0.30413216727511694, + "grad_norm": 4.022621453714237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62710 + }, + { + "epoch": 0.304180665467953, + "grad_norm": 4.2387492271700467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62720 + }, + { + "epoch": 0.3042291636607891, + "grad_norm": 4.382786755741108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62730 + }, + { + "epoch": 0.3042776618536252, + "grad_norm": 3.5831885725201573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62740 + }, + { + "epoch": 0.3043261600464613, + "grad_norm": 4.3869627575077175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62750 + }, + { + "epoch": 0.3043746582392974, + "grad_norm": 4.1437394315835263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62760 + }, + { + "epoch": 0.30442315643213347, + "grad_norm": 3.663132304154715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62770 + }, + { + "epoch": 0.30447165462496956, + "grad_norm": 3.7598104540848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62780 + }, + { + "epoch": 0.30452015281780564, + "grad_norm": 4.3182464537494525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62790 + }, + { + "epoch": 0.30456865101064173, + "grad_norm": 3.3110316621787206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62800 + }, + { + "epoch": 0.3046171492034778, + "grad_norm": 5.372926921154431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62810 + }, + { + "epoch": 0.3046656473963139, + "grad_norm": 3.6964857486054825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62820 + }, + { + "epoch": 0.30471414558915, + "grad_norm": 3.863541735427134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62830 + }, + { + "epoch": 0.3047626437819861, + "grad_norm": 3.2148301443157834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62840 + }, + { + "epoch": 0.3048111419748222, + "grad_norm": 3.419402787585568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62850 + }, + { + "epoch": 0.30485964016765826, + "grad_norm": 3.792714835526567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62860 + }, + { + "epoch": 0.30490813836049435, + "grad_norm": 3.8329815765791864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62870 + }, + { + "epoch": 0.30495663655333044, + "grad_norm": 3.7048698686703574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62880 + }, + { + "epoch": 0.30500513474616653, + "grad_norm": 3.449063115112949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62890 + }, + { + "epoch": 0.3050536329390026, + "grad_norm": 3.580870782116108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62900 + }, + { + "epoch": 0.3051021311318387, + "grad_norm": 3.4699627349255024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62910 + }, + { + "epoch": 0.3051506293246748, + "grad_norm": 3.63124371460799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62920 + }, + { + "epoch": 0.3051991275175109, + "grad_norm": 3.2803035310280393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62930 + }, + { + "epoch": 0.30524762571034697, + "grad_norm": 3.099518437466031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62940 + }, + { + "epoch": 0.30529612390318306, + "grad_norm": 3.2186676435230765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62950 + }, + { + "epoch": 0.30534462209601915, + "grad_norm": 3.405486381780065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62960 + }, + { + "epoch": 0.30539312028885524, + "grad_norm": 3.4876239851655555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62970 + }, + { + "epoch": 0.3054416184816913, + "grad_norm": 3.5121252039971296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62980 + }, + { + "epoch": 0.3054901166745274, + "grad_norm": 3.223085229819844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 62990 + }, + { + "epoch": 0.3055386148673635, + "grad_norm": 3.6033503647558973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63000 + }, + { + "epoch": 0.3055871130601996, + "grad_norm": 5.4815568546473514e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 63010 + }, + { + "epoch": 0.3056356112530357, + "grad_norm": 0.0002786049444694072, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63020 + }, + { + "epoch": 0.30568410944587177, + "grad_norm": 1.854554284363985e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63030 + }, + { + "epoch": 0.30573260763870785, + "grad_norm": 5.908532784815179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63040 + }, + { + "epoch": 0.30578110583154394, + "grad_norm": 4.496150268096244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63050 + }, + { + "epoch": 0.30582960402438003, + "grad_norm": 4.679201992985327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63060 + }, + { + "epoch": 0.3058781022172161, + "grad_norm": 4.322504992160248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63070 + }, + { + "epoch": 0.3059266004100522, + "grad_norm": 3.9149040276242886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63080 + }, + { + "epoch": 0.3059750986028883, + "grad_norm": 3.351219447722542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63090 + }, + { + "epoch": 0.3060235967957244, + "grad_norm": 3.183046828780789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63100 + }, + { + "epoch": 0.3060720949885605, + "grad_norm": 3.420816710786312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63110 + }, + { + "epoch": 0.30612059318139656, + "grad_norm": 3.4021848023257917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63120 + }, + { + "epoch": 0.30616909137423265, + "grad_norm": 3.060821427425253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63130 + }, + { + "epoch": 0.30621758956706874, + "grad_norm": 2.6118671030417318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63140 + }, + { + "epoch": 0.3062660877599048, + "grad_norm": 2.4907424176490167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63150 + }, + { + "epoch": 0.3063145859527409, + "grad_norm": 2.8171880330774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63160 + }, + { + "epoch": 0.306363084145577, + "grad_norm": 2.73967248176632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63170 + }, + { + "epoch": 0.3064115823384131, + "grad_norm": 2.482119271007832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63180 + }, + { + "epoch": 0.3064600805312492, + "grad_norm": 2.1310686406650348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63190 + }, + { + "epoch": 0.30650857872408527, + "grad_norm": 2.0781401417480083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63200 + }, + { + "epoch": 0.30655707691692136, + "grad_norm": 2.3223205971589778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63210 + }, + { + "epoch": 0.3066055751097575, + "grad_norm": 8.147999324137345e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 63220 + }, + { + "epoch": 0.3066540733025936, + "grad_norm": 0.00030478619737550616, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63230 + }, + { + "epoch": 0.3067025714954297, + "grad_norm": 8.48713971208781e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63240 + }, + { + "epoch": 0.30675106968826577, + "grad_norm": 2.8005440981360152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63250 + }, + { + "epoch": 0.30679956788110185, + "grad_norm": 2.353496711293701e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63260 + }, + { + "epoch": 0.30684806607393794, + "grad_norm": 1.535731462354306e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63270 + }, + { + "epoch": 0.30689656426677403, + "grad_norm": 1.1964093573624268e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63280 + }, + { + "epoch": 0.3069450624596101, + "grad_norm": 6.558133463840932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63290 + }, + { + "epoch": 0.3069935606524462, + "grad_norm": 6.011673121975036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63300 + }, + { + "epoch": 0.3070420588452823, + "grad_norm": 7.981252565514296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63310 + }, + { + "epoch": 0.3070905570381184, + "grad_norm": 7.118115718185436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63320 + }, + { + "epoch": 0.30713905523095447, + "grad_norm": 6.394330739567522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63330 + }, + { + "epoch": 0.30718755342379056, + "grad_norm": 4.101275408174843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63340 + }, + { + "epoch": 0.30723605161662665, + "grad_norm": 3.758857246793923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63350 + }, + { + "epoch": 0.30728454980946274, + "grad_norm": 5.049724677519407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63360 + }, + { + "epoch": 0.3073330480022988, + "grad_norm": 5.128378234076081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63370 + }, + { + "epoch": 0.3073815461951349, + "grad_norm": 4.540781901596347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63380 + }, + { + "epoch": 0.307430044387971, + "grad_norm": 3.011776470884797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63390 + }, + { + "epoch": 0.3074785425808071, + "grad_norm": 2.739712499533198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63400 + }, + { + "epoch": 0.3075270407736432, + "grad_norm": 3.993531663581962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63410 + }, + { + "epoch": 0.30757553896647927, + "grad_norm": 3.710629925990361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63420 + }, + { + "epoch": 0.30762403715931536, + "grad_norm": 3.5446337278699502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63430 + }, + { + "epoch": 0.30767253535215144, + "grad_norm": 2.3264772153197555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63440 + }, + { + "epoch": 0.30772103354498753, + "grad_norm": 2.4273490453197155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63450 + }, + { + "epoch": 0.3077695317378236, + "grad_norm": 3.204709855708643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63460 + }, + { + "epoch": 0.3078180299306597, + "grad_norm": 3.0284240892797243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63470 + }, + { + "epoch": 0.3078665281234958, + "grad_norm": 3.0093265195318963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63480 + }, + { + "epoch": 0.3079150263163319, + "grad_norm": 1.891702709144738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63490 + }, + { + "epoch": 0.307963524509168, + "grad_norm": 1.911970230139559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63500 + }, + { + "epoch": 0.30801202270200406, + "grad_norm": 2.5887063657137332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63510 + }, + { + "epoch": 0.30806052089484015, + "grad_norm": 2.46065724240907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63520 + }, + { + "epoch": 0.30810901908767624, + "grad_norm": 2.432569544907892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63530 + }, + { + "epoch": 0.3081575172805123, + "grad_norm": 1.6746381561461021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63540 + }, + { + "epoch": 0.3082060154733484, + "grad_norm": 1.6087292351585347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63550 + }, + { + "epoch": 0.3082545136661845, + "grad_norm": 2.404225142527139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63560 + }, + { + "epoch": 0.3083030118590206, + "grad_norm": 2.3219536160468124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63570 + }, + { + "epoch": 0.3083515100518567, + "grad_norm": 2.1401483536465093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63580 + }, + { + "epoch": 0.30840000824469277, + "grad_norm": 1.465044192627829e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63590 + }, + { + "epoch": 0.30844850643752886, + "grad_norm": 1.3844945669916342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63600 + }, + { + "epoch": 0.30849700463036495, + "grad_norm": 1.932942268467741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63610 + }, + { + "epoch": 0.30854550282320103, + "grad_norm": 1.778786099748686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63620 + }, + { + "epoch": 0.3085940010160371, + "grad_norm": 1.716233441584336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63630 + }, + { + "epoch": 0.3086424992088732, + "grad_norm": 1.276375655834272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63640 + }, + { + "epoch": 0.3086909974017093, + "grad_norm": 1.2277889709366718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63650 + }, + { + "epoch": 0.3087394955945454, + "grad_norm": 1.762531724125438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63660 + }, + { + "epoch": 0.3087879937873815, + "grad_norm": 1.7038108808264951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63670 + }, + { + "epoch": 0.30883649198021756, + "grad_norm": 1.5715376093794475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63680 + }, + { + "epoch": 0.30888499017305365, + "grad_norm": 1.1074126859966782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63690 + }, + { + "epoch": 0.30893348836588974, + "grad_norm": 1.1274374855929636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63700 + }, + { + "epoch": 0.30898198655872583, + "grad_norm": 1.5062872762428015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63710 + }, + { + "epoch": 0.3090304847515619, + "grad_norm": 1.3727290024689864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63720 + }, + { + "epoch": 0.30907898294439806, + "grad_norm": 1.4474925364993396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63730 + }, + { + "epoch": 0.30912748113723415, + "grad_norm": 1.056066594173899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63740 + }, + { + "epoch": 0.30917597933007024, + "grad_norm": 1.0163377055505407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63750 + }, + { + "epoch": 0.3092244775229063, + "grad_norm": 1.3556051499108435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63760 + }, + { + "epoch": 0.3092729757157424, + "grad_norm": 1.3167070846975548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63770 + }, + { + "epoch": 0.3093214739085785, + "grad_norm": 1.3254440318632987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63780 + }, + { + "epoch": 0.3093699721014146, + "grad_norm": 9.268811140827893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63790 + }, + { + "epoch": 0.3094184702942507, + "grad_norm": 9.16100759695837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63800 + }, + { + "epoch": 0.30946696848708677, + "grad_norm": 1.2780325278072269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63810 + }, + { + "epoch": 0.30951546667992286, + "grad_norm": 1.203609031108499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63820 + }, + { + "epoch": 0.30956396487275895, + "grad_norm": 1.17432932711381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63830 + }, + { + "epoch": 0.30961246306559503, + "grad_norm": 8.348293363269477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63840 + }, + { + "epoch": 0.3096609612584311, + "grad_norm": 8.386019771933206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63850 + }, + { + "epoch": 0.3097094594512672, + "grad_norm": 1.1611635954977828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63860 + }, + { + "epoch": 0.3097579576441033, + "grad_norm": 1.103815293390653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63870 + }, + { + "epoch": 0.3098064558369394, + "grad_norm": 1.1052037507397472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63880 + }, + { + "epoch": 0.3098549540297755, + "grad_norm": 7.542597586507327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63890 + }, + { + "epoch": 0.30990345222261156, + "grad_norm": 7.564577799712424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63900 + }, + { + "epoch": 0.30995195041544765, + "grad_norm": 1.0189227168666548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63910 + }, + { + "epoch": 0.31000044860828374, + "grad_norm": 9.659836450737203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63920 + }, + { + "epoch": 0.31004894680111983, + "grad_norm": 9.811388963498757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63930 + }, + { + "epoch": 0.3100974449939559, + "grad_norm": 7.113365541044914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63940 + }, + { + "epoch": 0.310145943186792, + "grad_norm": 7.163690156630764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63950 + }, + { + "epoch": 0.3101944413796281, + "grad_norm": 1.0064180742119788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63960 + }, + { + "epoch": 0.3102429395724642, + "grad_norm": 9.463015544497466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63970 + }, + { + "epoch": 0.31029143776530027, + "grad_norm": 9.550985851092264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63980 + }, + { + "epoch": 0.31033993595813636, + "grad_norm": 6.3984873577283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 63990 + }, + { + "epoch": 0.31038843415097245, + "grad_norm": 6.430979624383326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64000 + }, + { + "epoch": 0.31043693234380854, + "grad_norm": 9.242352234650753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64010 + }, + { + "epoch": 0.3104854305366446, + "grad_norm": 8.627337138022995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64020 + }, + { + "epoch": 0.3105339287294807, + "grad_norm": 8.895083851712116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64030 + }, + { + "epoch": 0.3105824269223168, + "grad_norm": 6.315122504929604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64040 + }, + { + "epoch": 0.3106309251151529, + "grad_norm": 5.947976546849532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64050 + }, + { + "epoch": 0.310679423307989, + "grad_norm": 8.903766683943104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64060 + }, + { + "epoch": 0.31072792150082507, + "grad_norm": 8.240379543167364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64070 + }, + { + "epoch": 0.31077641969366115, + "grad_norm": 7.86629186677601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64080 + }, + { + "epoch": 0.31082491788649724, + "grad_norm": 5.626346251119685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64090 + }, + { + "epoch": 0.31087341607933333, + "grad_norm": 5.340388611330127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64100 + }, + { + "epoch": 0.3109219142721694, + "grad_norm": 8.248845233538304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64110 + }, + { + "epoch": 0.3109704124650055, + "grad_norm": 7.834787538740784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64120 + }, + { + "epoch": 0.3110189106578416, + "grad_norm": 7.366633667515998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64130 + }, + { + "epoch": 0.3110674088506777, + "grad_norm": 5.392794264480472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64140 + }, + { + "epoch": 0.3111159070435138, + "grad_norm": 5.33990544226981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64150 + }, + { + "epoch": 0.31116440523634986, + "grad_norm": 7.25983284155518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64160 + }, + { + "epoch": 0.31121290342918595, + "grad_norm": 6.917417749718879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64170 + }, + { + "epoch": 0.31126140162202204, + "grad_norm": 7.187150572462997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64180 + }, + { + "epoch": 0.3113098998148581, + "grad_norm": 4.832252216147026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64190 + }, + { + "epoch": 0.3113583980076942, + "grad_norm": 4.937395488013863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64200 + }, + { + "epoch": 0.3114068962005303, + "grad_norm": 6.699538630527968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64210 + }, + { + "epoch": 0.3114553943933664, + "grad_norm": 7.126153036551841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64220 + }, + { + "epoch": 0.3115038925862025, + "grad_norm": 6.358797008942929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64230 + }, + { + "epoch": 0.3115523907790386, + "grad_norm": 4.790616685568239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64240 + }, + { + "epoch": 0.3116008889718747, + "grad_norm": 4.545160834368289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64250 + }, + { + "epoch": 0.3116493871647108, + "grad_norm": 6.272528594308824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64260 + }, + { + "epoch": 0.3116978853575469, + "grad_norm": 6.40332643797592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64270 + }, + { + "epoch": 0.311746383550383, + "grad_norm": 6.206627745086735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64280 + }, + { + "epoch": 0.31179488174321907, + "grad_norm": 4.5132188120078354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64290 + }, + { + "epoch": 0.31184337993605515, + "grad_norm": 4.3693100337804935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64300 + }, + { + "epoch": 0.31189187812889124, + "grad_norm": 6.346485292851867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64310 + }, + { + "epoch": 0.31194037632172733, + "grad_norm": 5.98770441229135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64320 + }, + { + "epoch": 0.3119888745145634, + "grad_norm": 6.249119337553566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64330 + }, + { + "epoch": 0.3120373727073995, + "grad_norm": 4.282113081899297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64340 + }, + { + "epoch": 0.3120858709002356, + "grad_norm": 4.1617118995418423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64350 + }, + { + "epoch": 0.3121343690930717, + "grad_norm": 5.859259317730903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64360 + }, + { + "epoch": 0.3121828672859078, + "grad_norm": 5.302887302605086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64370 + }, + { + "epoch": 0.31223136547874386, + "grad_norm": 5.616799967356201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64380 + }, + { + "epoch": 0.31227986367157995, + "grad_norm": 3.8724144246771175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64390 + }, + { + "epoch": 0.31232836186441604, + "grad_norm": 3.824101781901845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64400 + }, + { + "epoch": 0.3123768600572521, + "grad_norm": 8.116592198348371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64410 + }, + { + "epoch": 0.3124253582500882, + "grad_norm": 5.021993274567649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64420 + }, + { + "epoch": 0.3124738564429243, + "grad_norm": 5.368548272599583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64430 + }, + { + "epoch": 0.3125223546357604, + "grad_norm": 3.6653895563176775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64440 + }, + { + "epoch": 0.3125708528285965, + "grad_norm": 3.652668283393723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64450 + }, + { + "epoch": 0.31261935102143257, + "grad_norm": 5.218513479121611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64460 + }, + { + "epoch": 0.31266784921426866, + "grad_norm": 5.135914307174971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64470 + }, + { + "epoch": 0.31271634740710474, + "grad_norm": 5.191939180804184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64480 + }, + { + "epoch": 0.31276484559994083, + "grad_norm": 3.527392777868954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64490 + }, + { + "epoch": 0.3128133437927769, + "grad_norm": 3.483173998120037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64500 + }, + { + "epoch": 0.312861841985613, + "grad_norm": 5.109710059514327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64510 + }, + { + "epoch": 0.3129103401784491, + "grad_norm": 5.171477255316859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64520 + }, + { + "epoch": 0.3129588383712852, + "grad_norm": 5.042658131060307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64530 + }, + { + "epoch": 0.3130073365641213, + "grad_norm": 3.33207367475552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64540 + }, + { + "epoch": 0.31305583475695736, + "grad_norm": 3.4716481422947254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64550 + }, + { + "epoch": 0.31310433294979345, + "grad_norm": 4.4812122723669745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64560 + }, + { + "epoch": 0.31315283114262954, + "grad_norm": 4.734903598091478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64570 + }, + { + "epoch": 0.31320132933546563, + "grad_norm": 5.105255240778206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64580 + }, + { + "epoch": 0.3132498275283017, + "grad_norm": 3.174767755353969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64590 + }, + { + "epoch": 0.3132983257211378, + "grad_norm": 3.0658941341243917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64600 + }, + { + "epoch": 0.3133468239139739, + "grad_norm": 5.062011041445658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64610 + }, + { + "epoch": 0.31339532210681, + "grad_norm": 5.371164775169746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64620 + }, + { + "epoch": 0.31344382029964607, + "grad_norm": 4.867640086558822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64630 + }, + { + "epoch": 0.31349231849248216, + "grad_norm": 2.9576204951808904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64640 + }, + { + "epoch": 0.31354081668531825, + "grad_norm": 3.0410663498514623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64650 + }, + { + "epoch": 0.31358931487815433, + "grad_norm": 4.795201675733551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64660 + }, + { + "epoch": 0.3136378130709904, + "grad_norm": 4.879901780441287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64670 + }, + { + "epoch": 0.3136863112638265, + "grad_norm": 4.931029025101452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64680 + }, + { + "epoch": 0.3137348094566626, + "grad_norm": 3.3254463005505386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64690 + }, + { + "epoch": 0.3137833076494987, + "grad_norm": 2.8111895744586946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64700 + }, + { + "epoch": 0.3138318058423348, + "grad_norm": 4.4463357085078314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64710 + }, + { + "epoch": 0.31388030403517087, + "grad_norm": 4.4303422441771545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64720 + }, + { + "epoch": 0.31392880222800695, + "grad_norm": 4.3604396182672644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64730 + }, + { + "epoch": 0.31397730042084304, + "grad_norm": 2.8407180252543185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64740 + }, + { + "epoch": 0.31402579861367913, + "grad_norm": 2.8249527872503677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64750 + }, + { + "epoch": 0.3140742968065153, + "grad_norm": 4.272493185908388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64760 + }, + { + "epoch": 0.31412279499935136, + "grad_norm": 4.506055120145902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64770 + }, + { + "epoch": 0.31417129319218745, + "grad_norm": 4.115723299946694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64780 + }, + { + "epoch": 0.31421979138502354, + "grad_norm": 2.6593087909532187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64790 + }, + { + "epoch": 0.3142682895778596, + "grad_norm": 2.714927802571765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64800 + }, + { + "epoch": 0.3143167877706957, + "grad_norm": 4.2469480376894353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64810 + }, + { + "epoch": 0.3143652859635318, + "grad_norm": 4.149763697114395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64820 + }, + { + "epoch": 0.3144137841563679, + "grad_norm": 4.3741596300606034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64830 + }, + { + "epoch": 0.314462282349204, + "grad_norm": 2.55654924785631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64840 + }, + { + "epoch": 0.31451078054204007, + "grad_norm": 2.6806614528140926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64850 + }, + { + "epoch": 0.31455927873487616, + "grad_norm": 3.664959251636901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64860 + }, + { + "epoch": 0.31460777692771225, + "grad_norm": 4.0366870734942495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64870 + }, + { + "epoch": 0.31465627512054833, + "grad_norm": 4.004200206964015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64880 + }, + { + "epoch": 0.3147047733133844, + "grad_norm": 2.463285113663005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64890 + }, + { + "epoch": 0.3147532715062205, + "grad_norm": 2.4956901256700803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64900 + }, + { + "epoch": 0.3148017696990566, + "grad_norm": 3.729374213889969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64910 + }, + { + "epoch": 0.3148502678918927, + "grad_norm": 3.7982641742928536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64920 + }, + { + "epoch": 0.3148987660847288, + "grad_norm": 3.8002599467290565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64930 + }, + { + "epoch": 0.31494726427756486, + "grad_norm": 2.567855403867725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64940 + }, + { + "epoch": 0.31499576247040095, + "grad_norm": 2.4604727855148667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64950 + }, + { + "epoch": 0.31504426066323704, + "grad_norm": 3.580823317861359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64960 + }, + { + "epoch": 0.31509275885607313, + "grad_norm": 3.766327267840097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64970 + }, + { + "epoch": 0.3151412570489092, + "grad_norm": 3.5503052231433685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64980 + }, + { + "epoch": 0.3151897552417453, + "grad_norm": 2.3699892892636854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 64990 + }, + { + "epoch": 0.3152382534345814, + "grad_norm": 2.3056206543969893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65000 + }, + { + "epoch": 0.3152867516274175, + "grad_norm": 3.5294237932248507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65010 + }, + { + "epoch": 0.31533524982025357, + "grad_norm": 3.4740580190373294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65020 + }, + { + "epoch": 0.31538374801308966, + "grad_norm": 3.550059943790984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65030 + }, + { + "epoch": 0.31543224620592575, + "grad_norm": 2.3201255316962488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65040 + }, + { + "epoch": 0.31548074439876184, + "grad_norm": 2.2786895215176628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65050 + }, + { + "epoch": 0.3155292425915979, + "grad_norm": 3.3725257253536256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65060 + }, + { + "epoch": 0.315577740784434, + "grad_norm": 3.3371816243743524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65070 + }, + { + "epoch": 0.3156262389772701, + "grad_norm": 3.1910664688439283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65080 + }, + { + "epoch": 0.3156747371701062, + "grad_norm": 2.1892341806051263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65090 + }, + { + "epoch": 0.3157232353629423, + "grad_norm": 2.2031348123618955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65100 + }, + { + "epoch": 0.31577173355577837, + "grad_norm": 3.3746093208719685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65110 + }, + { + "epoch": 0.31582023174861446, + "grad_norm": 3.2602477517684747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65120 + }, + { + "epoch": 0.31586872994145054, + "grad_norm": 3.1410630185746413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65130 + }, + { + "epoch": 0.31591722813428663, + "grad_norm": 2.1647983317052422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65140 + }, + { + "epoch": 0.3159657263271227, + "grad_norm": 2.278515864873043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65150 + }, + { + "epoch": 0.3160142245199588, + "grad_norm": 2.983017566293711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65160 + }, + { + "epoch": 0.3160627227127949, + "grad_norm": 3.251411442306562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65170 + }, + { + "epoch": 0.316111220905631, + "grad_norm": 3.17248037617901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65180 + }, + { + "epoch": 0.3161597190984671, + "grad_norm": 2.2346513617321762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65190 + }, + { + "epoch": 0.31620821729130316, + "grad_norm": 2.1524925841731601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65200 + }, + { + "epoch": 0.31625671548413925, + "grad_norm": 3.056906336951215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65210 + }, + { + "epoch": 0.31630521367697534, + "grad_norm": 3.058013078316435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65220 + }, + { + "epoch": 0.3163537118698114, + "grad_norm": 2.923187594205956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65230 + }, + { + "epoch": 0.3164022100626475, + "grad_norm": 2.0763965835612908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65240 + }, + { + "epoch": 0.3164507082554836, + "grad_norm": 2.083827581600417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65250 + }, + { + "epoch": 0.3164992064483197, + "grad_norm": 2.849693316875346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65260 + }, + { + "epoch": 0.31654770464115584, + "grad_norm": 2.974753101625538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65270 + }, + { + "epoch": 0.3165962028339919, + "grad_norm": 2.9042865890005487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65280 + }, + { + "epoch": 0.316644701026828, + "grad_norm": 2.0445035886496044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65290 + }, + { + "epoch": 0.3166931992196641, + "grad_norm": 2.2500564966776437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65300 + }, + { + "epoch": 0.3167416974125002, + "grad_norm": 2.9385833499873115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65310 + }, + { + "epoch": 0.3167901956053363, + "grad_norm": 2.8709362709378183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65320 + }, + { + "epoch": 0.31683869379817237, + "grad_norm": 2.917678614267061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65330 + }, + { + "epoch": 0.31688719199100845, + "grad_norm": 2.0242717369001184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65340 + }, + { + "epoch": 0.31693569018384454, + "grad_norm": 1.970637981685286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65350 + }, + { + "epoch": 0.31698418837668063, + "grad_norm": 2.727630032950401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65360 + }, + { + "epoch": 0.3170326865695167, + "grad_norm": 2.938162992904836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65370 + }, + { + "epoch": 0.3170811847623528, + "grad_norm": 6.565596208929492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65380 + }, + { + "epoch": 0.3171296829551889, + "grad_norm": 1.9618653368524974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65390 + }, + { + "epoch": 0.317178181148025, + "grad_norm": 1.968703884358547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65400 + }, + { + "epoch": 0.3172266793408611, + "grad_norm": 2.818356392708665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65410 + }, + { + "epoch": 0.31727517753369716, + "grad_norm": 2.876887776892545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65420 + }, + { + "epoch": 0.31732367572653325, + "grad_norm": 2.555423748162866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65430 + }, + { + "epoch": 0.31737217391936934, + "grad_norm": 1.9217364410906157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65440 + }, + { + "epoch": 0.3174206721122054, + "grad_norm": 1.9197028677808703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65450 + }, + { + "epoch": 0.3174691703050415, + "grad_norm": 2.561129690548114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65460 + }, + { + "epoch": 0.3175176684978776, + "grad_norm": 2.596236186036549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65470 + }, + { + "epoch": 0.3175661666907137, + "grad_norm": 2.7814581926577375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65480 + }, + { + "epoch": 0.3176146648835498, + "grad_norm": 2.0135895795192482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65490 + }, + { + "epoch": 0.31766316307638587, + "grad_norm": 1.918933349998042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65500 + }, + { + "epoch": 0.31771166126922196, + "grad_norm": 2.429830203709571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65510 + }, + { + "epoch": 0.31776015946205804, + "grad_norm": 2.575773407897941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65520 + }, + { + "epoch": 0.31780865765489413, + "grad_norm": 2.6744999104266753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65530 + }, + { + "epoch": 0.3178571558477302, + "grad_norm": 1.898413444223479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65540 + }, + { + "epoch": 0.3179056540405663, + "grad_norm": 1.9424358299602318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65550 + }, + { + "epoch": 0.3179541522334024, + "grad_norm": 2.535912528855988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65560 + }, + { + "epoch": 0.3180026504262385, + "grad_norm": 2.529019980102021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65570 + }, + { + "epoch": 0.3180511486190746, + "grad_norm": 2.39815648228614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65580 + }, + { + "epoch": 0.31809964681191066, + "grad_norm": 1.890246039693011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65590 + }, + { + "epoch": 0.31814814500474675, + "grad_norm": 1.8496805864742782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65600 + }, + { + "epoch": 0.31819664319758284, + "grad_norm": 2.3706233776010777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65610 + }, + { + "epoch": 0.31824514139041893, + "grad_norm": 2.352874730604526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65620 + }, + { + "epoch": 0.318293639583255, + "grad_norm": 2.314112208523511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65630 + }, + { + "epoch": 0.3183421377760911, + "grad_norm": 1.8062964102227852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65640 + }, + { + "epoch": 0.3183906359689272, + "grad_norm": 1.7615695924178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65650 + }, + { + "epoch": 0.3184391341617633, + "grad_norm": 4.975460683454003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65660 + }, + { + "epoch": 0.31848763235459937, + "grad_norm": 2.3211310917758965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65670 + }, + { + "epoch": 0.31853613054743546, + "grad_norm": 2.2582415226679586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65680 + }, + { + "epoch": 0.31858462874027155, + "grad_norm": 1.804264968541247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65690 + }, + { + "epoch": 0.31863312693310764, + "grad_norm": 1.77360348629918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65700 + }, + { + "epoch": 0.3186816251259437, + "grad_norm": 2.2632278273704287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65710 + }, + { + "epoch": 0.3187301233187798, + "grad_norm": 2.257107922787327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65720 + }, + { + "epoch": 0.3187786215116159, + "grad_norm": 2.2553615508513758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65730 + }, + { + "epoch": 0.318827119704452, + "grad_norm": 1.7379133510075917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65740 + }, + { + "epoch": 0.3188756178972881, + "grad_norm": 1.68974239045383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65750 + }, + { + "epoch": 0.31892411609012417, + "grad_norm": 2.3331574539042776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65760 + }, + { + "epoch": 0.31897261428296025, + "grad_norm": 2.2547393996319443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65770 + }, + { + "epoch": 0.3190211124757964, + "grad_norm": 2.2371089869466232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65780 + }, + { + "epoch": 0.3190696106686325, + "grad_norm": 1.7856883971489879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65790 + }, + { + "epoch": 0.3191181088614686, + "grad_norm": 1.6842277261730487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65800 + }, + { + "epoch": 0.31916660705430466, + "grad_norm": 2.1474657785347517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65810 + }, + { + "epoch": 0.31921510524714075, + "grad_norm": 2.1756079604529077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65820 + }, + { + "epoch": 0.31926360343997684, + "grad_norm": 2.176760887095952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65830 + }, + { + "epoch": 0.31931210163281293, + "grad_norm": 1.6950220071976219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65840 + }, + { + "epoch": 0.319360599825649, + "grad_norm": 1.6701044103228924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65850 + }, + { + "epoch": 0.3194090980184851, + "grad_norm": 1.9789192151620227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65860 + }, + { + "epoch": 0.3194575962113212, + "grad_norm": 2.0351667728846223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65870 + }, + { + "epoch": 0.3195060944041573, + "grad_norm": 1.9877498402820493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65880 + }, + { + "epoch": 0.31955459259699337, + "grad_norm": 1.6493292775976442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65890 + }, + { + "epoch": 0.31960309078982946, + "grad_norm": 1.649337377784832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65900 + }, + { + "epoch": 0.31965158898266555, + "grad_norm": 2.134970600309316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65910 + }, + { + "epoch": 0.31970008717550163, + "grad_norm": 2.0185187565857632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65920 + }, + { + "epoch": 0.3197485853683377, + "grad_norm": 2.1367405622640945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65930 + }, + { + "epoch": 0.3197970835611738, + "grad_norm": 1.6420190718235972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65940 + }, + { + "epoch": 0.3198455817540099, + "grad_norm": 1.583012476658041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65950 + }, + { + "epoch": 0.319894079946846, + "grad_norm": 1.953131203435987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65960 + }, + { + "epoch": 0.3199425781396821, + "grad_norm": 1.9347223201293673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65970 + }, + { + "epoch": 0.31999107633251817, + "grad_norm": 1.9989985844404146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65980 + }, + { + "epoch": 0.32003957452535425, + "grad_norm": 1.547506798260656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 65990 + }, + { + "epoch": 0.32008807271819034, + "grad_norm": 1.6675954839229234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66000 + }, + { + "epoch": 0.32013657091102643, + "grad_norm": 1.9363349679224484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66010 + }, + { + "epoch": 0.3201850691038625, + "grad_norm": 1.9953735375111137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66020 + }, + { + "epoch": 0.3202335672966986, + "grad_norm": 1.9828853226044885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66030 + }, + { + "epoch": 0.3202820654895347, + "grad_norm": 1.570380447901698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66040 + }, + { + "epoch": 0.3203305636823708, + "grad_norm": 1.5326192226439161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66050 + }, + { + "epoch": 0.32037906187520687, + "grad_norm": 1.93252716940151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66060 + }, + { + "epoch": 0.32042756006804296, + "grad_norm": 1.7952295650047745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66070 + }, + { + "epoch": 0.32047605826087905, + "grad_norm": 1.8800353984715912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66080 + }, + { + "epoch": 0.32052455645371514, + "grad_norm": 1.54535300112002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66090 + }, + { + "epoch": 0.3205730546465512, + "grad_norm": 1.505446363125884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66100 + }, + { + "epoch": 0.3206215528393873, + "grad_norm": 1.8459404316217842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66110 + }, + { + "epoch": 0.3206700510322234, + "grad_norm": 1.9353116442744067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66120 + }, + { + "epoch": 0.3207185492250595, + "grad_norm": 1.8194131712334638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66130 + }, + { + "epoch": 0.3207670474178956, + "grad_norm": 1.623020864371938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66140 + }, + { + "epoch": 0.32081554561073167, + "grad_norm": 1.472357098464272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66150 + }, + { + "epoch": 0.32086404380356776, + "grad_norm": 1.80900613599988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66160 + }, + { + "epoch": 0.32091254199640384, + "grad_norm": 1.8212526242677995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66170 + }, + { + "epoch": 0.32096104018923993, + "grad_norm": 1.7857206557891914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66180 + }, + { + "epoch": 0.321009538382076, + "grad_norm": 1.405216778493923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66190 + }, + { + "epoch": 0.3210580365749121, + "grad_norm": 1.429096414540254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66200 + }, + { + "epoch": 0.3211065347677482, + "grad_norm": 1.81644992380825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66210 + }, + { + "epoch": 0.3211550329605843, + "grad_norm": 1.914258263013835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66220 + }, + { + "epoch": 0.3212035311534204, + "grad_norm": 1.7109776706547564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66230 + }, + { + "epoch": 0.32125202934625646, + "grad_norm": 1.4198546693933167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66240 + }, + { + "epoch": 0.32130052753909255, + "grad_norm": 1.4223719801975676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66250 + }, + { + "epoch": 0.32134902573192864, + "grad_norm": 1.722439861850944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66260 + }, + { + "epoch": 0.3213975239247647, + "grad_norm": 1.6416623793702456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66270 + }, + { + "epoch": 0.3214460221176008, + "grad_norm": 1.7234721383374563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66280 + }, + { + "epoch": 0.32149452031043696, + "grad_norm": 1.313338060526803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66290 + }, + { + "epoch": 0.32154301850327305, + "grad_norm": 1.3418647881735524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66300 + }, + { + "epoch": 0.32159151669610914, + "grad_norm": 1.9068589551807236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66310 + }, + { + "epoch": 0.3216400148889452, + "grad_norm": 1.6921342194109457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66320 + }, + { + "epoch": 0.3216885130817813, + "grad_norm": 1.8430716863804264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66330 + }, + { + "epoch": 0.3217370112746174, + "grad_norm": 1.3313308500073617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66340 + }, + { + "epoch": 0.3217855094674535, + "grad_norm": 1.3108443397413794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66350 + }, + { + "epoch": 0.3218340076602896, + "grad_norm": 1.7238603788882756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66360 + }, + { + "epoch": 0.32188250585312567, + "grad_norm": 1.6939094393819687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66370 + }, + { + "epoch": 0.32193100404596176, + "grad_norm": 1.6596696639226138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66380 + }, + { + "epoch": 0.32197950223879784, + "grad_norm": 1.3391411357588368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66390 + }, + { + "epoch": 0.32202800043163393, + "grad_norm": 1.30214559135311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66400 + }, + { + "epoch": 0.32207649862447, + "grad_norm": 1.6177548900486727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66410 + }, + { + "epoch": 0.3221249968173061, + "grad_norm": 1.5522940088885662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66420 + }, + { + "epoch": 0.3221734950101422, + "grad_norm": 1.6314402273565065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66430 + }, + { + "epoch": 0.3222219932029783, + "grad_norm": 1.2596846943324636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66440 + }, + { + "epoch": 0.3222704913958144, + "grad_norm": 1.227295172157028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66450 + }, + { + "epoch": 0.32231898958865046, + "grad_norm": 1.6421267901023384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66460 + }, + { + "epoch": 0.32236748778148655, + "grad_norm": 1.5919334828140563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66470 + }, + { + "epoch": 0.32241598597432264, + "grad_norm": 1.5801224151346105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66480 + }, + { + "epoch": 0.3224644841671587, + "grad_norm": 1.283369357452102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66490 + }, + { + "epoch": 0.3225129823599948, + "grad_norm": 1.2182118780401652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66500 + }, + { + "epoch": 0.3225614805528309, + "grad_norm": 1.5178359547007858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66510 + }, + { + "epoch": 0.322609978745667, + "grad_norm": 1.6101775202059798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66520 + }, + { + "epoch": 0.3226584769385031, + "grad_norm": 1.531263507104086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66530 + }, + { + "epoch": 0.32270697513133917, + "grad_norm": 1.203610082711748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66540 + }, + { + "epoch": 0.32275547332417526, + "grad_norm": 1.1662729093586677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66550 + }, + { + "epoch": 0.32280397151701135, + "grad_norm": 1.4572037798643578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66560 + }, + { + "epoch": 0.32285246970984743, + "grad_norm": 1.4727260122526786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66570 + }, + { + "epoch": 0.3229009679026835, + "grad_norm": 1.50516314079141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66580 + }, + { + "epoch": 0.3229494660955196, + "grad_norm": 1.2029089191401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66590 + }, + { + "epoch": 0.3229979642883557, + "grad_norm": 1.1881394357260433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66600 + }, + { + "epoch": 0.3230464624811918, + "grad_norm": 1.5274488873728842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66610 + }, + { + "epoch": 0.3230949606740279, + "grad_norm": 1.5486342874737602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66620 + }, + { + "epoch": 0.32314345886686396, + "grad_norm": 1.576947283865593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66630 + }, + { + "epoch": 0.32319195705970005, + "grad_norm": 1.1310510217299452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66640 + }, + { + "epoch": 0.32324045525253614, + "grad_norm": 1.169956362900848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66650 + }, + { + "epoch": 0.32328895344537223, + "grad_norm": 1.4634686351655546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66660 + }, + { + "epoch": 0.3233374516382083, + "grad_norm": 1.4316009355752612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66670 + }, + { + "epoch": 0.3233859498310444, + "grad_norm": 1.5037694822694903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66680 + }, + { + "epoch": 0.3234344480238805, + "grad_norm": 1.1339410832533758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66690 + }, + { + "epoch": 0.3234829462167166, + "grad_norm": 1.1335866645367787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66700 + }, + { + "epoch": 0.32353144440955267, + "grad_norm": 1.4563846661985735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66710 + }, + { + "epoch": 0.32357994260238876, + "grad_norm": 1.3874847581973881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66720 + }, + { + "epoch": 0.32362844079522485, + "grad_norm": 1.4168649897783325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66730 + }, + { + "epoch": 0.32367693898806094, + "grad_norm": 1.134967675398002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66740 + }, + { + "epoch": 0.323725437180897, + "grad_norm": 1.1096308583091741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66750 + }, + { + "epoch": 0.3237739353737331, + "grad_norm": 1.3822473476921004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66760 + }, + { + "epoch": 0.3238224335665692, + "grad_norm": 1.3769574991329137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66770 + }, + { + "epoch": 0.3238709317594053, + "grad_norm": 1.3933885156802717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66780 + }, + { + "epoch": 0.3239194299522414, + "grad_norm": 1.1072022942926196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66790 + }, + { + "epoch": 0.3239679281450775, + "grad_norm": 1.0830924423999022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66800 + }, + { + "epoch": 0.3240164263379136, + "grad_norm": 1.4610000675929768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66810 + }, + { + "epoch": 0.3240649245307497, + "grad_norm": 1.4042852569673414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66820 + }, + { + "epoch": 0.3241134227235858, + "grad_norm": 1.4156685779198597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66830 + }, + { + "epoch": 0.3241619209164219, + "grad_norm": 1.0903065827960745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66840 + }, + { + "epoch": 0.32421041910925796, + "grad_norm": 1.0727399768484247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66850 + }, + { + "epoch": 0.32425891730209405, + "grad_norm": 1.2877237054453872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66860 + }, + { + "epoch": 0.32430741549493014, + "grad_norm": 1.2610533417500847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66870 + }, + { + "epoch": 0.32435591368776623, + "grad_norm": 1.4087684974128933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66880 + }, + { + "epoch": 0.3244044118806023, + "grad_norm": 1.1098031649225959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66890 + }, + { + "epoch": 0.3244529100734384, + "grad_norm": 1.0869882061115277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66900 + }, + { + "epoch": 0.3245014082662745, + "grad_norm": 1.3484533667451615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66910 + }, + { + "epoch": 0.3245499064591106, + "grad_norm": 1.3456828185098857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66920 + }, + { + "epoch": 0.32459840465194667, + "grad_norm": 1.2854320630140137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66930 + }, + { + "epoch": 0.32464690284478276, + "grad_norm": 1.0924750881713408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66940 + }, + { + "epoch": 0.32469540103761885, + "grad_norm": 1.0621604928928718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66950 + }, + { + "epoch": 0.32474389923045494, + "grad_norm": 1.2644375146919629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66960 + }, + { + "epoch": 0.324792397423291, + "grad_norm": 1.2824436623759539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66970 + }, + { + "epoch": 0.3248408956161271, + "grad_norm": 1.437435628304229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66980 + }, + { + "epoch": 0.3248893938089632, + "grad_norm": 1.0454460408482191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 66990 + }, + { + "epoch": 0.3249378920017993, + "grad_norm": 1.0504688674473073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67000 + }, + { + "epoch": 0.3249863901946354, + "grad_norm": 1.4180149321418867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67010 + }, + { + "epoch": 0.32503488838747147, + "grad_norm": 1.2591769404934894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67020 + }, + { + "epoch": 0.32508338658030755, + "grad_norm": 1.2978065910829173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67030 + }, + { + "epoch": 0.32513188477314364, + "grad_norm": 1.0750436985063061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67040 + }, + { + "epoch": 0.32518038296597973, + "grad_norm": 1.0294284180645263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67050 + }, + { + "epoch": 0.3252288811588158, + "grad_norm": 1.2600725085576414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67060 + }, + { + "epoch": 0.3252773793516519, + "grad_norm": 1.205955442173945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67070 + }, + { + "epoch": 0.325325877544488, + "grad_norm": 1.2025624585021433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67080 + }, + { + "epoch": 0.3253743757373241, + "grad_norm": 1.0324373533876496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67090 + }, + { + "epoch": 0.3254228739301602, + "grad_norm": 1.0481814172180748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67100 + }, + { + "epoch": 0.32547137212299626, + "grad_norm": 1.246645666697077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67110 + }, + { + "epoch": 0.32551987031583235, + "grad_norm": 1.2735364407490124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67120 + }, + { + "epoch": 0.32556836850866844, + "grad_norm": 1.2171315688647155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67130 + }, + { + "epoch": 0.3256168667015045, + "grad_norm": 1.0395675076324551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67140 + }, + { + "epoch": 0.3256653648943406, + "grad_norm": 1.0105506476065784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67150 + }, + { + "epoch": 0.3257138630871767, + "grad_norm": 1.1873581939880751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67160 + }, + { + "epoch": 0.3257623612800128, + "grad_norm": 1.1685702361319272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67170 + }, + { + "epoch": 0.3258108594728489, + "grad_norm": 1.1736781857507594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67180 + }, + { + "epoch": 0.32585935766568497, + "grad_norm": 1.0118723281493658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67190 + }, + { + "epoch": 0.32590785585852106, + "grad_norm": 1.0185892307390532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67200 + }, + { + "epoch": 0.32595635405135714, + "grad_norm": 1.1143882971964558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67210 + }, + { + "epoch": 0.32600485224419323, + "grad_norm": 1.2033224550123123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67220 + }, + { + "epoch": 0.3260533504370293, + "grad_norm": 1.4762622413400095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67230 + }, + { + "epoch": 0.3261018486298654, + "grad_norm": 1.032448011528686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67240 + }, + { + "epoch": 0.3261503468227015, + "grad_norm": 1.0273814865513486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67250 + }, + { + "epoch": 0.3261988450155376, + "grad_norm": 1.1590210391432265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67260 + }, + { + "epoch": 0.3262473432083737, + "grad_norm": 1.1273358069274764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67270 + }, + { + "epoch": 0.32629584140120976, + "grad_norm": 1.1387341913859927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67280 + }, + { + "epoch": 0.32634433959404585, + "grad_norm": 1.0335265443472963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67290 + }, + { + "epoch": 0.32639283778688194, + "grad_norm": 1.0145399187422299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67300 + }, + { + "epoch": 0.32644133597971803, + "grad_norm": 1.1819927436818034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67310 + }, + { + "epoch": 0.32648983417255417, + "grad_norm": 1.1291586332617953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67320 + }, + { + "epoch": 0.32653833236539026, + "grad_norm": 1.1391691145945515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67330 + }, + { + "epoch": 0.32658683055822635, + "grad_norm": 9.886851159990329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67340 + }, + { + "epoch": 0.32663532875106244, + "grad_norm": 9.943546075419363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67350 + }, + { + "epoch": 0.3266838269438985, + "grad_norm": 1.1070201111351707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67360 + }, + { + "epoch": 0.3267323251367346, + "grad_norm": 1.1292282664498998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67370 + }, + { + "epoch": 0.3267808233295707, + "grad_norm": 1.4600894360228267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67380 + }, + { + "epoch": 0.3268293215224068, + "grad_norm": 9.890094787579073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67390 + }, + { + "epoch": 0.3268778197152429, + "grad_norm": 9.849575377529618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67400 + }, + { + "epoch": 0.32692631790807897, + "grad_norm": 1.114384247102862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67410 + }, + { + "epoch": 0.32697481610091506, + "grad_norm": 1.1561622415001693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67420 + }, + { + "epoch": 0.32702331429375114, + "grad_norm": 1.0606839140336888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67430 + }, + { + "epoch": 0.32707181248658723, + "grad_norm": 9.691724045524097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67440 + }, + { + "epoch": 0.3271203106794233, + "grad_norm": 9.92256659060331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67450 + }, + { + "epoch": 0.3271688088722594, + "grad_norm": 1.1283723466704032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67460 + }, + { + "epoch": 0.3272173070650955, + "grad_norm": 1.114716567940377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67470 + }, + { + "epoch": 0.3272658052579316, + "grad_norm": 1.1011805867155999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67480 + }, + { + "epoch": 0.3273143034507677, + "grad_norm": 9.698334935137609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67490 + }, + { + "epoch": 0.32736280164360376, + "grad_norm": 9.657319566258593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67500 + }, + { + "epoch": 0.32741129983643985, + "grad_norm": 1.1324761572950592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67510 + }, + { + "epoch": 0.32745979802927594, + "grad_norm": 1.0460121302457992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67520 + }, + { + "epoch": 0.327508296222112, + "grad_norm": 1.1382088871414453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67530 + }, + { + "epoch": 0.3275567944149481, + "grad_norm": 9.556242019925776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67540 + }, + { + "epoch": 0.3276052926077842, + "grad_norm": 9.542029033582367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67550 + }, + { + "epoch": 0.3276537908006203, + "grad_norm": 1.1019223222774599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67560 + }, + { + "epoch": 0.3277022889934564, + "grad_norm": 1.04120566390975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67570 + }, + { + "epoch": 0.32775078718629247, + "grad_norm": 1.009686982911262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67580 + }, + { + "epoch": 0.32779928537912856, + "grad_norm": 9.687990143447678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67590 + }, + { + "epoch": 0.32784778357196465, + "grad_norm": 9.746928952836242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67600 + }, + { + "epoch": 0.32789628176480073, + "grad_norm": 1.0814800788239154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67610 + }, + { + "epoch": 0.3279447799576368, + "grad_norm": 9.99455593841958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67620 + }, + { + "epoch": 0.3279932781504729, + "grad_norm": 1.0662342475598052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67630 + }, + { + "epoch": 0.328041776343309, + "grad_norm": 9.47986791288713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67640 + }, + { + "epoch": 0.3280902745361451, + "grad_norm": 9.747046902930379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67650 + }, + { + "epoch": 0.3281387727289812, + "grad_norm": 1.0096037783569045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67660 + }, + { + "epoch": 0.32818727092181726, + "grad_norm": 1.0444259146424884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67670 + }, + { + "epoch": 0.32823576911465335, + "grad_norm": 9.95159723515826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67680 + }, + { + "epoch": 0.32828426730748944, + "grad_norm": 9.364642039599858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67690 + }, + { + "epoch": 0.32833276550032553, + "grad_norm": 9.439293080504285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67700 + }, + { + "epoch": 0.3283812636931616, + "grad_norm": 1.0556692586760619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67710 + }, + { + "epoch": 0.3284297618859977, + "grad_norm": 1.0600696498386242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67720 + }, + { + "epoch": 0.3284782600788338, + "grad_norm": 9.595071759349594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67730 + }, + { + "epoch": 0.3285267582716699, + "grad_norm": 9.488937280366372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67740 + }, + { + "epoch": 0.32857525646450597, + "grad_norm": 9.430927860876182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67750 + }, + { + "epoch": 0.32862375465734206, + "grad_norm": 1.0219120127885617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67760 + }, + { + "epoch": 0.32867225285017815, + "grad_norm": 9.82895542733786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67770 + }, + { + "epoch": 0.32872075104301424, + "grad_norm": 1.003037723990019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67780 + }, + { + "epoch": 0.3287692492358503, + "grad_norm": 9.35923694100893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67790 + }, + { + "epoch": 0.3288177474286864, + "grad_norm": 9.425119174011343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67800 + }, + { + "epoch": 0.3288662456215225, + "grad_norm": 9.867486738812659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67810 + }, + { + "epoch": 0.3289147438143586, + "grad_norm": 1.0268329475593418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67820 + }, + { + "epoch": 0.32896324200719473, + "grad_norm": 9.448503135445208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67830 + }, + { + "epoch": 0.3290117402000308, + "grad_norm": 9.255901289861868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67840 + }, + { + "epoch": 0.3290602383928669, + "grad_norm": 1.0066808897590818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67850 + }, + { + "epoch": 0.329108736585703, + "grad_norm": 9.820748658739831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67860 + }, + { + "epoch": 0.3291572347785391, + "grad_norm": 9.848350401853168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67870 + }, + { + "epoch": 0.3292057329713752, + "grad_norm": 9.320240934584945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67880 + }, + { + "epoch": 0.32925423116421126, + "grad_norm": 9.271878553818169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67890 + }, + { + "epoch": 0.32930272935704735, + "grad_norm": 9.129411893127326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67900 + }, + { + "epoch": 0.32935122754988344, + "grad_norm": 1.007796797125593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67910 + }, + { + "epoch": 0.32939972574271953, + "grad_norm": 9.411055401642443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67920 + }, + { + "epoch": 0.3294482239355556, + "grad_norm": 9.481301788127894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67930 + }, + { + "epoch": 0.3294967221283917, + "grad_norm": 9.24097420806902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67940 + }, + { + "epoch": 0.3295452203212278, + "grad_norm": 9.514317866887723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67950 + }, + { + "epoch": 0.3295937185140639, + "grad_norm": 9.425668423546085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67960 + }, + { + "epoch": 0.32964221670689997, + "grad_norm": 9.682511858954967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67970 + }, + { + "epoch": 0.32969071489973606, + "grad_norm": 1.0280130169348922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67980 + }, + { + "epoch": 0.32973921309257215, + "grad_norm": 9.242721432656253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 67990 + }, + { + "epoch": 0.32978771128540824, + "grad_norm": 9.185987437376752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68000 + }, + { + "epoch": 0.3298362094782443, + "grad_norm": 9.11234110390069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68010 + }, + { + "epoch": 0.3298847076710804, + "grad_norm": 9.44316340678597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68020 + }, + { + "epoch": 0.3299332058639165, + "grad_norm": 9.383781218730292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68030 + }, + { + "epoch": 0.3299817040567526, + "grad_norm": 9.058929606453603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68040 + }, + { + "epoch": 0.3300302022495887, + "grad_norm": 9.231300879264381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68050 + }, + { + "epoch": 0.33007870044242477, + "grad_norm": 9.010283719135259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68060 + }, + { + "epoch": 0.33012719863526085, + "grad_norm": 9.211041174239654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68070 + }, + { + "epoch": 0.33017569682809694, + "grad_norm": 9.688881164038321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68080 + }, + { + "epoch": 0.33022419502093303, + "grad_norm": 8.907680637548765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68090 + }, + { + "epoch": 0.3302726932137691, + "grad_norm": 9.050179983205453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68100 + }, + { + "epoch": 0.3303211914066052, + "grad_norm": 9.550579704864504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68110 + }, + { + "epoch": 0.3303696895994413, + "grad_norm": 9.265178846362687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68120 + }, + { + "epoch": 0.3304181877922774, + "grad_norm": 8.819014141181469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68130 + }, + { + "epoch": 0.3304666859851135, + "grad_norm": 9.483385809971878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68140 + }, + { + "epoch": 0.33051518417794956, + "grad_norm": 8.764823178353254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68150 + }, + { + "epoch": 0.33056368237078565, + "grad_norm": 9.471899886648316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68160 + }, + { + "epoch": 0.33061218056362174, + "grad_norm": 9.323020577767238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68170 + }, + { + "epoch": 0.3306606787564578, + "grad_norm": 9.158427616284825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68180 + }, + { + "epoch": 0.3307091769492939, + "grad_norm": 8.897455927581177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68190 + }, + { + "epoch": 0.33075767514213, + "grad_norm": 9.041939108556107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68200 + }, + { + "epoch": 0.3308061733349661, + "grad_norm": 9.145657742237745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68210 + }, + { + "epoch": 0.3308546715278022, + "grad_norm": 8.7810576587799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68220 + }, + { + "epoch": 0.33090316972063827, + "grad_norm": 9.199725781172674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68230 + }, + { + "epoch": 0.33095166791347436, + "grad_norm": 9.138474865721946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68240 + }, + { + "epoch": 0.33100016610631045, + "grad_norm": 8.844556731446573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68250 + }, + { + "epoch": 0.33104866429914653, + "grad_norm": 9.16174158760441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68260 + }, + { + "epoch": 0.3310971624919826, + "grad_norm": 9.006206624917468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68270 + }, + { + "epoch": 0.3311456606848187, + "grad_norm": 9.294259228909141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68280 + }, + { + "epoch": 0.3311941588776548, + "grad_norm": 8.724757805111949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68290 + }, + { + "epoch": 0.3312426570704909, + "grad_norm": 8.852669708403482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68300 + }, + { + "epoch": 0.331291155263327, + "grad_norm": 9.267412792723917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68310 + }, + { + "epoch": 0.33133965345616306, + "grad_norm": 8.638239279434856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68320 + }, + { + "epoch": 0.33138815164899915, + "grad_norm": 8.769542603204172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68330 + }, + { + "epoch": 0.3314366498418353, + "grad_norm": 8.770685155923275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68340 + }, + { + "epoch": 0.3314851480346714, + "grad_norm": 8.740218504499353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68350 + }, + { + "epoch": 0.3315336462275075, + "grad_norm": 8.476224877540517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68360 + }, + { + "epoch": 0.33158214442034356, + "grad_norm": 8.862349432092742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68370 + }, + { + "epoch": 0.33163064261317965, + "grad_norm": 8.258469819111269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68380 + }, + { + "epoch": 0.33167914080601574, + "grad_norm": 8.943558782448235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68390 + }, + { + "epoch": 0.3317276389988518, + "grad_norm": 8.719651845012777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68400 + }, + { + "epoch": 0.3317761371916879, + "grad_norm": 8.6138918220513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68410 + }, + { + "epoch": 0.331824635384524, + "grad_norm": 8.720160593611581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68420 + }, + { + "epoch": 0.3318731335773601, + "grad_norm": 8.789636751771468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68430 + }, + { + "epoch": 0.3319216317701962, + "grad_norm": 8.875175439015948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68440 + }, + { + "epoch": 0.33197012996303227, + "grad_norm": 8.768520132207414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68450 + }, + { + "epoch": 0.33201862815586836, + "grad_norm": 8.858771138875454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68460 + }, + { + "epoch": 0.33206712634870444, + "grad_norm": 8.563719688936544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68470 + }, + { + "epoch": 0.33211562454154053, + "grad_norm": 8.228391834563809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68480 + }, + { + "epoch": 0.3321641227343766, + "grad_norm": 8.80966624094981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68490 + }, + { + "epoch": 0.3322126209272127, + "grad_norm": 8.904898152195528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68500 + }, + { + "epoch": 0.3322611191200488, + "grad_norm": 8.570704324029066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68510 + }, + { + "epoch": 0.3323096173128849, + "grad_norm": 8.573333332151378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68520 + }, + { + "epoch": 0.332358115505721, + "grad_norm": 8.52310790833144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68530 + }, + { + "epoch": 0.33240661369855706, + "grad_norm": 8.62877342910906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68540 + }, + { + "epoch": 0.33245511189139315, + "grad_norm": 1.0324636434688728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68550 + }, + { + "epoch": 0.33250361008422924, + "grad_norm": 8.845168508742063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68560 + }, + { + "epoch": 0.33255210827706533, + "grad_norm": 8.621616842674484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68570 + }, + { + "epoch": 0.3326006064699014, + "grad_norm": 8.442040666523098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68580 + }, + { + "epoch": 0.3326491046627375, + "grad_norm": 8.533113060593678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68590 + }, + { + "epoch": 0.3326976028555736, + "grad_norm": 8.806239293335238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68600 + }, + { + "epoch": 0.3327461010484097, + "grad_norm": 8.220286673576993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68610 + }, + { + "epoch": 0.33279459924124577, + "grad_norm": 8.559723596590629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68620 + }, + { + "epoch": 0.33284309743408186, + "grad_norm": 8.393421779828714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68630 + }, + { + "epoch": 0.33289159562691795, + "grad_norm": 8.765555747913822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68640 + }, + { + "epoch": 0.33294009381975403, + "grad_norm": 8.582782129451516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68650 + }, + { + "epoch": 0.3329885920125901, + "grad_norm": 8.467134904321938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68660 + }, + { + "epoch": 0.3330370902054262, + "grad_norm": 8.411601015723136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68670 + }, + { + "epoch": 0.3330855883982623, + "grad_norm": 7.976247218266508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68680 + }, + { + "epoch": 0.3331340865910984, + "grad_norm": 8.603014833852285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68690 + }, + { + "epoch": 0.3331825847839345, + "grad_norm": 8.616635938096806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68700 + }, + { + "epoch": 0.33323108297677057, + "grad_norm": 8.446732380207322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68710 + }, + { + "epoch": 0.33327958116960665, + "grad_norm": 8.099608805878233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68720 + }, + { + "epoch": 0.33332807936244274, + "grad_norm": 8.71615881692378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68730 + }, + { + "epoch": 0.33337657755527883, + "grad_norm": 8.572294518671697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68740 + }, + { + "epoch": 0.3334250757481149, + "grad_norm": 8.718448896161135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68750 + }, + { + "epoch": 0.333473573940951, + "grad_norm": 8.716599353419952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68760 + }, + { + "epoch": 0.3335220721337871, + "grad_norm": 7.716893435372185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68770 + }, + { + "epoch": 0.3335705703266232, + "grad_norm": 9.428607228301189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68780 + }, + { + "epoch": 0.33361906851945927, + "grad_norm": 8.40823517478384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68790 + }, + { + "epoch": 0.33366756671229536, + "grad_norm": 8.791415950781811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68800 + }, + { + "epoch": 0.33371606490513145, + "grad_norm": 7.985186556425106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68810 + }, + { + "epoch": 0.33376456309796754, + "grad_norm": 8.003649298871096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68820 + }, + { + "epoch": 0.3338130612908036, + "grad_norm": 8.082006530685248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68830 + }, + { + "epoch": 0.3338615594836397, + "grad_norm": 8.596068568067494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68840 + }, + { + "epoch": 0.33391005767647586, + "grad_norm": 8.61507842842002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68850 + }, + { + "epoch": 0.33395855586931195, + "grad_norm": 7.885332564683267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68860 + }, + { + "epoch": 0.33400705406214803, + "grad_norm": 8.301721976522458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68870 + }, + { + "epoch": 0.3340555522549841, + "grad_norm": 7.831791748458272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68880 + }, + { + "epoch": 0.3341040504478202, + "grad_norm": 1.637122295505833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68890 + }, + { + "epoch": 0.3341525486406563, + "grad_norm": 8.583425881170115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68900 + }, + { + "epoch": 0.3342010468334924, + "grad_norm": 8.253852001871564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68910 + }, + { + "epoch": 0.3342495450263285, + "grad_norm": 7.7725090363856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68920 + }, + { + "epoch": 0.33429804321916456, + "grad_norm": 8.097768500192615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68930 + }, + { + "epoch": 0.33434654141200065, + "grad_norm": 8.283110020101958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68940 + }, + { + "epoch": 0.33439503960483674, + "grad_norm": 8.447714350268143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68950 + }, + { + "epoch": 0.33444353779767283, + "grad_norm": 7.828577253121694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68960 + }, + { + "epoch": 0.3344920359905089, + "grad_norm": 7.761232012626351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68970 + }, + { + "epoch": 0.334540534183345, + "grad_norm": 9.341979279042789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68980 + }, + { + "epoch": 0.3345890323761811, + "grad_norm": 8.611920065959566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 68990 + }, + { + "epoch": 0.3346375305690172, + "grad_norm": 8.404169449249821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69000 + }, + { + "epoch": 0.33468602876185327, + "grad_norm": 7.76402089286421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69010 + }, + { + "epoch": 0.33473452695468936, + "grad_norm": 7.683980385309042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69020 + }, + { + "epoch": 0.33478302514752545, + "grad_norm": 7.911614119393562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69030 + }, + { + "epoch": 0.33483152334036154, + "grad_norm": 8.64809592826532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69040 + }, + { + "epoch": 0.3348800215331976, + "grad_norm": 8.436944654022227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69050 + }, + { + "epoch": 0.3349285197260337, + "grad_norm": 7.59348992573905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69060 + }, + { + "epoch": 0.3349770179188698, + "grad_norm": 7.730719175924605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69070 + }, + { + "epoch": 0.3350255161117059, + "grad_norm": 8.200065337859996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69080 + }, + { + "epoch": 0.335074014304542, + "grad_norm": 8.606394885646296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69090 + }, + { + "epoch": 0.33512251249737807, + "grad_norm": 8.504437687406607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69100 + }, + { + "epoch": 0.33517101069021416, + "grad_norm": 7.933258672210286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69110 + }, + { + "epoch": 0.33521950888305024, + "grad_norm": 7.458846340568925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69120 + }, + { + "epoch": 0.33526800707588633, + "grad_norm": 7.419200898084455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69130 + }, + { + "epoch": 0.3353165052687224, + "grad_norm": 8.505541870817979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69140 + }, + { + "epoch": 0.3353650034615585, + "grad_norm": 8.471130996667853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69150 + }, + { + "epoch": 0.3354135016543946, + "grad_norm": 7.174789118380431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69160 + }, + { + "epoch": 0.3354619998472307, + "grad_norm": 8.131022610768923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69170 + }, + { + "epoch": 0.3355104980400668, + "grad_norm": 7.535400214919719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69180 + }, + { + "epoch": 0.33555899623290286, + "grad_norm": 8.313040211760381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69190 + }, + { + "epoch": 0.33560749442573895, + "grad_norm": 8.585041655351233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69200 + }, + { + "epoch": 0.33565599261857504, + "grad_norm": 7.857210704287354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69210 + }, + { + "epoch": 0.3357044908114111, + "grad_norm": 7.660694478772712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69220 + }, + { + "epoch": 0.3357529890042472, + "grad_norm": 7.797034839995831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69230 + }, + { + "epoch": 0.3358014871970833, + "grad_norm": 8.524123273900841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69240 + }, + { + "epoch": 0.3358499853899194, + "grad_norm": 8.67658371817015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69250 + }, + { + "epoch": 0.3358984835827555, + "grad_norm": 7.200689822184358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69260 + }, + { + "epoch": 0.33594698177559157, + "grad_norm": 7.571222226943064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69270 + }, + { + "epoch": 0.33599547996842766, + "grad_norm": 7.371477295237128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69280 + }, + { + "epoch": 0.33604397816126375, + "grad_norm": 1.413980612596788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69290 + }, + { + "epoch": 0.33609247635409983, + "grad_norm": 8.405218920870539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69300 + }, + { + "epoch": 0.3361409745469359, + "grad_norm": 7.682869096470313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69310 + }, + { + "epoch": 0.336189472739772, + "grad_norm": 8.078384183818343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69320 + }, + { + "epoch": 0.3362379709326081, + "grad_norm": 7.601521190281346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69330 + }, + { + "epoch": 0.3362864691254442, + "grad_norm": 8.50345358571758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69340 + }, + { + "epoch": 0.3363349673182803, + "grad_norm": 8.247185689924663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69350 + }, + { + "epoch": 0.3363834655111164, + "grad_norm": 7.513413891047094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69360 + }, + { + "epoch": 0.3364319637039525, + "grad_norm": 7.596992901426347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69370 + }, + { + "epoch": 0.3364804618967886, + "grad_norm": 7.375685839861035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69380 + }, + { + "epoch": 0.3365289600896247, + "grad_norm": 8.258914618863855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69390 + }, + { + "epoch": 0.3365774582824608, + "grad_norm": 8.453105238004355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69400 + }, + { + "epoch": 0.33662595647529686, + "grad_norm": 7.595618001232651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69410 + }, + { + "epoch": 0.33667445466813295, + "grad_norm": 7.112874556014503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69420 + }, + { + "epoch": 0.33672295286096904, + "grad_norm": 7.740234053699169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69430 + }, + { + "epoch": 0.3367714510538051, + "grad_norm": 8.187119249214447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69440 + }, + { + "epoch": 0.3368199492466412, + "grad_norm": 1.6645742562104715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69450 + }, + { + "epoch": 0.3368684474394773, + "grad_norm": 7.201301599479848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69460 + }, + { + "epoch": 0.3369169456323134, + "grad_norm": 1.0396360039521824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69470 + }, + { + "epoch": 0.3369654438251495, + "grad_norm": 7.380337763152056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69480 + }, + { + "epoch": 0.33701394201798557, + "grad_norm": 8.352403568778755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69490 + }, + { + "epoch": 0.33706244021082166, + "grad_norm": 8.23926953330556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69500 + }, + { + "epoch": 0.33711093840365774, + "grad_norm": 7.185610684246058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69510 + }, + { + "epoch": 0.33715943659649383, + "grad_norm": 7.414451630438634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69520 + }, + { + "epoch": 0.3372079347893299, + "grad_norm": 6.999454171818797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69530 + }, + { + "epoch": 0.337256432982166, + "grad_norm": 8.440866139380887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69540 + }, + { + "epoch": 0.3373049311750021, + "grad_norm": 8.474003720948531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69550 + }, + { + "epoch": 0.3373534293678382, + "grad_norm": 6.93456314593277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69560 + }, + { + "epoch": 0.3374019275606743, + "grad_norm": 7.232313947724833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69570 + }, + { + "epoch": 0.33745042575351036, + "grad_norm": 7.145531810692773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69580 + }, + { + "epoch": 0.33749892394634645, + "grad_norm": 8.475784341044346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69590 + }, + { + "epoch": 0.33754742213918254, + "grad_norm": 8.519336347490025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69600 + }, + { + "epoch": 0.33759592033201863, + "grad_norm": 7.123041712020495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69610 + }, + { + "epoch": 0.3376444185248547, + "grad_norm": 7.190160999925865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69620 + }, + { + "epoch": 0.3376929167176908, + "grad_norm": 7.294467962992712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69630 + }, + { + "epoch": 0.3377414149105269, + "grad_norm": 8.276447260868736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69640 + }, + { + "epoch": 0.337789913103363, + "grad_norm": 8.536799356306801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69650 + }, + { + "epoch": 0.33783841129619907, + "grad_norm": 7.09713887658836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69660 + }, + { + "epoch": 0.33788690948903516, + "grad_norm": 6.785072059756203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69670 + }, + { + "epoch": 0.33793540768187125, + "grad_norm": 7.218581288270798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69680 + }, + { + "epoch": 0.33798390587470734, + "grad_norm": 8.119992145338983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69690 + }, + { + "epoch": 0.3380324040675434, + "grad_norm": 8.230489356719772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69700 + }, + { + "epoch": 0.3380809022603795, + "grad_norm": 7.141213842487559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69710 + }, + { + "epoch": 0.3381294004532156, + "grad_norm": 6.913589345458604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69720 + }, + { + "epoch": 0.3381778986460517, + "grad_norm": 7.020441472604944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69730 + }, + { + "epoch": 0.3382263968388878, + "grad_norm": 8.191455691530791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69740 + }, + { + "epoch": 0.33827489503172387, + "grad_norm": 8.532908424285779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69750 + }, + { + "epoch": 0.33832339322455995, + "grad_norm": 7.0976483357299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69760 + }, + { + "epoch": 0.33837189141739604, + "grad_norm": 7.44147214959412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69770 + }, + { + "epoch": 0.33842038961023213, + "grad_norm": 6.822603637601787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69780 + }, + { + "epoch": 0.3384688878030682, + "grad_norm": 8.507336701768509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69790 + }, + { + "epoch": 0.3385173859959043, + "grad_norm": 8.541871920897393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69800 + }, + { + "epoch": 0.3385658841887404, + "grad_norm": 6.64470363176406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69810 + }, + { + "epoch": 0.3386143823815765, + "grad_norm": 6.780187078447852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69820 + }, + { + "epoch": 0.3386628805744126, + "grad_norm": 6.612863501231914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69830 + }, + { + "epoch": 0.33871137876724866, + "grad_norm": 8.39095548599289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69840 + }, + { + "epoch": 0.33875987696008475, + "grad_norm": 8.226945169553801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69850 + }, + { + "epoch": 0.33880837515292084, + "grad_norm": 7.43158281579781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69860 + }, + { + "epoch": 0.338856873345757, + "grad_norm": 6.706483901552929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69870 + }, + { + "epoch": 0.33890537153859307, + "grad_norm": 6.858250856112136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69880 + }, + { + "epoch": 0.33895386973142916, + "grad_norm": 8.440680687726854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69890 + }, + { + "epoch": 0.33900236792426525, + "grad_norm": 8.115650729223489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69900 + }, + { + "epoch": 0.33905086611710133, + "grad_norm": 6.770315508219937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69910 + }, + { + "epoch": 0.3390993643099374, + "grad_norm": 7.168868165763342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69920 + }, + { + "epoch": 0.3391478625027735, + "grad_norm": 6.662715890115578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69930 + }, + { + "epoch": 0.3391963606956096, + "grad_norm": 8.089259040389152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69940 + }, + { + "epoch": 0.3392448588884457, + "grad_norm": 8.204230539377022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69950 + }, + { + "epoch": 0.3392933570812818, + "grad_norm": 6.717943534795268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69960 + }, + { + "epoch": 0.33934185527411787, + "grad_norm": 6.865970902936169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69970 + }, + { + "epoch": 0.33939035346695395, + "grad_norm": 6.92880846031585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69980 + }, + { + "epoch": 0.33943885165979004, + "grad_norm": 8.366598081011034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 69990 + }, + { + "epoch": 0.33948734985262613, + "grad_norm": 8.29748501018912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70000 + }, + { + "epoch": 0.3395358480454622, + "grad_norm": 7.023358250535239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70010 + }, + { + "epoch": 0.3395843462382983, + "grad_norm": 7.011667690903778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70020 + }, + { + "epoch": 0.3396328444311344, + "grad_norm": 6.735331226082053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70030 + }, + { + "epoch": 0.3396813426239705, + "grad_norm": 7.954339054094817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70040 + }, + { + "epoch": 0.33972984081680657, + "grad_norm": 8.328139955438019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70050 + }, + { + "epoch": 0.33977833900964266, + "grad_norm": 1.5966105593179236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70060 + }, + { + "epoch": 0.33982683720247875, + "grad_norm": 6.609666058920993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70070 + }, + { + "epoch": 0.33987533539531484, + "grad_norm": 6.81157601434279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70080 + }, + { + "epoch": 0.3399238335881509, + "grad_norm": 8.097398307427284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70090 + }, + { + "epoch": 0.339972331780987, + "grad_norm": 8.187615208044008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70100 + }, + { + "epoch": 0.3400208299738231, + "grad_norm": 6.405210228876967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70110 + }, + { + "epoch": 0.3400693281666592, + "grad_norm": 6.47202256232049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70120 + }, + { + "epoch": 0.3401178263594953, + "grad_norm": 7.21100263945118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70130 + }, + { + "epoch": 0.34016632455233137, + "grad_norm": 4.392677510622889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70140 + }, + { + "epoch": 0.34021482274516746, + "grad_norm": 8.087501868203617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70150 + }, + { + "epoch": 0.34026332093800354, + "grad_norm": 6.829627352544776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70160 + }, + { + "epoch": 0.34031181913083963, + "grad_norm": 6.447543654530818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70170 + }, + { + "epoch": 0.3403603173236757, + "grad_norm": 6.467889335226573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70180 + }, + { + "epoch": 0.3404088155165118, + "grad_norm": 8.16870127096081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70190 + }, + { + "epoch": 0.3404573137093479, + "grad_norm": 7.985217820305479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70200 + }, + { + "epoch": 0.340505811902184, + "grad_norm": 6.389674922502309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70210 + }, + { + "epoch": 0.3405543100950201, + "grad_norm": 6.704436117388468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70220 + }, + { + "epoch": 0.34060280828785616, + "grad_norm": 6.200561841751551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70230 + }, + { + "epoch": 0.34065130648069225, + "grad_norm": 8.187792133185212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70240 + }, + { + "epoch": 0.34069980467352834, + "grad_norm": 1.4096093536863918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70250 + }, + { + "epoch": 0.3407483028663644, + "grad_norm": 6.54234284525046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70260 + }, + { + "epoch": 0.3407968010592005, + "grad_norm": 6.41371968868043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70270 + }, + { + "epoch": 0.3408452992520366, + "grad_norm": 6.414277464728002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70280 + }, + { + "epoch": 0.3408937974448727, + "grad_norm": 8.012645480448555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70290 + }, + { + "epoch": 0.3409422956377088, + "grad_norm": 7.81473374900088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70300 + }, + { + "epoch": 0.34099079383054487, + "grad_norm": 6.623230319746654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70310 + }, + { + "epoch": 0.34103929202338096, + "grad_norm": 6.448355094335056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70320 + }, + { + "epoch": 0.34108779021621705, + "grad_norm": 6.552053122277357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70330 + }, + { + "epoch": 0.34113628840905313, + "grad_norm": 7.984004923855537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70340 + }, + { + "epoch": 0.3411847866018892, + "grad_norm": 8.18991736650787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70350 + }, + { + "epoch": 0.3412332847947253, + "grad_norm": 6.27469134428793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70360 + }, + { + "epoch": 0.3412817829875614, + "grad_norm": 6.473947422591664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70370 + }, + { + "epoch": 0.3413302811803975, + "grad_norm": 6.683720954470118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70380 + }, + { + "epoch": 0.34137877937323363, + "grad_norm": 7.855841488435544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70390 + }, + { + "epoch": 0.3414272775660697, + "grad_norm": 7.654201539253336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70400 + }, + { + "epoch": 0.3414757757589058, + "grad_norm": 6.166841615140584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70410 + }, + { + "epoch": 0.3415242739517419, + "grad_norm": 6.323819690123855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70420 + }, + { + "epoch": 0.341572772144578, + "grad_norm": 6.098057525605327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70430 + }, + { + "epoch": 0.3416212703374141, + "grad_norm": 7.96580792439272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70440 + }, + { + "epoch": 0.34166976853025016, + "grad_norm": 7.875735263951356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70450 + }, + { + "epoch": 0.34171826672308625, + "grad_norm": 6.097479143818418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70460 + }, + { + "epoch": 0.34176676491592234, + "grad_norm": 6.147081421659095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70470 + }, + { + "epoch": 0.3418152631087584, + "grad_norm": 6.176362177257033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70480 + }, + { + "epoch": 0.3418637613015945, + "grad_norm": 7.748650432404247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70490 + }, + { + "epoch": 0.3419122594944306, + "grad_norm": 7.782471556083692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70500 + }, + { + "epoch": 0.3419607576872667, + "grad_norm": 6.822359921443422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70510 + }, + { + "epoch": 0.3420092558801028, + "grad_norm": 6.061209489871544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70520 + }, + { + "epoch": 0.34205775407293887, + "grad_norm": 6.07573298339048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70530 + }, + { + "epoch": 0.34210625226577496, + "grad_norm": 7.68771286629999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70540 + }, + { + "epoch": 0.34215475045861105, + "grad_norm": 7.952614566875127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70550 + }, + { + "epoch": 0.34220324865144713, + "grad_norm": 6.037630839728081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70560 + }, + { + "epoch": 0.3422517468442832, + "grad_norm": 6.314257916528732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70570 + }, + { + "epoch": 0.3423002450371193, + "grad_norm": 6.48590514629177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70580 + }, + { + "epoch": 0.3423487432299554, + "grad_norm": 7.808752400251251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70590 + }, + { + "epoch": 0.3423972414227915, + "grad_norm": 7.932712264846487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70600 + }, + { + "epoch": 0.3424457396156276, + "grad_norm": 5.946724002114934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70610 + }, + { + "epoch": 0.34249423780846366, + "grad_norm": 6.389911533233317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70620 + }, + { + "epoch": 0.34254273600129975, + "grad_norm": 5.936537661455077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70630 + }, + { + "epoch": 0.34259123419413584, + "grad_norm": 8.069674350963396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70640 + }, + { + "epoch": 0.34263973238697193, + "grad_norm": 7.907760135594799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70650 + }, + { + "epoch": 0.342688230579808, + "grad_norm": 6.344901493093857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70660 + }, + { + "epoch": 0.3427367287726441, + "grad_norm": 6.528394180804753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70670 + }, + { + "epoch": 0.3427852269654802, + "grad_norm": 5.9248570494219166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70680 + }, + { + "epoch": 0.3428337251583163, + "grad_norm": 7.820388248092058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70690 + }, + { + "epoch": 0.34288222335115237, + "grad_norm": 7.5918258346519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70700 + }, + { + "epoch": 0.34293072154398846, + "grad_norm": 6.228187032775168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70710 + }, + { + "epoch": 0.34297921973682455, + "grad_norm": 6.645593231269231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70720 + }, + { + "epoch": 0.34302771792966064, + "grad_norm": 5.847726569641054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70730 + }, + { + "epoch": 0.3430762161224967, + "grad_norm": 8.058387379605847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70740 + }, + { + "epoch": 0.3431247143153328, + "grad_norm": 7.623257403110983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70750 + }, + { + "epoch": 0.3431732125081689, + "grad_norm": 5.8664706870104055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70760 + }, + { + "epoch": 0.343221710701005, + "grad_norm": 6.036537314457746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70770 + }, + { + "epoch": 0.3432702088938411, + "grad_norm": 5.955017101655358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70780 + }, + { + "epoch": 0.34331870708667717, + "grad_norm": 7.671945212450737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70790 + }, + { + "epoch": 0.34336720527951325, + "grad_norm": 7.743418706240845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70800 + }, + { + "epoch": 0.34341570347234934, + "grad_norm": 5.8382685352853514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70810 + }, + { + "epoch": 0.34346420166518543, + "grad_norm": 5.804193037306504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70820 + }, + { + "epoch": 0.3435126998580215, + "grad_norm": 5.7950014564767116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70830 + }, + { + "epoch": 0.3435611980508576, + "grad_norm": 7.591187767275187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70840 + }, + { + "epoch": 0.3436096962436937, + "grad_norm": 7.569090598735784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70850 + }, + { + "epoch": 0.3436581944365298, + "grad_norm": 5.9272302621593553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70860 + }, + { + "epoch": 0.3437066926293659, + "grad_norm": 5.675200043242512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70870 + }, + { + "epoch": 0.34375519082220196, + "grad_norm": 5.794088764332628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70880 + }, + { + "epoch": 0.34380368901503805, + "grad_norm": 7.498127274629951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70890 + }, + { + "epoch": 0.3438521872078742, + "grad_norm": 7.418317693463905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70900 + }, + { + "epoch": 0.3439006854007103, + "grad_norm": 5.710371908662637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70910 + }, + { + "epoch": 0.34394918359354637, + "grad_norm": 5.7950842347054277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70920 + }, + { + "epoch": 0.34399768178638246, + "grad_norm": 5.895966737057279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70930 + }, + { + "epoch": 0.34404617997921855, + "grad_norm": 7.678085722773176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70940 + }, + { + "epoch": 0.34409467817205464, + "grad_norm": 7.839023652422838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70950 + }, + { + "epoch": 0.3441431763648907, + "grad_norm": 6.066608193577849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70960 + }, + { + "epoch": 0.3441916745577268, + "grad_norm": 5.7383950036182796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70970 + }, + { + "epoch": 0.3442401727505629, + "grad_norm": 1.697444105275281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70980 + }, + { + "epoch": 0.344288670943399, + "grad_norm": 7.924833056449643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 70990 + }, + { + "epoch": 0.3443371691362351, + "grad_norm": 7.575123106562387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71000 + }, + { + "epoch": 0.34438566732907117, + "grad_norm": 5.796513846689777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71010 + }, + { + "epoch": 0.34443416552190725, + "grad_norm": 5.8183353246477054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71020 + }, + { + "epoch": 0.34448266371474334, + "grad_norm": 5.698607807858025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71030 + }, + { + "epoch": 0.34453116190757943, + "grad_norm": 7.515541966540695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71040 + }, + { + "epoch": 0.3445796601004155, + "grad_norm": 0.004031545482575893, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71050 + }, + { + "epoch": 0.3446281582932516, + "grad_norm": 8.304363291244954e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 71060 + }, + { + "epoch": 0.3446766564860877, + "grad_norm": 4.667847679229453e-05, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 71070 + }, + { + "epoch": 0.3447251546789238, + "grad_norm": 0.0008648383081890643, + "learning_rate": 0.0002, + "loss": 0.004, + "step": 71080 + }, + { + "epoch": 0.3447736528717599, + "grad_norm": 0.0008023789268918335, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 71090 + }, + { + "epoch": 0.34482215106459596, + "grad_norm": 0.0003991344419773668, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71100 + }, + { + "epoch": 0.34487064925743205, + "grad_norm": 0.0002669016248546541, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 71110 + }, + { + "epoch": 0.34491914745026814, + "grad_norm": 0.0013453707797452807, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 71120 + }, + { + "epoch": 0.3449676456431042, + "grad_norm": 0.00037950489786453545, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 71130 + }, + { + "epoch": 0.3450161438359403, + "grad_norm": 0.00012452859664335847, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71140 + }, + { + "epoch": 0.3450646420287764, + "grad_norm": 5.7980189012596384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71150 + }, + { + "epoch": 0.3451131402216125, + "grad_norm": 2.252638114441652e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71160 + }, + { + "epoch": 0.3451616384144486, + "grad_norm": 1.8834542061085813e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71170 + }, + { + "epoch": 0.34521013660728467, + "grad_norm": 1.7358819604851305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71180 + }, + { + "epoch": 0.34525863480012076, + "grad_norm": 2.6651772714103572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71190 + }, + { + "epoch": 0.34530713299295684, + "grad_norm": 0.0003181437496095896, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71200 + }, + { + "epoch": 0.34535563118579293, + "grad_norm": 1.4210587323759682e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71210 + }, + { + "epoch": 0.345404129378629, + "grad_norm": 1.2910787518194411e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71220 + }, + { + "epoch": 0.3454526275714651, + "grad_norm": 1.266344406758435e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71230 + }, + { + "epoch": 0.3455011257643012, + "grad_norm": 1.864946898422204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71240 + }, + { + "epoch": 0.3455496239571373, + "grad_norm": 2.4899707568692975e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71250 + }, + { + "epoch": 0.3455981221499734, + "grad_norm": 1.1290632755844854e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71260 + }, + { + "epoch": 0.34564662034280946, + "grad_norm": 3.157128594466485e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 71270 + }, + { + "epoch": 0.34569511853564555, + "grad_norm": 0.0002698145981412381, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71280 + }, + { + "epoch": 0.34574361672848164, + "grad_norm": 2.8624286642298102e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71290 + }, + { + "epoch": 0.34579211492131773, + "grad_norm": 0.0007120834197849035, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71300 + }, + { + "epoch": 0.3458406131141538, + "grad_norm": 9.323609447164927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71310 + }, + { + "epoch": 0.3458891113069899, + "grad_norm": 9.339349162473809e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71320 + }, + { + "epoch": 0.345937609499826, + "grad_norm": 8.268587407656014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71330 + }, + { + "epoch": 0.3459861076926621, + "grad_norm": 1.1236625141464174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71340 + }, + { + "epoch": 0.34603460588549817, + "grad_norm": 1.0789831321744714e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71350 + }, + { + "epoch": 0.34608310407833426, + "grad_norm": 7.560926860605832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71360 + }, + { + "epoch": 0.34613160227117035, + "grad_norm": 7.73730243963655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71370 + }, + { + "epoch": 0.34618010046400643, + "grad_norm": 9.119888090936001e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 71380 + }, + { + "epoch": 0.3462285986568425, + "grad_norm": 0.00017693453992251307, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71390 + }, + { + "epoch": 0.3462770968496786, + "grad_norm": 0.0002385045518167317, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71400 + }, + { + "epoch": 0.34632559504251476, + "grad_norm": 7.23524863133207e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 71410 + }, + { + "epoch": 0.34637409323535084, + "grad_norm": 5.28454220329877e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71420 + }, + { + "epoch": 0.34642259142818693, + "grad_norm": 3.5600933188106865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71430 + }, + { + "epoch": 0.346471089621023, + "grad_norm": 5.232233161223121e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71440 + }, + { + "epoch": 0.3465195878138591, + "grad_norm": 4.154807538725436e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71450 + }, + { + "epoch": 0.3465680860066952, + "grad_norm": 1.9407603758736514e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71460 + }, + { + "epoch": 0.3466165841995313, + "grad_norm": 1.7069396562874317e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71470 + }, + { + "epoch": 0.3466650823923674, + "grad_norm": 1.3327993656275794e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71480 + }, + { + "epoch": 0.34671358058520346, + "grad_norm": 2.5518031179672107e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71490 + }, + { + "epoch": 0.34676207877803955, + "grad_norm": 2.3002854504738934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71500 + }, + { + "epoch": 0.34681057697087564, + "grad_norm": 1.1405672012188006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71510 + }, + { + "epoch": 0.3468590751637117, + "grad_norm": 1.574551970406901e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71520 + }, + { + "epoch": 0.3469075733565478, + "grad_norm": 1.1077082490373868e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71530 + }, + { + "epoch": 0.3469560715493839, + "grad_norm": 1.6746360415709205e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71540 + }, + { + "epoch": 0.34700456974222, + "grad_norm": 0.00021046787151135504, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 71550 + }, + { + "epoch": 0.3470530679350561, + "grad_norm": 0.00024707752163521945, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71560 + }, + { + "epoch": 0.34710156612789217, + "grad_norm": 0.00017274555284529924, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71570 + }, + { + "epoch": 0.34715006432072826, + "grad_norm": 8.55536200106144e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71580 + }, + { + "epoch": 0.34719856251356435, + "grad_norm": 0.00011538140824995935, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 71590 + }, + { + "epoch": 0.34724706070640043, + "grad_norm": 0.0006457261624746025, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 71600 + }, + { + "epoch": 0.3472955588992365, + "grad_norm": 0.0003195768513251096, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71610 + }, + { + "epoch": 0.3473440570920726, + "grad_norm": 0.00014668938820250332, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71620 + }, + { + "epoch": 0.3473925552849087, + "grad_norm": 6.344139546854421e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71630 + }, + { + "epoch": 0.3474410534777448, + "grad_norm": 9.15628916118294e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71640 + }, + { + "epoch": 0.3474895516705809, + "grad_norm": 0.0015437130350619555, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 71650 + }, + { + "epoch": 0.34753804986341696, + "grad_norm": 1.894132401503157e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 71660 + }, + { + "epoch": 0.34758654805625305, + "grad_norm": 2.1560474124271423e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71670 + }, + { + "epoch": 0.34763504624908914, + "grad_norm": 1.826898915169295e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71680 + }, + { + "epoch": 0.34768354444192523, + "grad_norm": 3.187280162819661e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71690 + }, + { + "epoch": 0.3477320426347613, + "grad_norm": 2.9206617909949273e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71700 + }, + { + "epoch": 0.3477805408275974, + "grad_norm": 1.8606066078064032e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71710 + }, + { + "epoch": 0.3478290390204335, + "grad_norm": 1.4292982996266801e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71720 + }, + { + "epoch": 0.3478775372132696, + "grad_norm": 1.2702530511887744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71730 + }, + { + "epoch": 0.34792603540610567, + "grad_norm": 2.149002466467209e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71740 + }, + { + "epoch": 0.34797453359894176, + "grad_norm": 1.8504199033486657e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71750 + }, + { + "epoch": 0.34802303179177785, + "grad_norm": 8.99085989658488e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71760 + }, + { + "epoch": 0.34807152998461394, + "grad_norm": 9.22391700441949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71770 + }, + { + "epoch": 0.34812002817745, + "grad_norm": 9.36822107178159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71780 + }, + { + "epoch": 0.3481685263702861, + "grad_norm": 1.4756358723388985e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71790 + }, + { + "epoch": 0.3482170245631222, + "grad_norm": 1.6241327102761716e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71800 + }, + { + "epoch": 0.3482655227559583, + "grad_norm": 8.502302989654709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71810 + }, + { + "epoch": 0.3483140209487944, + "grad_norm": 7.555034699180396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71820 + }, + { + "epoch": 0.34836251914163047, + "grad_norm": 7.783437467878684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71830 + }, + { + "epoch": 0.34841101733446656, + "grad_norm": 1.2019108908134513e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71840 + }, + { + "epoch": 0.34845951552730264, + "grad_norm": 1.89148177014431e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 71850 + }, + { + "epoch": 0.34850801372013873, + "grad_norm": 1.6871001207618974e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71860 + }, + { + "epoch": 0.3485565119129748, + "grad_norm": 9.03393083717674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71870 + }, + { + "epoch": 0.3486050101058109, + "grad_norm": 0.0001078441200661473, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71880 + }, + { + "epoch": 0.348653508298647, + "grad_norm": 4.8880359827307984e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71890 + }, + { + "epoch": 0.3487020064914831, + "grad_norm": 1.4756960808881558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71900 + }, + { + "epoch": 0.3487505046843192, + "grad_norm": 1.8845377780962735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71910 + }, + { + "epoch": 0.3487990028771553, + "grad_norm": 7.483617537218379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71920 + }, + { + "epoch": 0.3488475010699914, + "grad_norm": 5.490354851644952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71930 + }, + { + "epoch": 0.3488959992628275, + "grad_norm": 9.404953743796796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71940 + }, + { + "epoch": 0.3489444974556636, + "grad_norm": 8.909149983082898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71950 + }, + { + "epoch": 0.34899299564849967, + "grad_norm": 0.019263336434960365, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 71960 + }, + { + "epoch": 0.34904149384133576, + "grad_norm": 8.53437086334452e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 71970 + }, + { + "epoch": 0.34908999203417185, + "grad_norm": 2.8829103030147962e-05, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 71980 + }, + { + "epoch": 0.34913849022700794, + "grad_norm": 8.8681117631495e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 71990 + }, + { + "epoch": 0.349186988419844, + "grad_norm": 7.714157254667953e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72000 + }, + { + "epoch": 0.3492354866126801, + "grad_norm": 2.726838647504337e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72010 + }, + { + "epoch": 0.3492839848055162, + "grad_norm": 2.1307323549990542e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72020 + }, + { + "epoch": 0.3493324829983523, + "grad_norm": 4.743355020764284e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72030 + }, + { + "epoch": 0.3493809811911884, + "grad_norm": 2.9462740712915547e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72040 + }, + { + "epoch": 0.34942947938402447, + "grad_norm": 4.012637145933695e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72050 + }, + { + "epoch": 0.34947797757686055, + "grad_norm": 1.4775097952224314e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72060 + }, + { + "epoch": 0.34952647576969664, + "grad_norm": 2.3094593416317366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72070 + }, + { + "epoch": 0.34957497396253273, + "grad_norm": 9.884066457743756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72080 + }, + { + "epoch": 0.3496234721553688, + "grad_norm": 0.0004079147765878588, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72090 + }, + { + "epoch": 0.3496719703482049, + "grad_norm": 2.552129990363028e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72100 + }, + { + "epoch": 0.349720468541041, + "grad_norm": 0.0032345568761229515, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72110 + }, + { + "epoch": 0.3497689667338771, + "grad_norm": 6.655589459114708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72120 + }, + { + "epoch": 0.3498174649267132, + "grad_norm": 6.5930125856539235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72130 + }, + { + "epoch": 0.34986596311954926, + "grad_norm": 9.61038767854916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72140 + }, + { + "epoch": 0.34991446131238535, + "grad_norm": 1.0877873137360439e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72150 + }, + { + "epoch": 0.34996295950522144, + "grad_norm": 5.219235845288495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72160 + }, + { + "epoch": 0.3500114576980575, + "grad_norm": 5.73565557715483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72170 + }, + { + "epoch": 0.3500599558908936, + "grad_norm": 4.3411864680820145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72180 + }, + { + "epoch": 0.3501084540837297, + "grad_norm": 8.128045010380447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72190 + }, + { + "epoch": 0.3501569522765658, + "grad_norm": 7.488140454370296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72200 + }, + { + "epoch": 0.3502054504694019, + "grad_norm": 6.34889920547721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72210 + }, + { + "epoch": 0.35025394866223797, + "grad_norm": 4.03409285354428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72220 + }, + { + "epoch": 0.35030244685507406, + "grad_norm": 4.497083409660263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72230 + }, + { + "epoch": 0.35035094504791015, + "grad_norm": 9.187318028125446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72240 + }, + { + "epoch": 0.35039944324074623, + "grad_norm": 6.406719421647722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72250 + }, + { + "epoch": 0.3504479414335823, + "grad_norm": 3.938142526749289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72260 + }, + { + "epoch": 0.3504964396264184, + "grad_norm": 3.3515875657030847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72270 + }, + { + "epoch": 0.3505449378192545, + "grad_norm": 3.739135763680679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72280 + }, + { + "epoch": 0.3505934360120906, + "grad_norm": 6.525854587380309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72290 + }, + { + "epoch": 0.3506419342049267, + "grad_norm": 5.736371349485125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72300 + }, + { + "epoch": 0.35069043239776276, + "grad_norm": 3.7980048546160106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72310 + }, + { + "epoch": 0.35073893059059885, + "grad_norm": 3.1403108096128562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72320 + }, + { + "epoch": 0.35078742878343494, + "grad_norm": 0.00024116557324305177, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72330 + }, + { + "epoch": 0.35083592697627103, + "grad_norm": 5.277066975395428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72340 + }, + { + "epoch": 0.3508844251691071, + "grad_norm": 5.6704516282479744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72350 + }, + { + "epoch": 0.3509329233619432, + "grad_norm": 3.563119207683485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72360 + }, + { + "epoch": 0.3509814215547793, + "grad_norm": 2.5945339530153433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72370 + }, + { + "epoch": 0.3510299197476154, + "grad_norm": 2.719156555031077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72380 + }, + { + "epoch": 0.35107841794045147, + "grad_norm": 6.010429842717713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72390 + }, + { + "epoch": 0.35112691613328756, + "grad_norm": 4.001744400738971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72400 + }, + { + "epoch": 0.35117541432612365, + "grad_norm": 2.5831245693552773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72410 + }, + { + "epoch": 0.35122391251895974, + "grad_norm": 2.9808381896145875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72420 + }, + { + "epoch": 0.3512724107117959, + "grad_norm": 2.289627445861697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72430 + }, + { + "epoch": 0.35132090890463197, + "grad_norm": 4.018495019408874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72440 + }, + { + "epoch": 0.35136940709746806, + "grad_norm": 3.912752617907245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72450 + }, + { + "epoch": 0.35141790529030414, + "grad_norm": 2.4155640403478174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72460 + }, + { + "epoch": 0.35146640348314023, + "grad_norm": 2.93539892481931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72470 + }, + { + "epoch": 0.3515149016759763, + "grad_norm": 9.386424608237576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72480 + }, + { + "epoch": 0.3515633998688124, + "grad_norm": 5.389152647694573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72490 + }, + { + "epoch": 0.3516118980616485, + "grad_norm": 3.5685288821696304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72500 + }, + { + "epoch": 0.3516603962544846, + "grad_norm": 2.0728939489345066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72510 + }, + { + "epoch": 0.3517088944473207, + "grad_norm": 2.0094425963179674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72520 + }, + { + "epoch": 0.35175739264015676, + "grad_norm": 2.0436734757822705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72530 + }, + { + "epoch": 0.35180589083299285, + "grad_norm": 3.2685939004295506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72540 + }, + { + "epoch": 0.35185438902582894, + "grad_norm": 4.081085990037536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72550 + }, + { + "epoch": 0.35190288721866503, + "grad_norm": 1.9718220301001566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72560 + }, + { + "epoch": 0.3519513854115011, + "grad_norm": 1.7949008679352119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72570 + }, + { + "epoch": 0.3519998836043372, + "grad_norm": 1.8919357671620674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72580 + }, + { + "epoch": 0.3520483817971733, + "grad_norm": 3.407402118682512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72590 + }, + { + "epoch": 0.3520968799900094, + "grad_norm": 3.625820909292088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72600 + }, + { + "epoch": 0.35214537818284547, + "grad_norm": 2.322709633517661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72610 + }, + { + "epoch": 0.35219387637568156, + "grad_norm": 1.8265238850290189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72620 + }, + { + "epoch": 0.35224237456851765, + "grad_norm": 1.95216352949501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72630 + }, + { + "epoch": 0.35229087276135373, + "grad_norm": 3.7397182950371644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72640 + }, + { + "epoch": 0.3523393709541898, + "grad_norm": 2.6514981072978117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72650 + }, + { + "epoch": 0.3523878691470259, + "grad_norm": 1.8116530782208429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72660 + }, + { + "epoch": 0.352436367339862, + "grad_norm": 2.5846611606539227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72670 + }, + { + "epoch": 0.3524848655326981, + "grad_norm": 1.5867494767007884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72680 + }, + { + "epoch": 0.3525333637255342, + "grad_norm": 2.897949570979108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72690 + }, + { + "epoch": 0.35258186191837027, + "grad_norm": 2.537088676035637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72700 + }, + { + "epoch": 0.35263036011120635, + "grad_norm": 1.7324036889476702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72710 + }, + { + "epoch": 0.35267885830404244, + "grad_norm": 1.5183383084149682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72720 + }, + { + "epoch": 0.35272735649687853, + "grad_norm": 1.5005102795839775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72730 + }, + { + "epoch": 0.3527758546897146, + "grad_norm": 3.755877287403564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72740 + }, + { + "epoch": 0.3528243528825507, + "grad_norm": 2.299268999195192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72750 + }, + { + "epoch": 0.3528728510753868, + "grad_norm": 2.126299477822613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72760 + }, + { + "epoch": 0.3529213492682229, + "grad_norm": 1.2989205515623325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72770 + }, + { + "epoch": 0.35296984746105897, + "grad_norm": 1.3043023727732361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72780 + }, + { + "epoch": 0.35301834565389506, + "grad_norm": 1.9594281184254214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72790 + }, + { + "epoch": 0.35306684384673115, + "grad_norm": 2.5676934001239715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72800 + }, + { + "epoch": 0.35311534203956724, + "grad_norm": 1.3343109230845585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72810 + }, + { + "epoch": 0.3531638402324033, + "grad_norm": 1.350377260678215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72820 + }, + { + "epoch": 0.3532123384252394, + "grad_norm": 1.3395319911069237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72830 + }, + { + "epoch": 0.3532608366180755, + "grad_norm": 2.336177203687839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72840 + }, + { + "epoch": 0.3533093348109116, + "grad_norm": 2.1096025193401147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72850 + }, + { + "epoch": 0.3533578330037477, + "grad_norm": 1.4649382364950725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72860 + }, + { + "epoch": 0.35340633119658377, + "grad_norm": 1.175500528916018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72870 + }, + { + "epoch": 0.35345482938941986, + "grad_norm": 1.806827640393749e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72880 + }, + { + "epoch": 0.35350332758225594, + "grad_norm": 3.941057457268471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72890 + }, + { + "epoch": 0.35355182577509203, + "grad_norm": 2.1338330498110736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72900 + }, + { + "epoch": 0.3536003239679281, + "grad_norm": 1.1551160241651814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72910 + }, + { + "epoch": 0.3536488221607642, + "grad_norm": 1.1194300668648793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72920 + }, + { + "epoch": 0.3536973203536003, + "grad_norm": 1.1385974403310684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72930 + }, + { + "epoch": 0.3537458185464364, + "grad_norm": 1.8227397049486171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72940 + }, + { + "epoch": 0.35379431673927253, + "grad_norm": 1.7168854355986696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72950 + }, + { + "epoch": 0.3538428149321086, + "grad_norm": 1.222373725795478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72960 + }, + { + "epoch": 0.3538913131249447, + "grad_norm": 1.099512587643403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72970 + }, + { + "epoch": 0.3539398113177808, + "grad_norm": 1.2069333479303168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72980 + }, + { + "epoch": 0.3539883095106169, + "grad_norm": 1.659564873079944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 72990 + }, + { + "epoch": 0.35403680770345297, + "grad_norm": 1.8297438373338082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73000 + }, + { + "epoch": 0.35408530589628906, + "grad_norm": 1.0674233408280998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73010 + }, + { + "epoch": 0.35413380408912515, + "grad_norm": 1.0981357263517566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73020 + }, + { + "epoch": 0.35418230228196124, + "grad_norm": 1.0679368642740883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73030 + }, + { + "epoch": 0.3542308004747973, + "grad_norm": 1.578579144734249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73040 + }, + { + "epoch": 0.3542792986676334, + "grad_norm": 1.531489488115767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73050 + }, + { + "epoch": 0.3543277968604695, + "grad_norm": 9.070756163964688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73060 + }, + { + "epoch": 0.3543762950533056, + "grad_norm": 1.9900971892639063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73070 + }, + { + "epoch": 0.3544247932461417, + "grad_norm": 5.837902790517546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73080 + }, + { + "epoch": 0.35447329143897777, + "grad_norm": 5.00629139423836e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73090 + }, + { + "epoch": 0.35452178963181386, + "grad_norm": 2.6388588594272733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73100 + }, + { + "epoch": 0.35457028782464994, + "grad_norm": 1.1066490515077021e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73110 + }, + { + "epoch": 0.35461878601748603, + "grad_norm": 3.3185991924256086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73120 + }, + { + "epoch": 0.3546672842103221, + "grad_norm": 6.102472525526537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73130 + }, + { + "epoch": 0.3547157824031582, + "grad_norm": 3.1039594432513695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73140 + }, + { + "epoch": 0.3547642805959943, + "grad_norm": 2.1633929918607464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73150 + }, + { + "epoch": 0.3548127787888304, + "grad_norm": 3.869759893859737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73160 + }, + { + "epoch": 0.3548612769816665, + "grad_norm": 3.453581939538708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73170 + }, + { + "epoch": 0.35490977517450256, + "grad_norm": 3.0632213565695565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73180 + }, + { + "epoch": 0.35495827336733865, + "grad_norm": 2.2878512027091347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73190 + }, + { + "epoch": 0.35500677156017474, + "grad_norm": 2.568971467553638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73200 + }, + { + "epoch": 0.3550552697530108, + "grad_norm": 2.408110049145762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73210 + }, + { + "epoch": 0.3551037679458469, + "grad_norm": 2.921326995419804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73220 + }, + { + "epoch": 0.355152266138683, + "grad_norm": 3.0289259029814275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73230 + }, + { + "epoch": 0.3552007643315191, + "grad_norm": 1.9621581941464683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73240 + }, + { + "epoch": 0.3552492625243552, + "grad_norm": 6.523158390336903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73250 + }, + { + "epoch": 0.35529776071719127, + "grad_norm": 9.769782082003076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73260 + }, + { + "epoch": 0.35534625891002736, + "grad_norm": 1.718327666822006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73270 + }, + { + "epoch": 0.35539475710286345, + "grad_norm": 1.993423666135641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73280 + }, + { + "epoch": 0.35544325529569953, + "grad_norm": 1.7536176528665237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73290 + }, + { + "epoch": 0.3554917534885356, + "grad_norm": 1.5040133121146937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73300 + }, + { + "epoch": 0.3555402516813717, + "grad_norm": 2.5604495021980256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73310 + }, + { + "epoch": 0.3555887498742078, + "grad_norm": 1.8794380594044924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73320 + }, + { + "epoch": 0.3556372480670439, + "grad_norm": 1.454353196095326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73330 + }, + { + "epoch": 0.35568574625988, + "grad_norm": 1.479128059145296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73340 + }, + { + "epoch": 0.35573424445271606, + "grad_norm": 2.6165603230765555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73350 + }, + { + "epoch": 0.35578274264555215, + "grad_norm": 1.4809128288106876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73360 + }, + { + "epoch": 0.35583124083838824, + "grad_norm": 1.3485703220794676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73370 + }, + { + "epoch": 0.35587973903122433, + "grad_norm": 1.2572578498293296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73380 + }, + { + "epoch": 0.3559282372240604, + "grad_norm": 1.2821921018257854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73390 + }, + { + "epoch": 0.3559767354168965, + "grad_norm": 1.2010528962491662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73400 + }, + { + "epoch": 0.3560252336097326, + "grad_norm": 1.390363877362688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73410 + }, + { + "epoch": 0.3560737318025687, + "grad_norm": 1.2795480870408937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73420 + }, + { + "epoch": 0.35612222999540477, + "grad_norm": 1.1371405435056658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73430 + }, + { + "epoch": 0.35617072818824086, + "grad_norm": 1.3101829381412244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73440 + }, + { + "epoch": 0.35621922638107695, + "grad_norm": 1.6388032690883847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73450 + }, + { + "epoch": 0.3562677245739131, + "grad_norm": 1.1309738283671322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73460 + }, + { + "epoch": 0.3563162227667492, + "grad_norm": 1.1073160521846148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73470 + }, + { + "epoch": 0.35636472095958527, + "grad_norm": 1.1290596830804134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73480 + }, + { + "epoch": 0.35641321915242136, + "grad_norm": 1.2691638175965636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73490 + }, + { + "epoch": 0.35646171734525745, + "grad_norm": 1.1133096222692984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73500 + }, + { + "epoch": 0.35651021553809353, + "grad_norm": 1.12100531168835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73510 + }, + { + "epoch": 0.3565587137309296, + "grad_norm": 1.0690067711038864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73520 + }, + { + "epoch": 0.3566072119237657, + "grad_norm": 1.4452211871684995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73530 + }, + { + "epoch": 0.3566557101166018, + "grad_norm": 1.152188133346499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73540 + }, + { + "epoch": 0.3567042083094379, + "grad_norm": 9.935816933648312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73550 + }, + { + "epoch": 0.356752706502274, + "grad_norm": 9.002870910990168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73560 + }, + { + "epoch": 0.35680120469511006, + "grad_norm": 2.4469657091685804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73570 + }, + { + "epoch": 0.35684970288794615, + "grad_norm": 8.535825486433168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73580 + }, + { + "epoch": 0.35689820108078224, + "grad_norm": 2.228559424111154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73590 + }, + { + "epoch": 0.35694669927361833, + "grad_norm": 1.0409640935904463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73600 + }, + { + "epoch": 0.3569951974664544, + "grad_norm": 8.658502110847621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73610 + }, + { + "epoch": 0.3570436956592905, + "grad_norm": 1.033210651257832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73620 + }, + { + "epoch": 0.3570921938521266, + "grad_norm": 8.750342885832652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73630 + }, + { + "epoch": 0.3571406920449627, + "grad_norm": 1.9499998415994924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73640 + }, + { + "epoch": 0.35718919023779877, + "grad_norm": 9.197751182909997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73650 + }, + { + "epoch": 0.35723768843063486, + "grad_norm": 8.174458798748674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73660 + }, + { + "epoch": 0.35728618662347095, + "grad_norm": 8.825568897918856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73670 + }, + { + "epoch": 0.35733468481630704, + "grad_norm": 8.184314879144949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73680 + }, + { + "epoch": 0.3573831830091431, + "grad_norm": 9.045357387549302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73690 + }, + { + "epoch": 0.3574316812019792, + "grad_norm": 1.014138888422167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73700 + }, + { + "epoch": 0.3574801793948153, + "grad_norm": 8.937873872127966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73710 + }, + { + "epoch": 0.3575286775876514, + "grad_norm": 9.415772979082249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73720 + }, + { + "epoch": 0.3575771757804875, + "grad_norm": 7.709963938395958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73730 + }, + { + "epoch": 0.35762567397332357, + "grad_norm": 8.528782586836314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73740 + }, + { + "epoch": 0.35767417216615965, + "grad_norm": 8.774584330240032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73750 + }, + { + "epoch": 0.35772267035899574, + "grad_norm": 1.5376718920379062e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73760 + }, + { + "epoch": 0.35777116855183183, + "grad_norm": 8.268881970252551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73770 + }, + { + "epoch": 0.3578196667446679, + "grad_norm": 9.089837931242073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73780 + }, + { + "epoch": 0.357868164937504, + "grad_norm": 8.865753216014127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73790 + }, + { + "epoch": 0.3579166631303401, + "grad_norm": 1.2628323929675389e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73800 + }, + { + "epoch": 0.3579651613231762, + "grad_norm": 7.650541533621436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73810 + }, + { + "epoch": 0.3580136595160123, + "grad_norm": 7.100075549715257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73820 + }, + { + "epoch": 0.35806215770884836, + "grad_norm": 6.741689730915823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73830 + }, + { + "epoch": 0.35811065590168445, + "grad_norm": 6.505737928819144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73840 + }, + { + "epoch": 0.35815915409452054, + "grad_norm": 7.399827950393956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73850 + }, + { + "epoch": 0.3582076522873566, + "grad_norm": 7.067675937832973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73860 + }, + { + "epoch": 0.3582561504801927, + "grad_norm": 7.138247042348667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73870 + }, + { + "epoch": 0.3583046486730288, + "grad_norm": 6.821914553256647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73880 + }, + { + "epoch": 0.3583531468658649, + "grad_norm": 9.99095732368005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73890 + }, + { + "epoch": 0.358401645058701, + "grad_norm": 7.06390778759669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73900 + }, + { + "epoch": 0.35845014325153707, + "grad_norm": 6.891968951094896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73910 + }, + { + "epoch": 0.35849864144437316, + "grad_norm": 5.951439447926532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73920 + }, + { + "epoch": 0.35854713963720924, + "grad_norm": 6.048560408089543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73930 + }, + { + "epoch": 0.35859563783004533, + "grad_norm": 6.49735227398196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73940 + }, + { + "epoch": 0.3586441360228814, + "grad_norm": 6.962011411815183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73950 + }, + { + "epoch": 0.3586926342157175, + "grad_norm": 6.3176725006997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73960 + }, + { + "epoch": 0.35874113240855365, + "grad_norm": 6.647920258728846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73970 + }, + { + "epoch": 0.35878963060138974, + "grad_norm": 6.449042757594725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73980 + }, + { + "epoch": 0.35883812879422583, + "grad_norm": 8.06364596428466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 73990 + }, + { + "epoch": 0.3588866269870619, + "grad_norm": 1.6041515209508361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74000 + }, + { + "epoch": 0.358935125179898, + "grad_norm": 6.65769107399683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74010 + }, + { + "epoch": 0.3589836233727341, + "grad_norm": 5.923046160205558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74020 + }, + { + "epoch": 0.3590321215655702, + "grad_norm": 7.210651347122621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74030 + }, + { + "epoch": 0.35908061975840627, + "grad_norm": 6.096798301769013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74040 + }, + { + "epoch": 0.35912911795124236, + "grad_norm": 2.3267887172551127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74050 + }, + { + "epoch": 0.35917761614407845, + "grad_norm": 5.87468889534648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74060 + }, + { + "epoch": 0.35922611433691454, + "grad_norm": 5.66597350371012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74070 + }, + { + "epoch": 0.3592746125297506, + "grad_norm": 5.747257318944321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74080 + }, + { + "epoch": 0.3593231107225867, + "grad_norm": 1.3192616279411595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74090 + }, + { + "epoch": 0.3593716089154228, + "grad_norm": 6.797158107474388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74100 + }, + { + "epoch": 0.3594201071082589, + "grad_norm": 5.894351602364623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74110 + }, + { + "epoch": 0.359468605301095, + "grad_norm": 8.177344170690048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74120 + }, + { + "epoch": 0.35951710349393107, + "grad_norm": 5.735305990128836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74130 + }, + { + "epoch": 0.35956560168676716, + "grad_norm": 8.745172976887261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74140 + }, + { + "epoch": 0.35961409987960324, + "grad_norm": 8.216628657464753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74150 + }, + { + "epoch": 0.35966259807243933, + "grad_norm": 5.76296429244394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74160 + }, + { + "epoch": 0.3597110962652754, + "grad_norm": 5.560265208259807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74170 + }, + { + "epoch": 0.3597595944581115, + "grad_norm": 6.053846846043598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74180 + }, + { + "epoch": 0.3598080926509476, + "grad_norm": 6.89680234700063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74190 + }, + { + "epoch": 0.3598565908437837, + "grad_norm": 6.58552608001628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74200 + }, + { + "epoch": 0.3599050890366198, + "grad_norm": 5.229215389590536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74210 + }, + { + "epoch": 0.35995358722945586, + "grad_norm": 5.509858169716608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74220 + }, + { + "epoch": 0.36000208542229195, + "grad_norm": 5.361270041248645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74230 + }, + { + "epoch": 0.36005058361512804, + "grad_norm": 5.882690743419516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74240 + }, + { + "epoch": 0.3600990818079641, + "grad_norm": 8.40853658701235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74250 + }, + { + "epoch": 0.3601475800008002, + "grad_norm": 4.620253264420171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74260 + }, + { + "epoch": 0.3601960781936363, + "grad_norm": 5.144030410519917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74270 + }, + { + "epoch": 0.3602445763864724, + "grad_norm": 5.543791417039756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74280 + }, + { + "epoch": 0.3602930745793085, + "grad_norm": 5.705507533093623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74290 + }, + { + "epoch": 0.36034157277214457, + "grad_norm": 7.585410344290722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74300 + }, + { + "epoch": 0.36039007096498066, + "grad_norm": 4.977692356078478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74310 + }, + { + "epoch": 0.36043856915781675, + "grad_norm": 4.587823241308797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74320 + }, + { + "epoch": 0.36048706735065283, + "grad_norm": 5.922152581661067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74330 + }, + { + "epoch": 0.3605355655434889, + "grad_norm": 5.036469588048931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74340 + }, + { + "epoch": 0.360584063736325, + "grad_norm": 8.514500109413348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74350 + }, + { + "epoch": 0.3606325619291611, + "grad_norm": 5.270865131024038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74360 + }, + { + "epoch": 0.3606810601219972, + "grad_norm": 5.089819410386553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74370 + }, + { + "epoch": 0.3607295583148333, + "grad_norm": 4.4888784600516374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74380 + }, + { + "epoch": 0.36077805650766936, + "grad_norm": 3.3230364806513535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74390 + }, + { + "epoch": 0.36082655470050545, + "grad_norm": 5.430788974081224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74400 + }, + { + "epoch": 0.36087505289334154, + "grad_norm": 4.6345391524482693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74410 + }, + { + "epoch": 0.36092355108617763, + "grad_norm": 5.400842155722785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74420 + }, + { + "epoch": 0.3609720492790137, + "grad_norm": 4.907150241706404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74430 + }, + { + "epoch": 0.3610205474718498, + "grad_norm": 4.88881141791353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74440 + }, + { + "epoch": 0.3610690456646859, + "grad_norm": 5.446416366794438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74450 + }, + { + "epoch": 0.361117543857522, + "grad_norm": 4.3864142185157107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74460 + }, + { + "epoch": 0.36116604205035807, + "grad_norm": 4.42127799260561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74470 + }, + { + "epoch": 0.3612145402431942, + "grad_norm": 1.3209492863097694e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74480 + }, + { + "epoch": 0.3612630384360303, + "grad_norm": 1.212302663589071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74490 + }, + { + "epoch": 0.3613115366288664, + "grad_norm": 8.509081794727535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74500 + }, + { + "epoch": 0.3613600348217025, + "grad_norm": 4.381423650556826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74510 + }, + { + "epoch": 0.36140853301453857, + "grad_norm": 9.107482128456468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74520 + }, + { + "epoch": 0.36145703120737466, + "grad_norm": 6.790253905819554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74530 + }, + { + "epoch": 0.36150552940021075, + "grad_norm": 4.813257987734687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74540 + }, + { + "epoch": 0.36155402759304683, + "grad_norm": 5.055516112406622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74550 + }, + { + "epoch": 0.3616025257858829, + "grad_norm": 3.787573064073513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74560 + }, + { + "epoch": 0.361651023978719, + "grad_norm": 4.0377199184149504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74570 + }, + { + "epoch": 0.3616995221715551, + "grad_norm": 4.3318766529409913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74580 + }, + { + "epoch": 0.3617480203643912, + "grad_norm": 4.88418834265758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74590 + }, + { + "epoch": 0.3617965185572273, + "grad_norm": 4.257387615780317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74600 + }, + { + "epoch": 0.36184501675006336, + "grad_norm": 3.7683196296711685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74610 + }, + { + "epoch": 0.36189351494289945, + "grad_norm": 4.354498628345027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74620 + }, + { + "epoch": 0.36194201313573554, + "grad_norm": 4.03642957280681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74630 + }, + { + "epoch": 0.36199051132857163, + "grad_norm": 4.4303214963292703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74640 + }, + { + "epoch": 0.3620390095214077, + "grad_norm": 2.540649802540429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74650 + }, + { + "epoch": 0.3620875077142438, + "grad_norm": 3.876192238294607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74660 + }, + { + "epoch": 0.3621360059070799, + "grad_norm": 3.9883195768197766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74670 + }, + { + "epoch": 0.362184504099916, + "grad_norm": 8.841084309096914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74680 + }, + { + "epoch": 0.36223300229275207, + "grad_norm": 3.7315408007998485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74690 + }, + { + "epoch": 0.36228150048558816, + "grad_norm": 3.839131181848643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74700 + }, + { + "epoch": 0.36232999867842425, + "grad_norm": 3.806566439834569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74710 + }, + { + "epoch": 0.36237849687126034, + "grad_norm": 3.700583590671158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74720 + }, + { + "epoch": 0.3624269950640964, + "grad_norm": 3.685746037263016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74730 + }, + { + "epoch": 0.3624754932569325, + "grad_norm": 4.454453801372438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74740 + }, + { + "epoch": 0.3625239914497686, + "grad_norm": 3.6251057622394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74750 + }, + { + "epoch": 0.3625724896426047, + "grad_norm": 3.763702238757105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74760 + }, + { + "epoch": 0.3626209878354408, + "grad_norm": 4.283085388578911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74770 + }, + { + "epoch": 0.36266948602827687, + "grad_norm": 3.891133246725076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74780 + }, + { + "epoch": 0.36271798422111295, + "grad_norm": 5.559414262279461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74790 + }, + { + "epoch": 0.36276648241394904, + "grad_norm": 1.092484012588102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74800 + }, + { + "epoch": 0.36281498060678513, + "grad_norm": 3.3215835060218524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74810 + }, + { + "epoch": 0.3628634787996212, + "grad_norm": 3.380084194759547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74820 + }, + { + "epoch": 0.3629119769924573, + "grad_norm": 4.5169002760303556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74830 + }, + { + "epoch": 0.3629604751852934, + "grad_norm": 4.082013163042575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74840 + }, + { + "epoch": 0.3630089733781295, + "grad_norm": 4.3603796484603663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74850 + }, + { + "epoch": 0.3630574715709656, + "grad_norm": 3.5311848023411585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74860 + }, + { + "epoch": 0.36310596976380166, + "grad_norm": 3.7850367107239435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74870 + }, + { + "epoch": 0.36315446795663775, + "grad_norm": 3.5022489441871585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74880 + }, + { + "epoch": 0.36320296614947384, + "grad_norm": 9.377566129842307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74890 + }, + { + "epoch": 0.3632514643423099, + "grad_norm": 1.8565297068562359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74900 + }, + { + "epoch": 0.363299962535146, + "grad_norm": 3.351136967921775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74910 + }, + { + "epoch": 0.3633484607279821, + "grad_norm": 3.8745972119613725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74920 + }, + { + "epoch": 0.3633969589208182, + "grad_norm": 3.4912753221760795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74930 + }, + { + "epoch": 0.3634454571136543, + "grad_norm": 3.774933645672718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74940 + }, + { + "epoch": 0.36349395530649037, + "grad_norm": 4.1903132341758464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74950 + }, + { + "epoch": 0.36354245349932646, + "grad_norm": 3.9761118841852294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74960 + }, + { + "epoch": 0.36359095169216255, + "grad_norm": 4.1141305473502143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74970 + }, + { + "epoch": 0.36363944988499863, + "grad_norm": 3.3688070288917515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74980 + }, + { + "epoch": 0.3636879480778348, + "grad_norm": 3.325928048525384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 74990 + }, + { + "epoch": 0.36373644627067087, + "grad_norm": 3.3894747275553527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75000 + }, + { + "epoch": 0.36378494446350695, + "grad_norm": 5.192845833335014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75010 + }, + { + "epoch": 0.36383344265634304, + "grad_norm": 4.717973638435069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75020 + }, + { + "epoch": 0.36388194084917913, + "grad_norm": 5.304010528561776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75030 + }, + { + "epoch": 0.3639304390420152, + "grad_norm": 3.08988120423237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75040 + }, + { + "epoch": 0.3639789372348513, + "grad_norm": 2.192470128647983e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75050 + }, + { + "epoch": 0.3640274354276874, + "grad_norm": 3.6069363318347314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75060 + }, + { + "epoch": 0.3640759336205235, + "grad_norm": 3.080947408307111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75070 + }, + { + "epoch": 0.3641244318133596, + "grad_norm": 7.209890213744075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75080 + }, + { + "epoch": 0.36417293000619566, + "grad_norm": 4.0022254665927903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75090 + }, + { + "epoch": 0.36422142819903175, + "grad_norm": 7.833218660380226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75100 + }, + { + "epoch": 0.36426992639186784, + "grad_norm": 3.0143303320073755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75110 + }, + { + "epoch": 0.3643184245847039, + "grad_norm": 3.562260815215268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75120 + }, + { + "epoch": 0.36436692277754, + "grad_norm": 3.743475645023864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75130 + }, + { + "epoch": 0.3644154209703761, + "grad_norm": 3.067190732508607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75140 + }, + { + "epoch": 0.3644639191632122, + "grad_norm": 2.9441733317980834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75150 + }, + { + "epoch": 0.3645124173560483, + "grad_norm": 2.9812579782628745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75160 + }, + { + "epoch": 0.36456091554888437, + "grad_norm": 2.912599939008942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75170 + }, + { + "epoch": 0.36460941374172046, + "grad_norm": 2.8922750061610714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75180 + }, + { + "epoch": 0.36465791193455654, + "grad_norm": 4.037533472001087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75190 + }, + { + "epoch": 0.36470641012739263, + "grad_norm": 3.377153348083084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75200 + }, + { + "epoch": 0.3647549083202287, + "grad_norm": 3.072416348004481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75210 + }, + { + "epoch": 0.3648034065130648, + "grad_norm": 2.8392383910613717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75220 + }, + { + "epoch": 0.3648519047059009, + "grad_norm": 2.809098873512994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75230 + }, + { + "epoch": 0.364900402898737, + "grad_norm": 3.08771262780283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75240 + }, + { + "epoch": 0.3649489010915731, + "grad_norm": 3.1052090321281867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75250 + }, + { + "epoch": 0.36499739928440916, + "grad_norm": 3.843722140572936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75260 + }, + { + "epoch": 0.36504589747724525, + "grad_norm": 4.787157763530558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75270 + }, + { + "epoch": 0.36509439567008134, + "grad_norm": 2.963953420476173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75280 + }, + { + "epoch": 0.36514289386291743, + "grad_norm": 4.1832444708234107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75290 + }, + { + "epoch": 0.3651913920557535, + "grad_norm": 4.7090756538636924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75300 + }, + { + "epoch": 0.3652398902485896, + "grad_norm": 2.715424329835514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75310 + }, + { + "epoch": 0.3652883884414257, + "grad_norm": 1.2812859040423064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75320 + }, + { + "epoch": 0.3653368866342618, + "grad_norm": 2.735281157129066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75330 + }, + { + "epoch": 0.36538538482709787, + "grad_norm": 2.7899497467842593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75340 + }, + { + "epoch": 0.36543388301993396, + "grad_norm": 2.929733113887778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75350 + }, + { + "epoch": 0.36548238121277005, + "grad_norm": 2.709520856569725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75360 + }, + { + "epoch": 0.36553087940560614, + "grad_norm": 2.815521042975888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75370 + }, + { + "epoch": 0.3655793775984422, + "grad_norm": 2.94631064434725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75380 + }, + { + "epoch": 0.3656278757912783, + "grad_norm": 4.713056682703609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75390 + }, + { + "epoch": 0.3656763739841144, + "grad_norm": 3.2383920256506826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75400 + }, + { + "epoch": 0.3657248721769505, + "grad_norm": 2.865094472781493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75410 + }, + { + "epoch": 0.3657733703697866, + "grad_norm": 2.6878370817939867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75420 + }, + { + "epoch": 0.36582186856262267, + "grad_norm": 4.845879288950528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75430 + }, + { + "epoch": 0.36587036675545875, + "grad_norm": 2.867351724944456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75440 + }, + { + "epoch": 0.36591886494829484, + "grad_norm": 4.917279170513211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75450 + }, + { + "epoch": 0.36596736314113093, + "grad_norm": 2.9548135671575437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75460 + }, + { + "epoch": 0.366015861333967, + "grad_norm": 2.493544286608085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75470 + }, + { + "epoch": 0.3660643595268031, + "grad_norm": 2.730301389419765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75480 + }, + { + "epoch": 0.3661128577196392, + "grad_norm": 3.0110629722912563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75490 + }, + { + "epoch": 0.3661613559124753, + "grad_norm": 2.5681961801637954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75500 + }, + { + "epoch": 0.3662098541053114, + "grad_norm": 2.859578955849429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75510 + }, + { + "epoch": 0.3662583522981475, + "grad_norm": 2.4572034362790873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75520 + }, + { + "epoch": 0.3663068504909836, + "grad_norm": 5.212050382397138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75530 + }, + { + "epoch": 0.3663553486838197, + "grad_norm": 2.787909920698439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75540 + }, + { + "epoch": 0.3664038468766558, + "grad_norm": 2.6477394499124784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75550 + }, + { + "epoch": 0.36645234506949187, + "grad_norm": 3.099240757364896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75560 + }, + { + "epoch": 0.36650084326232796, + "grad_norm": 2.5760317612366634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75570 + }, + { + "epoch": 0.36654934145516405, + "grad_norm": 2.4570766754550277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75580 + }, + { + "epoch": 0.36659783964800013, + "grad_norm": 2.3628234657735447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75590 + }, + { + "epoch": 0.3666463378408362, + "grad_norm": 2.651049157975649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75600 + }, + { + "epoch": 0.3666948360336723, + "grad_norm": 2.6524847385189787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75610 + }, + { + "epoch": 0.3667433342265084, + "grad_norm": 2.5371497258674935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75620 + }, + { + "epoch": 0.3667918324193445, + "grad_norm": 2.6443217393534724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75630 + }, + { + "epoch": 0.3668403306121806, + "grad_norm": 2.440226296585024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75640 + }, + { + "epoch": 0.36688882880501666, + "grad_norm": 2.3380539460049476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75650 + }, + { + "epoch": 0.36693732699785275, + "grad_norm": 2.658999562754616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75660 + }, + { + "epoch": 0.36698582519068884, + "grad_norm": 2.4716359803278465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75670 + }, + { + "epoch": 0.36703432338352493, + "grad_norm": 3.5306339896123973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75680 + }, + { + "epoch": 0.367082821576361, + "grad_norm": 2.414400341876899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75690 + }, + { + "epoch": 0.3671313197691971, + "grad_norm": 2.4814926291583106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75700 + }, + { + "epoch": 0.3671798179620332, + "grad_norm": 2.2923828169041371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75710 + }, + { + "epoch": 0.3672283161548693, + "grad_norm": 2.4484717187078786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75720 + }, + { + "epoch": 0.36727681434770537, + "grad_norm": 2.555044886776159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75730 + }, + { + "epoch": 0.36732531254054146, + "grad_norm": 2.389499229593639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75740 + }, + { + "epoch": 0.36737381073337755, + "grad_norm": 2.5370222829224076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75750 + }, + { + "epoch": 0.36742230892621364, + "grad_norm": 2.337125124540762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75760 + }, + { + "epoch": 0.3674708071190497, + "grad_norm": 2.345881284782081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75770 + }, + { + "epoch": 0.3675193053118858, + "grad_norm": 2.390127349372051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75780 + }, + { + "epoch": 0.3675678035047219, + "grad_norm": 2.538244814331847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75790 + }, + { + "epoch": 0.367616301697558, + "grad_norm": 2.3824709671771416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75800 + }, + { + "epoch": 0.3676647998903941, + "grad_norm": 2.4107927742988977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75810 + }, + { + "epoch": 0.36771329808323017, + "grad_norm": 2.2809221889019682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75820 + }, + { + "epoch": 0.36776179627606626, + "grad_norm": 2.3636727064513252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75830 + }, + { + "epoch": 0.36781029446890234, + "grad_norm": 2.2465826532425126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75840 + }, + { + "epoch": 0.36785879266173843, + "grad_norm": 3.204981737781054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75850 + }, + { + "epoch": 0.3679072908545745, + "grad_norm": 2.2309421865429613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75860 + }, + { + "epoch": 0.3679557890474106, + "grad_norm": 2.723517695812916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75870 + }, + { + "epoch": 0.3680042872402467, + "grad_norm": 2.3658223824440938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75880 + }, + { + "epoch": 0.3680527854330828, + "grad_norm": 2.57072827025695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75890 + }, + { + "epoch": 0.3681012836259189, + "grad_norm": 2.2854023029594828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75900 + }, + { + "epoch": 0.36814978181875496, + "grad_norm": 2.2321016501791746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75910 + }, + { + "epoch": 0.36819828001159105, + "grad_norm": 2.1233255154129438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75920 + }, + { + "epoch": 0.36824677820442714, + "grad_norm": 1.208120011142455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75930 + }, + { + "epoch": 0.3682952763972632, + "grad_norm": 2.0328684513515327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75940 + }, + { + "epoch": 0.3683437745900993, + "grad_norm": 2.39452390360384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75950 + }, + { + "epoch": 0.3683922727829354, + "grad_norm": 2.2864007576117729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75960 + }, + { + "epoch": 0.3684407709757715, + "grad_norm": 2.421546980713174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75970 + }, + { + "epoch": 0.3684892691686076, + "grad_norm": 2.1156293428248318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75980 + }, + { + "epoch": 0.36853776736144367, + "grad_norm": 4.863335902882682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 75990 + }, + { + "epoch": 0.36858626555427976, + "grad_norm": 2.1994021892624005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76000 + }, + { + "epoch": 0.36863476374711585, + "grad_norm": 2.1053318732811022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76010 + }, + { + "epoch": 0.368683261939952, + "grad_norm": 2.740494835506979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76020 + }, + { + "epoch": 0.3687317601327881, + "grad_norm": 2.411820219094807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76030 + }, + { + "epoch": 0.36878025832562417, + "grad_norm": 2.6158170385315316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76040 + }, + { + "epoch": 0.36882875651846025, + "grad_norm": 1.928781472315677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76050 + }, + { + "epoch": 0.36887725471129634, + "grad_norm": 2.1261108429371234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76060 + }, + { + "epoch": 0.36892575290413243, + "grad_norm": 2.136070804681367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76070 + }, + { + "epoch": 0.3689742510969685, + "grad_norm": 2.2812358224655327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76080 + }, + { + "epoch": 0.3690227492898046, + "grad_norm": 2.2040163116798794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76090 + }, + { + "epoch": 0.3690712474826407, + "grad_norm": 1.9747555768390157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76100 + }, + { + "epoch": 0.3691197456754768, + "grad_norm": 2.860643633084692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76110 + }, + { + "epoch": 0.3691682438683129, + "grad_norm": 2.0273142808946432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76120 + }, + { + "epoch": 0.36921674206114896, + "grad_norm": 2.0073321138625033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76130 + }, + { + "epoch": 0.36926524025398505, + "grad_norm": 1.8639966015143727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76140 + }, + { + "epoch": 0.36931373844682114, + "grad_norm": 2.2107431618678675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76150 + }, + { + "epoch": 0.3693622366396572, + "grad_norm": 2.4327894720954646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76160 + }, + { + "epoch": 0.3694107348324933, + "grad_norm": 1.90817644352137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76170 + }, + { + "epoch": 0.3694592330253294, + "grad_norm": 2.0021543889470195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76180 + }, + { + "epoch": 0.3695077312181655, + "grad_norm": 2.3869503706919204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76190 + }, + { + "epoch": 0.3695562294110016, + "grad_norm": 1.9496268066632183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76200 + }, + { + "epoch": 0.36960472760383767, + "grad_norm": 2.688960023533582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76210 + }, + { + "epoch": 0.36965322579667376, + "grad_norm": 2.000416969849539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76220 + }, + { + "epoch": 0.36970172398950985, + "grad_norm": 2.0052975457929278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76230 + }, + { + "epoch": 0.36975022218234593, + "grad_norm": 1.7876116942261433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76240 + }, + { + "epoch": 0.369798720375182, + "grad_norm": 1.908296241026619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76250 + }, + { + "epoch": 0.3698472185680181, + "grad_norm": 2.0125962407746556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76260 + }, + { + "epoch": 0.3698957167608542, + "grad_norm": 1.8449529193276248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76270 + }, + { + "epoch": 0.3699442149536903, + "grad_norm": 1.9522666150351142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76280 + }, + { + "epoch": 0.3699927131465264, + "grad_norm": 2.938038790034625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76290 + }, + { + "epoch": 0.37004121133936246, + "grad_norm": 2.05992279234124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76300 + }, + { + "epoch": 0.37008970953219855, + "grad_norm": 1.8607346419230453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76310 + }, + { + "epoch": 0.37013820772503464, + "grad_norm": 1.8673698320981202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76320 + }, + { + "epoch": 0.37018670591787073, + "grad_norm": 1.790204464668932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76330 + }, + { + "epoch": 0.3702352041107068, + "grad_norm": 1.814994590176866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76340 + }, + { + "epoch": 0.3702837023035429, + "grad_norm": 1.9509731430389365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76350 + }, + { + "epoch": 0.370332200496379, + "grad_norm": 1.8250852917844895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76360 + }, + { + "epoch": 0.3703806986892151, + "grad_norm": 1.9902761039247707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76370 + }, + { + "epoch": 0.37042919688205117, + "grad_norm": 2.1963194285490317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76380 + }, + { + "epoch": 0.37047769507488726, + "grad_norm": 2.6439414568812936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76390 + }, + { + "epoch": 0.37052619326772335, + "grad_norm": 3.2129156579685514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76400 + }, + { + "epoch": 0.37057469146055944, + "grad_norm": 1.8627362408096815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76410 + }, + { + "epoch": 0.3706231896533955, + "grad_norm": 1.87569582976721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76420 + }, + { + "epoch": 0.3706716878462316, + "grad_norm": 3.797916861003614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76430 + }, + { + "epoch": 0.3707201860390677, + "grad_norm": 1.7531665719161538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76440 + }, + { + "epoch": 0.3707686842319038, + "grad_norm": 1.8441390636780852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76450 + }, + { + "epoch": 0.3708171824247399, + "grad_norm": 1.8029912496331235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76460 + }, + { + "epoch": 0.37086568061757597, + "grad_norm": 1.9149680952068593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76470 + }, + { + "epoch": 0.37091417881041205, + "grad_norm": 1.7445334776766686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76480 + }, + { + "epoch": 0.37096267700324814, + "grad_norm": 2.1873441369280044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76490 + }, + { + "epoch": 0.37101117519608423, + "grad_norm": 1.745147244491818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76500 + }, + { + "epoch": 0.3710596733889203, + "grad_norm": 1.74872283764671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76510 + }, + { + "epoch": 0.3711081715817564, + "grad_norm": 4.754250255700754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76520 + }, + { + "epoch": 0.37115666977459255, + "grad_norm": 2.6024792987300316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76530 + }, + { + "epoch": 0.37120516796742864, + "grad_norm": 2.2083061423927575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76540 + }, + { + "epoch": 0.37125366616026473, + "grad_norm": 9.23212326142675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76550 + }, + { + "epoch": 0.3713021643531008, + "grad_norm": 1.884738196622493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76560 + }, + { + "epoch": 0.3713506625459369, + "grad_norm": 1.6854849604897026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76570 + }, + { + "epoch": 0.371399160738773, + "grad_norm": 1.7378486916186375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76580 + }, + { + "epoch": 0.3714476589316091, + "grad_norm": 1.8095320797328895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76590 + }, + { + "epoch": 0.37149615712444517, + "grad_norm": 1.5708597800312418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76600 + }, + { + "epoch": 0.37154465531728126, + "grad_norm": 1.715404494007089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76610 + }, + { + "epoch": 0.37159315351011735, + "grad_norm": 2.2143284184039658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76620 + }, + { + "epoch": 0.37164165170295343, + "grad_norm": 1.7054045997610956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76630 + }, + { + "epoch": 0.3716901498957895, + "grad_norm": 1.9227255165787938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76640 + }, + { + "epoch": 0.3717386480886256, + "grad_norm": 1.6247645362454932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76650 + }, + { + "epoch": 0.3717871462814617, + "grad_norm": 1.764780677149247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76660 + }, + { + "epoch": 0.3718356444742978, + "grad_norm": 1.643995091171746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76670 + }, + { + "epoch": 0.3718841426671339, + "grad_norm": 1.6916938250233216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76680 + }, + { + "epoch": 0.37193264085996997, + "grad_norm": 1.7858303635875927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76690 + }, + { + "epoch": 0.37198113905280605, + "grad_norm": 1.7958501530301874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76700 + }, + { + "epoch": 0.37202963724564214, + "grad_norm": 1.570280403484503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76710 + }, + { + "epoch": 0.37207813543847823, + "grad_norm": 1.6321504858751723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76720 + }, + { + "epoch": 0.3721266336313143, + "grad_norm": 2.7655255507852416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76730 + }, + { + "epoch": 0.3721751318241504, + "grad_norm": 1.6881763542642148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76740 + }, + { + "epoch": 0.3722236300169865, + "grad_norm": 1.708965555735631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76750 + }, + { + "epoch": 0.3722721282098226, + "grad_norm": 1.6306674410770938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76760 + }, + { + "epoch": 0.37232062640265867, + "grad_norm": 1.5635166050742555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76770 + }, + { + "epoch": 0.37236912459549476, + "grad_norm": 1.5588707924507617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76780 + }, + { + "epoch": 0.37241762278833085, + "grad_norm": 1.8276729463195807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76790 + }, + { + "epoch": 0.37246612098116694, + "grad_norm": 1.5936365116431261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76800 + }, + { + "epoch": 0.372514619174003, + "grad_norm": 1.628809229714534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76810 + }, + { + "epoch": 0.3725631173668391, + "grad_norm": 1.670881744075814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76820 + }, + { + "epoch": 0.3726116155596752, + "grad_norm": 1.8226080555905355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76830 + }, + { + "epoch": 0.3726601137525113, + "grad_norm": 1.5476994974505942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76840 + }, + { + "epoch": 0.3727086119453474, + "grad_norm": 1.4916250279384258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76850 + }, + { + "epoch": 0.37275711013818347, + "grad_norm": 1.5936007002892438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76860 + }, + { + "epoch": 0.37280560833101956, + "grad_norm": 2.4203984594350914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76870 + }, + { + "epoch": 0.37285410652385564, + "grad_norm": 5.940602250120719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76880 + }, + { + "epoch": 0.37290260471669173, + "grad_norm": 1.4809108961344464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76890 + }, + { + "epoch": 0.3729511029095278, + "grad_norm": 1.617328990732858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76900 + }, + { + "epoch": 0.3729996011023639, + "grad_norm": 1.5638970296549815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76910 + }, + { + "epoch": 0.3730480992952, + "grad_norm": 1.625987096076642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76920 + }, + { + "epoch": 0.3730965974880361, + "grad_norm": 1.5124888363970967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76930 + }, + { + "epoch": 0.3731450956808722, + "grad_norm": 1.5082746074313036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76940 + }, + { + "epoch": 0.37319359387370826, + "grad_norm": 1.4955767824176291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76950 + }, + { + "epoch": 0.37324209206654435, + "grad_norm": 1.5062494185258402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76960 + }, + { + "epoch": 0.37329059025938044, + "grad_norm": 1.523176678119853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76970 + }, + { + "epoch": 0.3733390884522165, + "grad_norm": 1.5345318615800352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76980 + }, + { + "epoch": 0.3733875866450526, + "grad_norm": 1.4439324047543778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 76990 + }, + { + "epoch": 0.3734360848378887, + "grad_norm": 1.52519021412445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77000 + }, + { + "epoch": 0.3734845830307248, + "grad_norm": 1.5142597931117052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77010 + }, + { + "epoch": 0.3735330812235609, + "grad_norm": 1.477794029369761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77020 + }, + { + "epoch": 0.37358157941639697, + "grad_norm": 1.4493060973563843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77030 + }, + { + "epoch": 0.3736300776092331, + "grad_norm": 1.498128625598838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77040 + }, + { + "epoch": 0.3736785758020692, + "grad_norm": 1.4149533456020436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77050 + }, + { + "epoch": 0.3737270739949053, + "grad_norm": 1.421843194293615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77060 + }, + { + "epoch": 0.3737755721877414, + "grad_norm": 1.4173437534736877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77070 + }, + { + "epoch": 0.37382407038057747, + "grad_norm": 1.4133448189568298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77080 + }, + { + "epoch": 0.37387256857341356, + "grad_norm": 1.3982935342937708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77090 + }, + { + "epoch": 0.37392106676624964, + "grad_norm": 1.607533164360575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77100 + }, + { + "epoch": 0.37396956495908573, + "grad_norm": 1.4143645898911927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77110 + }, + { + "epoch": 0.3740180631519218, + "grad_norm": 1.4395271819012123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77120 + }, + { + "epoch": 0.3740665613447579, + "grad_norm": 1.4189838282163691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77130 + }, + { + "epoch": 0.374115059537594, + "grad_norm": 1.484995664213784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77140 + }, + { + "epoch": 0.3741635577304301, + "grad_norm": 3.9930432649271097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77150 + }, + { + "epoch": 0.3742120559232662, + "grad_norm": 1.4151065386158734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77160 + }, + { + "epoch": 0.37426055411610226, + "grad_norm": 1.3597980341728544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77170 + }, + { + "epoch": 0.37430905230893835, + "grad_norm": 1.37275776523893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77180 + }, + { + "epoch": 0.37435755050177444, + "grad_norm": 1.377496374743714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77190 + }, + { + "epoch": 0.3744060486946105, + "grad_norm": 1.4014774762927118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77200 + }, + { + "epoch": 0.3744545468874466, + "grad_norm": 3.84302836664574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77210 + }, + { + "epoch": 0.3745030450802827, + "grad_norm": 1.4366686684752494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77220 + }, + { + "epoch": 0.3745515432731188, + "grad_norm": 1.3906029039389978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77230 + }, + { + "epoch": 0.3746000414659549, + "grad_norm": 3.993254722445272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77240 + }, + { + "epoch": 0.37464853965879097, + "grad_norm": 1.346542148894514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77250 + }, + { + "epoch": 0.37469703785162706, + "grad_norm": 1.416024133504834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77260 + }, + { + "epoch": 0.37474553604446315, + "grad_norm": 1.4238489143281186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77270 + }, + { + "epoch": 0.37479403423729923, + "grad_norm": 1.3112557439853845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77280 + }, + { + "epoch": 0.3748425324301353, + "grad_norm": 1.624549383905105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77290 + }, + { + "epoch": 0.3748910306229714, + "grad_norm": 1.382259000592967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77300 + }, + { + "epoch": 0.3749395288158075, + "grad_norm": 1.326087897268735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77310 + }, + { + "epoch": 0.3749880270086436, + "grad_norm": 1.3010873090024688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77320 + }, + { + "epoch": 0.3750365252014797, + "grad_norm": 1.3671379406332562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77330 + }, + { + "epoch": 0.37508502339431576, + "grad_norm": 1.4001341241964838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77340 + }, + { + "epoch": 0.37513352158715185, + "grad_norm": 1.3055957026608667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77350 + }, + { + "epoch": 0.37518201977998794, + "grad_norm": 1.3418357980299334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77360 + }, + { + "epoch": 0.37523051797282403, + "grad_norm": 1.326924490285819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77370 + }, + { + "epoch": 0.3752790161656601, + "grad_norm": 3.040901503936766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77380 + }, + { + "epoch": 0.3753275143584962, + "grad_norm": 1.340023914053745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77390 + }, + { + "epoch": 0.3753760125513323, + "grad_norm": 1.293084750386697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77400 + }, + { + "epoch": 0.3754245107441684, + "grad_norm": 1.2513098113231536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77410 + }, + { + "epoch": 0.37547300893700447, + "grad_norm": 1.2996987663882464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77420 + }, + { + "epoch": 0.37552150712984056, + "grad_norm": 1.281495940474997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77430 + }, + { + "epoch": 0.37557000532267665, + "grad_norm": 1.2852704855959018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77440 + }, + { + "epoch": 0.37561850351551274, + "grad_norm": 1.2965553253252438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77450 + }, + { + "epoch": 0.3756670017083488, + "grad_norm": 1.3316260094597965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77460 + }, + { + "epoch": 0.3757154999011849, + "grad_norm": 1.3040109081430273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77470 + }, + { + "epoch": 0.375763998094021, + "grad_norm": 2.5077674763451796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77480 + }, + { + "epoch": 0.3758124962868571, + "grad_norm": 0.05545315518975258, + "learning_rate": 0.0002, + "loss": 0.0578, + "step": 77490 + }, + { + "epoch": 0.3758609944796932, + "grad_norm": 0.0010900871129706502, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 77500 + }, + { + "epoch": 0.37590949267252927, + "grad_norm": 0.0007275001262314618, + "learning_rate": 0.0002, + "loss": 0.3582, + "step": 77510 + }, + { + "epoch": 0.37595799086536535, + "grad_norm": 0.000359463068889454, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 77520 + }, + { + "epoch": 0.37600648905820144, + "grad_norm": 0.0002656151191331446, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 77530 + }, + { + "epoch": 0.37605498725103753, + "grad_norm": 0.0003582958597689867, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 77540 + }, + { + "epoch": 0.3761034854438737, + "grad_norm": 0.00016994828183669597, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77550 + }, + { + "epoch": 0.37615198363670976, + "grad_norm": 0.00011338092008372769, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77560 + }, + { + "epoch": 0.37620048182954585, + "grad_norm": 5.058238093624823e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77570 + }, + { + "epoch": 0.37624898002238194, + "grad_norm": 3.447449125815183e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77580 + }, + { + "epoch": 0.37629747821521803, + "grad_norm": 5.798252095701173e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77590 + }, + { + "epoch": 0.3763459764080541, + "grad_norm": 0.00015293413889594376, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77600 + }, + { + "epoch": 0.3763944746008902, + "grad_norm": 2.7196641894988716e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77610 + }, + { + "epoch": 0.3764429727937263, + "grad_norm": 3.9709928387310356e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77620 + }, + { + "epoch": 0.3764914709865624, + "grad_norm": 2.4127491997205652e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77630 + }, + { + "epoch": 0.37653996917939847, + "grad_norm": 0.00010023400682257488, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77640 + }, + { + "epoch": 0.37658846737223456, + "grad_norm": 4.144268677919172e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77650 + }, + { + "epoch": 0.37663696556507065, + "grad_norm": 2.2457583327195607e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77660 + }, + { + "epoch": 0.37668546375790674, + "grad_norm": 1.8865590391214937e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77670 + }, + { + "epoch": 0.3767339619507428, + "grad_norm": 2.0487059373408556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77680 + }, + { + "epoch": 0.3767824601435789, + "grad_norm": 3.588737308746204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77690 + }, + { + "epoch": 0.376830958336415, + "grad_norm": 0.005399247165769339, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77700 + }, + { + "epoch": 0.3768794565292511, + "grad_norm": 1.6772559320088476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77710 + }, + { + "epoch": 0.3769279547220872, + "grad_norm": 1.2325576790317427e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77720 + }, + { + "epoch": 0.37697645291492327, + "grad_norm": 1.3515748833015095e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77730 + }, + { + "epoch": 0.37702495110775935, + "grad_norm": 3.2250383810605854e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77740 + }, + { + "epoch": 0.37707344930059544, + "grad_norm": 3.360748814884573e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77750 + }, + { + "epoch": 0.37712194749343153, + "grad_norm": 1.9285262169432826e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 77760 + }, + { + "epoch": 0.3771704456862676, + "grad_norm": 3.4149536077165976e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77770 + }, + { + "epoch": 0.3772189438791037, + "grad_norm": 0.006312879733741283, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77780 + }, + { + "epoch": 0.3772674420719398, + "grad_norm": 9.161488560494035e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77790 + }, + { + "epoch": 0.3773159402647759, + "grad_norm": 4.550630183075555e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77800 + }, + { + "epoch": 0.377364438457612, + "grad_norm": 2.2572252419195138e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77810 + }, + { + "epoch": 0.37741293665044806, + "grad_norm": 1.671387508395128e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77820 + }, + { + "epoch": 0.37746143484328415, + "grad_norm": 2.0530123947537504e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77830 + }, + { + "epoch": 0.37750993303612024, + "grad_norm": 3.375030428287573e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77840 + }, + { + "epoch": 0.3775584312289563, + "grad_norm": 3.2638625270919874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77850 + }, + { + "epoch": 0.3776069294217924, + "grad_norm": 2.139906064257957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77860 + }, + { + "epoch": 0.3776554276146285, + "grad_norm": 1.5356694348156452e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77870 + }, + { + "epoch": 0.3777039258074646, + "grad_norm": 1.2426537978171837e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77880 + }, + { + "epoch": 0.3777524240003007, + "grad_norm": 1.899363451229874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77890 + }, + { + "epoch": 0.37780092219313677, + "grad_norm": 3.037476199097e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77900 + }, + { + "epoch": 0.37784942038597286, + "grad_norm": 1.2146840163040906e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77910 + }, + { + "epoch": 0.37789791857880894, + "grad_norm": 1.4705289686389733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77920 + }, + { + "epoch": 0.37794641677164503, + "grad_norm": 8.995201824291144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77930 + }, + { + "epoch": 0.3779949149644811, + "grad_norm": 1.5941026504151523e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77940 + }, + { + "epoch": 0.3780434131573172, + "grad_norm": 7.258874393301085e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77950 + }, + { + "epoch": 0.3780919113501533, + "grad_norm": 1.3045872037764639e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77960 + }, + { + "epoch": 0.3781404095429894, + "grad_norm": 2.3529692043666728e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 77970 + }, + { + "epoch": 0.3781889077358255, + "grad_norm": 0.00020965613657608628, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 77980 + }, + { + "epoch": 0.37823740592866156, + "grad_norm": 6.796907837269828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 77990 + }, + { + "epoch": 0.37828590412149765, + "grad_norm": 0.00028940083575434983, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78000 + }, + { + "epoch": 0.37833440231433374, + "grad_norm": 3.167851537000388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78010 + }, + { + "epoch": 0.37838290050716983, + "grad_norm": 2.537932596169412e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78020 + }, + { + "epoch": 0.3784313987000059, + "grad_norm": 2.089558620355092e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78030 + }, + { + "epoch": 0.378479896892842, + "grad_norm": 4.489149432629347e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78040 + }, + { + "epoch": 0.3785283950856781, + "grad_norm": 2.7356987629900686e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78050 + }, + { + "epoch": 0.37857689327851424, + "grad_norm": 2.5702429411467165e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78060 + }, + { + "epoch": 0.3786253914713503, + "grad_norm": 1.8317350622965023e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78070 + }, + { + "epoch": 0.3786738896641864, + "grad_norm": 1.2954293197253719e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78080 + }, + { + "epoch": 0.3787223878570225, + "grad_norm": 2.2527614419232123e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78090 + }, + { + "epoch": 0.3787708860498586, + "grad_norm": 2.436177783238236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78100 + }, + { + "epoch": 0.3788193842426947, + "grad_norm": 1.2679641258728225e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78110 + }, + { + "epoch": 0.37886788243553077, + "grad_norm": 1.5207646356429905e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78120 + }, + { + "epoch": 0.37891638062836686, + "grad_norm": 1.23733225336764e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78130 + }, + { + "epoch": 0.37896487882120294, + "grad_norm": 1.8049264326691628e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78140 + }, + { + "epoch": 0.37901337701403903, + "grad_norm": 2.0353882064227946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78150 + }, + { + "epoch": 0.3790618752068751, + "grad_norm": 8.706736480235122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78160 + }, + { + "epoch": 0.3791103733997112, + "grad_norm": 9.038445568876341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78170 + }, + { + "epoch": 0.3791588715925473, + "grad_norm": 9.808641334529966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78180 + }, + { + "epoch": 0.3792073697853834, + "grad_norm": 2.216847860836424e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78190 + }, + { + "epoch": 0.3792558679782195, + "grad_norm": 1.3563174434239045e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78200 + }, + { + "epoch": 0.37930436617105556, + "grad_norm": 1.5849382180022076e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78210 + }, + { + "epoch": 0.37935286436389165, + "grad_norm": 7.240611012093723e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78220 + }, + { + "epoch": 0.37940136255672774, + "grad_norm": 7.1302415562968235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78230 + }, + { + "epoch": 0.3794498607495638, + "grad_norm": 1.3734476851823274e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78240 + }, + { + "epoch": 0.3794983589423999, + "grad_norm": 1.2743437764584087e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78250 + }, + { + "epoch": 0.379546857135236, + "grad_norm": 6.895520073157968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78260 + }, + { + "epoch": 0.3795953553280721, + "grad_norm": 1.3131858395354357e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78270 + }, + { + "epoch": 0.3796438535209082, + "grad_norm": 6.6175248321087565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78280 + }, + { + "epoch": 0.37969235171374427, + "grad_norm": 1.0951669537462294e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78290 + }, + { + "epoch": 0.37974084990658036, + "grad_norm": 1.60412528202869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78300 + }, + { + "epoch": 0.37978934809941645, + "grad_norm": 6.012186531734187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78310 + }, + { + "epoch": 0.37983784629225253, + "grad_norm": 6.885196853545494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78320 + }, + { + "epoch": 0.3798863444850886, + "grad_norm": 6.822800969530363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78330 + }, + { + "epoch": 0.3799348426779247, + "grad_norm": 1.4085522707318887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78340 + }, + { + "epoch": 0.3799833408707608, + "grad_norm": 8.562540642742533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78350 + }, + { + "epoch": 0.3800318390635969, + "grad_norm": 1.378011529595824e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78360 + }, + { + "epoch": 0.380080337256433, + "grad_norm": 5.252751634543529e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78370 + }, + { + "epoch": 0.38012883544926906, + "grad_norm": 8.317289939441252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78380 + }, + { + "epoch": 0.38017733364210515, + "grad_norm": 7.246141649375204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78390 + }, + { + "epoch": 0.38022583183494124, + "grad_norm": 8.227508260461036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78400 + }, + { + "epoch": 0.38027433002777733, + "grad_norm": 9.618957847123966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78410 + }, + { + "epoch": 0.3803228282206134, + "grad_norm": 6.442013273044722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78420 + }, + { + "epoch": 0.3803713264134495, + "grad_norm": 4.066959718329599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78430 + }, + { + "epoch": 0.3804198246062856, + "grad_norm": 2.1391964764916338e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78440 + }, + { + "epoch": 0.3804683227991217, + "grad_norm": 2.1961561287753284e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78450 + }, + { + "epoch": 0.38051682099195777, + "grad_norm": 4.977352091373177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78460 + }, + { + "epoch": 0.38056531918479386, + "grad_norm": 6.000031135044992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78470 + }, + { + "epoch": 0.38061381737762995, + "grad_norm": 4.115740011911839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78480 + }, + { + "epoch": 0.38066231557046604, + "grad_norm": 1.0599498637020588e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78490 + }, + { + "epoch": 0.3807108137633021, + "grad_norm": 7.4592226155800745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78500 + }, + { + "epoch": 0.3807593119561382, + "grad_norm": 4.4725006773660425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78510 + }, + { + "epoch": 0.3808078101489743, + "grad_norm": 3.4244515063619474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78520 + }, + { + "epoch": 0.3808563083418104, + "grad_norm": 1.2437585610314272e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78530 + }, + { + "epoch": 0.3809048065346465, + "grad_norm": 7.236133114929544e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 78540 + }, + { + "epoch": 0.38095330472748257, + "grad_norm": 5.239744496066123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78550 + }, + { + "epoch": 0.38100180292031866, + "grad_norm": 4.916362740914337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78560 + }, + { + "epoch": 0.38105030111315474, + "grad_norm": 4.088025434612064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78570 + }, + { + "epoch": 0.3810987993059909, + "grad_norm": 3.3862486361613264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78580 + }, + { + "epoch": 0.381147297498827, + "grad_norm": 5.730485099775251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78590 + }, + { + "epoch": 0.38119579569166306, + "grad_norm": 5.333954959496623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78600 + }, + { + "epoch": 0.38124429388449915, + "grad_norm": 2.8198224754305556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78610 + }, + { + "epoch": 0.38129279207733524, + "grad_norm": 3.034001792912022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78620 + }, + { + "epoch": 0.38134129027017133, + "grad_norm": 3.0284202239272418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78630 + }, + { + "epoch": 0.3813897884630074, + "grad_norm": 4.4712232920574024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78640 + }, + { + "epoch": 0.3814382866558435, + "grad_norm": 4.871757937507937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78650 + }, + { + "epoch": 0.3814867848486796, + "grad_norm": 3.2398054372606566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78660 + }, + { + "epoch": 0.3815352830415157, + "grad_norm": 4.866895324084908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78670 + }, + { + "epoch": 0.38158378123435177, + "grad_norm": 5.000811597710708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78680 + }, + { + "epoch": 0.38163227942718786, + "grad_norm": 4.433771209733095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78690 + }, + { + "epoch": 0.38168077762002395, + "grad_norm": 5.720905392081477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78700 + }, + { + "epoch": 0.38172927581286004, + "grad_norm": 2.6399113721709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78710 + }, + { + "epoch": 0.3817777740056961, + "grad_norm": 2.784803882605047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78720 + }, + { + "epoch": 0.3818262721985322, + "grad_norm": 8.94964614417404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78730 + }, + { + "epoch": 0.3818747703913683, + "grad_norm": 4.006333256256767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78740 + }, + { + "epoch": 0.3819232685842044, + "grad_norm": 5.451862762129167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78750 + }, + { + "epoch": 0.3819717667770405, + "grad_norm": 2.4259320525743533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78760 + }, + { + "epoch": 0.38202026496987657, + "grad_norm": 3.8153198147483636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78770 + }, + { + "epoch": 0.38206876316271265, + "grad_norm": 2.7439659788797144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78780 + }, + { + "epoch": 0.38211726135554874, + "grad_norm": 1.0841096809599549e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78790 + }, + { + "epoch": 0.38216575954838483, + "grad_norm": 3.845427272608504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78800 + }, + { + "epoch": 0.3822142577412209, + "grad_norm": 5.37592632099404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78810 + }, + { + "epoch": 0.382262755934057, + "grad_norm": 3.040083129235427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78820 + }, + { + "epoch": 0.3823112541268931, + "grad_norm": 2.4785163077467587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78830 + }, + { + "epoch": 0.3823597523197292, + "grad_norm": 3.927163561456837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78840 + }, + { + "epoch": 0.3824082505125653, + "grad_norm": 4.447609626367921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78850 + }, + { + "epoch": 0.38245674870540136, + "grad_norm": 2.1893426946917316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78860 + }, + { + "epoch": 0.38250524689823745, + "grad_norm": 3.083378032897599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78870 + }, + { + "epoch": 0.38255374509107354, + "grad_norm": 1.3548222341341898e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78880 + }, + { + "epoch": 0.3826022432839096, + "grad_norm": 4.114679995836923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78890 + }, + { + "epoch": 0.3826507414767457, + "grad_norm": 3.47455397786689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78900 + }, + { + "epoch": 0.3826992396695818, + "grad_norm": 2.11980091080477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78910 + }, + { + "epoch": 0.3827477378624179, + "grad_norm": 0.0007315928232856095, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78920 + }, + { + "epoch": 0.382796236055254, + "grad_norm": 2.621058411023114e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78930 + }, + { + "epoch": 0.38284473424809007, + "grad_norm": 3.7234410683595343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78940 + }, + { + "epoch": 0.38289323244092616, + "grad_norm": 3.490414655971108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78950 + }, + { + "epoch": 0.38294173063376225, + "grad_norm": 0.00010908166586887091, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78960 + }, + { + "epoch": 0.38299022882659833, + "grad_norm": 1.8312092606720398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78970 + }, + { + "epoch": 0.3830387270194344, + "grad_norm": 3.090961035923101e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78980 + }, + { + "epoch": 0.3830872252122705, + "grad_norm": 3.2244352041743696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 78990 + }, + { + "epoch": 0.3831357234051066, + "grad_norm": 3.3995945614151424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79000 + }, + { + "epoch": 0.3831842215979427, + "grad_norm": 3.633941105363192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79010 + }, + { + "epoch": 0.3832327197907788, + "grad_norm": 1.708166337266448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79020 + }, + { + "epoch": 0.38328121798361486, + "grad_norm": 2.1077778455946827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79030 + }, + { + "epoch": 0.38332971617645095, + "grad_norm": 2.9698201160499593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79040 + }, + { + "epoch": 0.38337821436928704, + "grad_norm": 2.7821868116006954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79050 + }, + { + "epoch": 0.38342671256212313, + "grad_norm": 2.0271504581614863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79060 + }, + { + "epoch": 0.3834752107549592, + "grad_norm": 1.98611064661236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79070 + }, + { + "epoch": 0.3835237089477953, + "grad_norm": 1.8261542891195859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79080 + }, + { + "epoch": 0.38357220714063145, + "grad_norm": 2.6831858122022822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79090 + }, + { + "epoch": 0.38362070533346754, + "grad_norm": 2.8443819246604107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79100 + }, + { + "epoch": 0.3836692035263036, + "grad_norm": 1.8547420950199012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79110 + }, + { + "epoch": 0.3837177017191397, + "grad_norm": 2.3587776922795456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79120 + }, + { + "epoch": 0.3837661999119758, + "grad_norm": 1.8467784457243397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79130 + }, + { + "epoch": 0.3838146981048119, + "grad_norm": 3.007711029567872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79140 + }, + { + "epoch": 0.383863196297648, + "grad_norm": 2.7705173124559224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79150 + }, + { + "epoch": 0.38391169449048407, + "grad_norm": 3.62360083272506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79160 + }, + { + "epoch": 0.38396019268332016, + "grad_norm": 1.6822926909298985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79170 + }, + { + "epoch": 0.38400869087615624, + "grad_norm": 1.5823706007722649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79180 + }, + { + "epoch": 0.38405718906899233, + "grad_norm": 1.5904950487310998e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79190 + }, + { + "epoch": 0.3841056872618284, + "grad_norm": 2.4357041183975525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79200 + }, + { + "epoch": 0.3841541854546645, + "grad_norm": 1.584759615980147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79210 + }, + { + "epoch": 0.3842026836475006, + "grad_norm": 1.5490161331399577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79220 + }, + { + "epoch": 0.3842511818403367, + "grad_norm": 1.4835333104201709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79230 + }, + { + "epoch": 0.3842996800331728, + "grad_norm": 3.301337301309104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79240 + }, + { + "epoch": 0.38434817822600886, + "grad_norm": 2.3412560494762147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79250 + }, + { + "epoch": 0.38439667641884495, + "grad_norm": 1.9366652850294486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79260 + }, + { + "epoch": 0.38444517461168104, + "grad_norm": 1.6407291241193889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79270 + }, + { + "epoch": 0.38449367280451713, + "grad_norm": 1.4192285107128555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79280 + }, + { + "epoch": 0.3845421709973532, + "grad_norm": 2.2254064333537826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79290 + }, + { + "epoch": 0.3845906691901893, + "grad_norm": 2.414635901004658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79300 + }, + { + "epoch": 0.3846391673830254, + "grad_norm": 1.4648034039055347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79310 + }, + { + "epoch": 0.3846876655758615, + "grad_norm": 1.047172372636851e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79320 + }, + { + "epoch": 0.38473616376869757, + "grad_norm": 1.4069532880967017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79330 + }, + { + "epoch": 0.38478466196153366, + "grad_norm": 2.4219841634476325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79340 + }, + { + "epoch": 0.38483316015436975, + "grad_norm": 3.0608734959969297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79350 + }, + { + "epoch": 0.38488165834720584, + "grad_norm": 1.3483604561770335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79360 + }, + { + "epoch": 0.3849301565400419, + "grad_norm": 1.8153016299038427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79370 + }, + { + "epoch": 0.384978654732878, + "grad_norm": 1.3600906640931498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79380 + }, + { + "epoch": 0.3850271529257141, + "grad_norm": 3.0652547593490453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79390 + }, + { + "epoch": 0.3850756511185502, + "grad_norm": 2.0776740257133497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79400 + }, + { + "epoch": 0.3851241493113863, + "grad_norm": 1.1706435998348752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79410 + }, + { + "epoch": 0.38517264750422237, + "grad_norm": 1.5750979400763754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79420 + }, + { + "epoch": 0.38522114569705845, + "grad_norm": 1.3625926840177272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79430 + }, + { + "epoch": 0.38526964388989454, + "grad_norm": 2.313878894710797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79440 + }, + { + "epoch": 0.38531814208273063, + "grad_norm": 2.318929318789742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79450 + }, + { + "epoch": 0.3853666402755667, + "grad_norm": 1.736191507006879e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79460 + }, + { + "epoch": 0.3854151384684028, + "grad_norm": 1.2932399613418966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79470 + }, + { + "epoch": 0.3854636366612389, + "grad_norm": 1.273384668820654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79480 + }, + { + "epoch": 0.385512134854075, + "grad_norm": 1.9295089259685483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79490 + }, + { + "epoch": 0.38556063304691107, + "grad_norm": 4.189317223790567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79500 + }, + { + "epoch": 0.38560913123974716, + "grad_norm": 1.4019710761203896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79510 + }, + { + "epoch": 0.38565762943258325, + "grad_norm": 1.2105620044167154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79520 + }, + { + "epoch": 0.38570612762541934, + "grad_norm": 2.2796371013100725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79530 + }, + { + "epoch": 0.3857546258182554, + "grad_norm": 4.291934601496905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79540 + }, + { + "epoch": 0.3858031240110915, + "grad_norm": 1.8725648942563566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79550 + }, + { + "epoch": 0.3858516222039276, + "grad_norm": 1.1013297580575454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79560 + }, + { + "epoch": 0.3859001203967637, + "grad_norm": 1.0514920631976565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79570 + }, + { + "epoch": 0.3859486185895998, + "grad_norm": 2.6599668672133703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79580 + }, + { + "epoch": 0.38599711678243587, + "grad_norm": 2.0684874471044168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79590 + }, + { + "epoch": 0.386045614975272, + "grad_norm": 1.911382241814863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79600 + }, + { + "epoch": 0.3860941131681081, + "grad_norm": 1.1938764146179892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79610 + }, + { + "epoch": 0.3861426113609442, + "grad_norm": 1.0785336144181201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79620 + }, + { + "epoch": 0.3861911095537803, + "grad_norm": 1.5017644727777224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79630 + }, + { + "epoch": 0.38623960774661636, + "grad_norm": 1.9717849681910593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79640 + }, + { + "epoch": 0.38628810593945245, + "grad_norm": 1.8989512682310306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79650 + }, + { + "epoch": 0.38633660413228854, + "grad_norm": 1.4122193761068047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79660 + }, + { + "epoch": 0.38638510232512463, + "grad_norm": 2.596228114271071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79670 + }, + { + "epoch": 0.3864336005179607, + "grad_norm": 1.23372592497617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79680 + }, + { + "epoch": 0.3864820987107968, + "grad_norm": 8.68109600560274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79690 + }, + { + "epoch": 0.3865305969036329, + "grad_norm": 1.8502138345866115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79700 + }, + { + "epoch": 0.386579095096469, + "grad_norm": 1.1141511322421138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79710 + }, + { + "epoch": 0.38662759328930507, + "grad_norm": 9.758423402672634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79720 + }, + { + "epoch": 0.38667609148214116, + "grad_norm": 1.0115574013980222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79730 + }, + { + "epoch": 0.38672458967497725, + "grad_norm": 1.8213665953226155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79740 + }, + { + "epoch": 0.38677308786781334, + "grad_norm": 1.7215393199876416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79750 + }, + { + "epoch": 0.3868215860606494, + "grad_norm": 1.171021040136111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79760 + }, + { + "epoch": 0.3868700842534855, + "grad_norm": 1.0799867595778778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79770 + }, + { + "epoch": 0.3869185824463216, + "grad_norm": 1.3855124052497558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79780 + }, + { + "epoch": 0.3869670806391577, + "grad_norm": 1.6332688801412587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79790 + }, + { + "epoch": 0.3870155788319938, + "grad_norm": 1.791997647160315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79800 + }, + { + "epoch": 0.38706407702482987, + "grad_norm": 9.131364890890836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79810 + }, + { + "epoch": 0.38711257521766596, + "grad_norm": 1.347222109870927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79820 + }, + { + "epoch": 0.38716107341050204, + "grad_norm": 1.3515914361050818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79830 + }, + { + "epoch": 0.38720957160333813, + "grad_norm": 3.5111866054649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79840 + }, + { + "epoch": 0.3872580697961742, + "grad_norm": 1.6760121752668056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79850 + }, + { + "epoch": 0.3873065679890103, + "grad_norm": 1.2118759968871018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79860 + }, + { + "epoch": 0.3873550661818464, + "grad_norm": 8.826020234664611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79870 + }, + { + "epoch": 0.3874035643746825, + "grad_norm": 9.684606538939988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79880 + }, + { + "epoch": 0.3874520625675186, + "grad_norm": 5.576464900514111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79890 + }, + { + "epoch": 0.38750056076035466, + "grad_norm": 5.47236322745448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79900 + }, + { + "epoch": 0.38754905895319075, + "grad_norm": 1.1329694871164975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79910 + }, + { + "epoch": 0.38759755714602684, + "grad_norm": 1.002712338049605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79920 + }, + { + "epoch": 0.3876460553388629, + "grad_norm": 9.013418207359791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79930 + }, + { + "epoch": 0.387694553531699, + "grad_norm": 1.6406447684857994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79940 + }, + { + "epoch": 0.3877430517245351, + "grad_norm": 1.8083375152855297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79950 + }, + { + "epoch": 0.3877915499173712, + "grad_norm": 8.384971579289413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79960 + }, + { + "epoch": 0.3878400481102073, + "grad_norm": 8.805367883724102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79970 + }, + { + "epoch": 0.38788854630304337, + "grad_norm": 9.02691169812897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79980 + }, + { + "epoch": 0.38793704449587946, + "grad_norm": 1.5521013665420469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 79990 + }, + { + "epoch": 0.38798554268871555, + "grad_norm": 1.8164779476137483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80000 + }, + { + "epoch": 0.38803404088155163, + "grad_norm": 8.380465601476317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80010 + }, + { + "epoch": 0.3880825390743877, + "grad_norm": 8.098135140244267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80020 + }, + { + "epoch": 0.3881310372672238, + "grad_norm": 8.848288644003333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80030 + }, + { + "epoch": 0.3881795354600599, + "grad_norm": 1.336429363618663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80040 + }, + { + "epoch": 0.388228033652896, + "grad_norm": 1.4572234476872836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80050 + }, + { + "epoch": 0.3882765318457321, + "grad_norm": 3.10150753648486e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80060 + }, + { + "epoch": 0.38832503003856816, + "grad_norm": 7.428327535308199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80070 + }, + { + "epoch": 0.38837352823140425, + "grad_norm": 8.559666753171769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80080 + }, + { + "epoch": 0.38842202642424034, + "grad_norm": 1.4196017446010956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80090 + }, + { + "epoch": 0.38847052461707643, + "grad_norm": 1.3634461311085033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80100 + }, + { + "epoch": 0.3885190228099126, + "grad_norm": 7.6841496365887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80110 + }, + { + "epoch": 0.38856752100274866, + "grad_norm": 1.11898793875298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80120 + }, + { + "epoch": 0.38861601919558475, + "grad_norm": 9.839340009420994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80130 + }, + { + "epoch": 0.38866451738842084, + "grad_norm": 1.4086905366639257e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80140 + }, + { + "epoch": 0.3887130155812569, + "grad_norm": 7.656317029614002e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80150 + }, + { + "epoch": 0.388761513774093, + "grad_norm": 5.583452548307832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80160 + }, + { + "epoch": 0.3888100119669291, + "grad_norm": 1.5924893887131475e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80170 + }, + { + "epoch": 0.3888585101597652, + "grad_norm": 5.87261865803157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80180 + }, + { + "epoch": 0.3889070083526013, + "grad_norm": 9.303216756961774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80190 + }, + { + "epoch": 0.38895550654543737, + "grad_norm": 0.01935618743300438, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 80200 + }, + { + "epoch": 0.38900400473827346, + "grad_norm": 0.0005491882911883295, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 80210 + }, + { + "epoch": 0.38905250293110955, + "grad_norm": 0.0008096008095890284, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 80220 + }, + { + "epoch": 0.38910100112394563, + "grad_norm": 0.0023962745908647776, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80230 + }, + { + "epoch": 0.3891494993167817, + "grad_norm": 9.686857083579525e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80240 + }, + { + "epoch": 0.3891979975096178, + "grad_norm": 5.438842345029116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80250 + }, + { + "epoch": 0.3892464957024539, + "grad_norm": 1.6683949070284143e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80260 + }, + { + "epoch": 0.38929499389529, + "grad_norm": 1.337112371402327e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80270 + }, + { + "epoch": 0.3893434920881261, + "grad_norm": 1.1469815945019946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80280 + }, + { + "epoch": 0.38939199028096216, + "grad_norm": 2.2151854864205234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80290 + }, + { + "epoch": 0.38944048847379825, + "grad_norm": 0.00035465319524519145, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80300 + }, + { + "epoch": 0.38948898666663434, + "grad_norm": 1.122518369811587e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80310 + }, + { + "epoch": 0.38953748485947043, + "grad_norm": 6.442198809963884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80320 + }, + { + "epoch": 0.3895859830523065, + "grad_norm": 1.635050830373075e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80330 + }, + { + "epoch": 0.3896344812451426, + "grad_norm": 1.1953939974773675e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80340 + }, + { + "epoch": 0.3896829794379787, + "grad_norm": 1.3664898688148241e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80350 + }, + { + "epoch": 0.3897314776308148, + "grad_norm": 6.234689408302074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80360 + }, + { + "epoch": 0.38977997582365087, + "grad_norm": 1.4400076906895265e-05, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 80370 + }, + { + "epoch": 0.38982847401648696, + "grad_norm": 6.883331661811098e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 80380 + }, + { + "epoch": 0.38987697220932305, + "grad_norm": 0.0018334289779886603, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 80390 + }, + { + "epoch": 0.38992547040215914, + "grad_norm": 0.0011126004392281175, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 80400 + }, + { + "epoch": 0.3899739685949952, + "grad_norm": 0.00019218624220229685, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80410 + }, + { + "epoch": 0.3900224667878313, + "grad_norm": 0.0001372256374452263, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80420 + }, + { + "epoch": 0.3900709649806674, + "grad_norm": 7.252902287291363e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80430 + }, + { + "epoch": 0.3901194631735035, + "grad_norm": 5.85535162827e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80440 + }, + { + "epoch": 0.3901679613663396, + "grad_norm": 4.515692489803769e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80450 + }, + { + "epoch": 0.39021645955917567, + "grad_norm": 2.7613948986981995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80460 + }, + { + "epoch": 0.39026495775201175, + "grad_norm": 0.00011774444283219054, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80470 + }, + { + "epoch": 0.39031345594484784, + "grad_norm": 1.790042188076768e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80480 + }, + { + "epoch": 0.39036195413768393, + "grad_norm": 3.680487861856818e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 80490 + }, + { + "epoch": 0.39041045233052, + "grad_norm": 0.00011889711458934471, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80500 + }, + { + "epoch": 0.3904589505233561, + "grad_norm": 9.644591045798734e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80510 + }, + { + "epoch": 0.3905074487161922, + "grad_norm": 8.871432510204613e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80520 + }, + { + "epoch": 0.3905559469090283, + "grad_norm": 5.347377737052739e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80530 + }, + { + "epoch": 0.3906044451018644, + "grad_norm": 0.00015926607011351734, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80540 + }, + { + "epoch": 0.39065294329470046, + "grad_norm": 0.0003740928659681231, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 80550 + }, + { + "epoch": 0.39070144148753655, + "grad_norm": 0.00024136851425282657, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80560 + }, + { + "epoch": 0.39074993968037264, + "grad_norm": 0.0001160141036962159, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80570 + }, + { + "epoch": 0.3907984378732087, + "grad_norm": 5.965594027657062e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80580 + }, + { + "epoch": 0.3908469360660448, + "grad_norm": 5.532037903321907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80590 + }, + { + "epoch": 0.3908954342588809, + "grad_norm": 4.3415173422545195e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80600 + }, + { + "epoch": 0.390943932451717, + "grad_norm": 0.0001488854322815314, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 80610 + }, + { + "epoch": 0.39099243064455314, + "grad_norm": 0.00026373594300821424, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80620 + }, + { + "epoch": 0.3910409288373892, + "grad_norm": 0.00032105654827319086, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80630 + }, + { + "epoch": 0.3910894270302253, + "grad_norm": 9.951941319741309e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80640 + }, + { + "epoch": 0.3911379252230614, + "grad_norm": 4.672180148190819e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80650 + }, + { + "epoch": 0.3911864234158975, + "grad_norm": 0.00010900780762312934, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80660 + }, + { + "epoch": 0.3912349216087336, + "grad_norm": 3.14068456646055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80670 + }, + { + "epoch": 0.39128341980156967, + "grad_norm": 2.5101686333073303e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80680 + }, + { + "epoch": 0.39133191799440575, + "grad_norm": 2.696996489248704e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80690 + }, + { + "epoch": 0.39138041618724184, + "grad_norm": 1.9647752196760848e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80700 + }, + { + "epoch": 0.39142891438007793, + "grad_norm": 1.8628566976985894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80710 + }, + { + "epoch": 0.391477412572914, + "grad_norm": 1.723382229101844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80720 + }, + { + "epoch": 0.3915259107657501, + "grad_norm": 1.4924742572475225e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80730 + }, + { + "epoch": 0.3915744089585862, + "grad_norm": 1.7943064449355006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80740 + }, + { + "epoch": 0.3916229071514223, + "grad_norm": 1.4964462025091052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80750 + }, + { + "epoch": 0.39167140534425837, + "grad_norm": 1.1060458746214863e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80760 + }, + { + "epoch": 0.39171990353709446, + "grad_norm": 1.072665963874897e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80770 + }, + { + "epoch": 0.39176840172993055, + "grad_norm": 2.551934085204266e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80780 + }, + { + "epoch": 0.39181689992276664, + "grad_norm": 1.1591027032409329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80790 + }, + { + "epoch": 0.3918653981156027, + "grad_norm": 1.4233392903406639e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80800 + }, + { + "epoch": 0.3919138963084388, + "grad_norm": 1.1836574230983388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80810 + }, + { + "epoch": 0.3919623945012749, + "grad_norm": 9.71166809904389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80820 + }, + { + "epoch": 0.392010892694111, + "grad_norm": 9.057956958713476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80830 + }, + { + "epoch": 0.3920593908869471, + "grad_norm": 1.0166589163418394e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80840 + }, + { + "epoch": 0.39210788907978317, + "grad_norm": 9.361579941469245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80850 + }, + { + "epoch": 0.39215638727261926, + "grad_norm": 7.944528078951407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80860 + }, + { + "epoch": 0.39220488546545534, + "grad_norm": 7.255778200487839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80870 + }, + { + "epoch": 0.39225338365829143, + "grad_norm": 6.5729695961636025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80880 + }, + { + "epoch": 0.3923018818511275, + "grad_norm": 7.724596798652783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80890 + }, + { + "epoch": 0.3923503800439636, + "grad_norm": 9.25933363760123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80900 + }, + { + "epoch": 0.3923988782367997, + "grad_norm": 7.556866330560297e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 80910 + }, + { + "epoch": 0.3924473764296358, + "grad_norm": 0.00022851231915410608, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80920 + }, + { + "epoch": 0.3924958746224719, + "grad_norm": 0.00017908710287883878, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80930 + }, + { + "epoch": 0.39254437281530796, + "grad_norm": 9.388422768097371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80940 + }, + { + "epoch": 0.39259287100814405, + "grad_norm": 8.62001979839988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80950 + }, + { + "epoch": 0.39264136920098014, + "grad_norm": 0.01129545085132122, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80960 + }, + { + "epoch": 0.3926898673938162, + "grad_norm": 5.280946879793191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80970 + }, + { + "epoch": 0.3927383655866523, + "grad_norm": 2.2051441192161292e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 80980 + }, + { + "epoch": 0.3927868637794884, + "grad_norm": 0.00033963570604100823, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 80990 + }, + { + "epoch": 0.3928353619723245, + "grad_norm": 0.00019926069944631308, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81000 + }, + { + "epoch": 0.3928838601651606, + "grad_norm": 4.564179471344687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81010 + }, + { + "epoch": 0.39293235835799667, + "grad_norm": 2.8340075004962273e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81020 + }, + { + "epoch": 0.39298085655083276, + "grad_norm": 2.1085204934934154e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81030 + }, + { + "epoch": 0.39302935474366885, + "grad_norm": 3.321783151477575e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81040 + }, + { + "epoch": 0.39307785293650493, + "grad_norm": 2.2921571144252084e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81050 + }, + { + "epoch": 0.393126351129341, + "grad_norm": 1.4080787877901457e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81060 + }, + { + "epoch": 0.3931748493221771, + "grad_norm": 1.2685705769399647e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81070 + }, + { + "epoch": 0.3932233475150132, + "grad_norm": 1.1935046131839044e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81080 + }, + { + "epoch": 0.3932718457078493, + "grad_norm": 1.454538414691342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81090 + }, + { + "epoch": 0.3933203439006854, + "grad_norm": 1.3647440027853008e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81100 + }, + { + "epoch": 0.39336884209352146, + "grad_norm": 1.1658995390462223e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81110 + }, + { + "epoch": 0.39341734028635755, + "grad_norm": 8.744857041165233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81120 + }, + { + "epoch": 0.39346583847919364, + "grad_norm": 8.61179159983294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81130 + }, + { + "epoch": 0.3935143366720298, + "grad_norm": 9.616972420189995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81140 + }, + { + "epoch": 0.3935628348648659, + "grad_norm": 1.152618006017292e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81150 + }, + { + "epoch": 0.39361133305770196, + "grad_norm": 9.369483450427651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81160 + }, + { + "epoch": 0.39365983125053805, + "grad_norm": 7.626251772308024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81170 + }, + { + "epoch": 0.39370832944337414, + "grad_norm": 6.297699655988254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81180 + }, + { + "epoch": 0.3937568276362102, + "grad_norm": 8.6428581198561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81190 + }, + { + "epoch": 0.3938053258290463, + "grad_norm": 7.0415762820630334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81200 + }, + { + "epoch": 0.3938538240218824, + "grad_norm": 5.700894234905718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81210 + }, + { + "epoch": 0.3939023222147185, + "grad_norm": 5.436976607597899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81220 + }, + { + "epoch": 0.3939508204075546, + "grad_norm": 4.766352958540665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81230 + }, + { + "epoch": 0.39399931860039067, + "grad_norm": 7.107205419742968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81240 + }, + { + "epoch": 0.39404781679322676, + "grad_norm": 6.5687440837791655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81250 + }, + { + "epoch": 0.39409631498606285, + "grad_norm": 4.3364361772546545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81260 + }, + { + "epoch": 0.39414481317889893, + "grad_norm": 4.295680355426157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81270 + }, + { + "epoch": 0.394193311371735, + "grad_norm": 4.007092229585396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81280 + }, + { + "epoch": 0.3942418095645711, + "grad_norm": 6.111867605795851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81290 + }, + { + "epoch": 0.3942903077574072, + "grad_norm": 6.823359399277251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81300 + }, + { + "epoch": 0.3943388059502433, + "grad_norm": 3.911684871127363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81310 + }, + { + "epoch": 0.3943873041430794, + "grad_norm": 4.1987291297118645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81320 + }, + { + "epoch": 0.39443580233591546, + "grad_norm": 3.821347490884364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81330 + }, + { + "epoch": 0.39448430052875155, + "grad_norm": 5.47799618288991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81340 + }, + { + "epoch": 0.39453279872158764, + "grad_norm": 5.2872505875711795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81350 + }, + { + "epoch": 0.39458129691442373, + "grad_norm": 3.2219591048487928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81360 + }, + { + "epoch": 0.3946297951072598, + "grad_norm": 3.596534270400298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81370 + }, + { + "epoch": 0.3946782933000959, + "grad_norm": 2.9572313451353693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81380 + }, + { + "epoch": 0.394726791492932, + "grad_norm": 4.418077878654003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81390 + }, + { + "epoch": 0.3947752896857681, + "grad_norm": 4.407737833389547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81400 + }, + { + "epoch": 0.39482378787860417, + "grad_norm": 3.2154091513802996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81410 + }, + { + "epoch": 0.39487228607144026, + "grad_norm": 3.219742666260572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81420 + }, + { + "epoch": 0.39492078426427635, + "grad_norm": 3.0488392894767458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81430 + }, + { + "epoch": 0.39496928245711244, + "grad_norm": 4.2761080294440035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81440 + }, + { + "epoch": 0.3950177806499485, + "grad_norm": 4.227425506542204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81450 + }, + { + "epoch": 0.3950662788427846, + "grad_norm": 2.831252686519292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81460 + }, + { + "epoch": 0.3951147770356207, + "grad_norm": 3.0496487397613237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81470 + }, + { + "epoch": 0.3951632752284568, + "grad_norm": 2.960410711239092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81480 + }, + { + "epoch": 0.3952117734212929, + "grad_norm": 4.233604158798698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81490 + }, + { + "epoch": 0.39526027161412897, + "grad_norm": 4.243866897013504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81500 + }, + { + "epoch": 0.39530876980696505, + "grad_norm": 2.8558620215335395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81510 + }, + { + "epoch": 0.39535726799980114, + "grad_norm": 2.421081489956123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81520 + }, + { + "epoch": 0.39540576619263723, + "grad_norm": 2.6748728032544022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81530 + }, + { + "epoch": 0.3954542643854733, + "grad_norm": 3.3031351449608337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81540 + }, + { + "epoch": 0.3955027625783094, + "grad_norm": 3.5276698326924816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81550 + }, + { + "epoch": 0.3955512607711455, + "grad_norm": 3.0531302854797104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81560 + }, + { + "epoch": 0.3955997589639816, + "grad_norm": 2.590940312074963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81570 + }, + { + "epoch": 0.3956482571568177, + "grad_norm": 2.305389898538124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81580 + }, + { + "epoch": 0.39569675534965376, + "grad_norm": 3.3668684409349225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81590 + }, + { + "epoch": 0.39574525354248985, + "grad_norm": 3.654122110674507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81600 + }, + { + "epoch": 0.39579375173532594, + "grad_norm": 2.503442146917223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81610 + }, + { + "epoch": 0.395842249928162, + "grad_norm": 2.1378878045652527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81620 + }, + { + "epoch": 0.3958907481209981, + "grad_norm": 2.021527961915126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81630 + }, + { + "epoch": 0.3959392463138342, + "grad_norm": 3.189951257809298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81640 + }, + { + "epoch": 0.39598774450667035, + "grad_norm": 2.589429641375318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81650 + }, + { + "epoch": 0.39603624269950644, + "grad_norm": 1.876209580586874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81660 + }, + { + "epoch": 0.3960847408923425, + "grad_norm": 2.1338228179956786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81670 + }, + { + "epoch": 0.3961332390851786, + "grad_norm": 2.0694019440270495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81680 + }, + { + "epoch": 0.3961817372780147, + "grad_norm": 2.6019661163445562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81690 + }, + { + "epoch": 0.3962302354708508, + "grad_norm": 3.065084683839814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81700 + }, + { + "epoch": 0.3962787336636869, + "grad_norm": 2.0495810986176366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81710 + }, + { + "epoch": 0.39632723185652297, + "grad_norm": 2.221832573923166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81720 + }, + { + "epoch": 0.39637573004935905, + "grad_norm": 2.082743549181032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81730 + }, + { + "epoch": 0.39642422824219514, + "grad_norm": 2.399401637376286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81740 + }, + { + "epoch": 0.39647272643503123, + "grad_norm": 2.338571675863932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81750 + }, + { + "epoch": 0.3965212246278673, + "grad_norm": 1.9698911728482926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81760 + }, + { + "epoch": 0.3965697228207034, + "grad_norm": 1.5746552435302874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81770 + }, + { + "epoch": 0.3966182210135395, + "grad_norm": 2.235979764009244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81780 + }, + { + "epoch": 0.3966667192063756, + "grad_norm": 2.403244934612303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81790 + }, + { + "epoch": 0.3967152173992117, + "grad_norm": 4.189551418676274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81800 + }, + { + "epoch": 0.39676371559204776, + "grad_norm": 1.6860024061315926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81810 + }, + { + "epoch": 0.39681221378488385, + "grad_norm": 1.6282054957628134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81820 + }, + { + "epoch": 0.39686071197771994, + "grad_norm": 1.4663165757156094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81830 + }, + { + "epoch": 0.396909210170556, + "grad_norm": 2.1303328594513005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81840 + }, + { + "epoch": 0.3969577083633921, + "grad_norm": 3.648997335403692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81850 + }, + { + "epoch": 0.3970062065562282, + "grad_norm": 1.4890952115820255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81860 + }, + { + "epoch": 0.3970547047490643, + "grad_norm": 1.5426670643137186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81870 + }, + { + "epoch": 0.3971032029419004, + "grad_norm": 1.6686001345078694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81880 + }, + { + "epoch": 0.39715170113473647, + "grad_norm": 2.1483551790879574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81890 + }, + { + "epoch": 0.39720019932757256, + "grad_norm": 2.0002380551886745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81900 + }, + { + "epoch": 0.39724869752040864, + "grad_norm": 1.372039832858718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81910 + }, + { + "epoch": 0.39729719571324473, + "grad_norm": 1.5713229686298291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81920 + }, + { + "epoch": 0.3973456939060808, + "grad_norm": 1.4936265415599337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81930 + }, + { + "epoch": 0.3973941920989169, + "grad_norm": 1.87510204341379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81940 + }, + { + "epoch": 0.397442690291753, + "grad_norm": 2.049530621661688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81950 + }, + { + "epoch": 0.3974911884845891, + "grad_norm": 1.5752311810501851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81960 + }, + { + "epoch": 0.3975396866774252, + "grad_norm": 1.5719440398243023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81970 + }, + { + "epoch": 0.39758818487026126, + "grad_norm": 1.2348565405773115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81980 + }, + { + "epoch": 0.39763668306309735, + "grad_norm": 1.9769749997067265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 81990 + }, + { + "epoch": 0.39768518125593344, + "grad_norm": 1.7131454796981416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82000 + }, + { + "epoch": 0.39773367944876953, + "grad_norm": 1.3058177046332275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82010 + }, + { + "epoch": 0.3977821776416056, + "grad_norm": 1.3577392792285536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82020 + }, + { + "epoch": 0.3978306758344417, + "grad_norm": 1.1749029908969533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82030 + }, + { + "epoch": 0.3978791740272778, + "grad_norm": 1.7855048781711957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82040 + }, + { + "epoch": 0.3979276722201139, + "grad_norm": 1.8041994280793006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82050 + }, + { + "epoch": 0.39797617041294997, + "grad_norm": 1.3389026207732968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82060 + }, + { + "epoch": 0.39802466860578606, + "grad_norm": 1.2709106158581562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82070 + }, + { + "epoch": 0.39807316679862215, + "grad_norm": 1.1905309520443552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82080 + }, + { + "epoch": 0.39812166499145824, + "grad_norm": 1.6692970348231029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82090 + }, + { + "epoch": 0.3981701631842943, + "grad_norm": 1.60368927026866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82100 + }, + { + "epoch": 0.3982186613771304, + "grad_norm": 1.2481424391808105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82110 + }, + { + "epoch": 0.3982671595699665, + "grad_norm": 1.143200051956228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82120 + }, + { + "epoch": 0.3983156577628026, + "grad_norm": 1.281097752325877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82130 + }, + { + "epoch": 0.3983641559556387, + "grad_norm": 1.720749764899665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82140 + }, + { + "epoch": 0.39841265414847477, + "grad_norm": 1.8649616322363727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82150 + }, + { + "epoch": 0.3984611523413109, + "grad_norm": 1.2942327884957194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82160 + }, + { + "epoch": 0.398509650534147, + "grad_norm": 1.0404919521533884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82170 + }, + { + "epoch": 0.3985581487269831, + "grad_norm": 1.3269707324070623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82180 + }, + { + "epoch": 0.3986066469198192, + "grad_norm": 1.6146583448062302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82190 + }, + { + "epoch": 0.39865514511265526, + "grad_norm": 1.6063586372183636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82200 + }, + { + "epoch": 0.39870364330549135, + "grad_norm": 1.0991200269927504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82210 + }, + { + "epoch": 0.39875214149832744, + "grad_norm": 1.9963208615081385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82220 + }, + { + "epoch": 0.3988006396911635, + "grad_norm": 1.17265574317571e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82230 + }, + { + "epoch": 0.3988491378839996, + "grad_norm": 1.8682094378164038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82240 + }, + { + "epoch": 0.3988976360768357, + "grad_norm": 1.4279520428317483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82250 + }, + { + "epoch": 0.3989461342696718, + "grad_norm": 1.084012069441087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82260 + }, + { + "epoch": 0.3989946324625079, + "grad_norm": 1.1722182762241573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82270 + }, + { + "epoch": 0.39904313065534397, + "grad_norm": 0.00011879202065756544, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 82280 + }, + { + "epoch": 0.39909162884818006, + "grad_norm": 0.00045788942952640355, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82290 + }, + { + "epoch": 0.39914012704101615, + "grad_norm": 6.246016710065305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82300 + }, + { + "epoch": 0.39918862523385223, + "grad_norm": 1.8779746824293397e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82310 + }, + { + "epoch": 0.3992371234266883, + "grad_norm": 1.1750061275961343e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82320 + }, + { + "epoch": 0.3992856216195244, + "grad_norm": 9.99045187199954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82330 + }, + { + "epoch": 0.3993341198123605, + "grad_norm": 1.2661688742809929e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82340 + }, + { + "epoch": 0.3993826180051966, + "grad_norm": 1.2581105693243444e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82350 + }, + { + "epoch": 0.3994311161980327, + "grad_norm": 8.068808710959274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82360 + }, + { + "epoch": 0.39947961439086876, + "grad_norm": 7.405713404295966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82370 + }, + { + "epoch": 0.39952811258370485, + "grad_norm": 7.587987056467682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82380 + }, + { + "epoch": 0.39957661077654094, + "grad_norm": 8.846510354487691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82390 + }, + { + "epoch": 0.39962510896937703, + "grad_norm": 9.351232620247174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82400 + }, + { + "epoch": 0.3996736071622131, + "grad_norm": 5.96506470174063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82410 + }, + { + "epoch": 0.3997221053550492, + "grad_norm": 6.058191502233967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82420 + }, + { + "epoch": 0.3997706035478853, + "grad_norm": 5.197588052396895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82430 + }, + { + "epoch": 0.3998191017407214, + "grad_norm": 7.233601536427159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82440 + }, + { + "epoch": 0.39986759993355747, + "grad_norm": 9.646493708714843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82450 + }, + { + "epoch": 0.39991609812639356, + "grad_norm": 4.594495294441003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82460 + }, + { + "epoch": 0.39996459631922965, + "grad_norm": 4.708732831204543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82470 + }, + { + "epoch": 0.40001309451206574, + "grad_norm": 4.7181069930957165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82480 + }, + { + "epoch": 0.4000615927049018, + "grad_norm": 5.598090410785517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82490 + }, + { + "epoch": 0.4001100908977379, + "grad_norm": 6.092340299801435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82500 + }, + { + "epoch": 0.400158589090574, + "grad_norm": 4.231704224366695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82510 + }, + { + "epoch": 0.4002070872834101, + "grad_norm": 4.071065632160753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82520 + }, + { + "epoch": 0.4002555854762462, + "grad_norm": 3.272609319537878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82530 + }, + { + "epoch": 0.40030408366908227, + "grad_norm": 4.993521542928647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82540 + }, + { + "epoch": 0.40035258186191836, + "grad_norm": 4.374587206257274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82550 + }, + { + "epoch": 0.40040108005475444, + "grad_norm": 3.345248160258052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82560 + }, + { + "epoch": 0.40044957824759053, + "grad_norm": 3.2902398743317463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82570 + }, + { + "epoch": 0.4004980764404266, + "grad_norm": 2.9811403692292515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82580 + }, + { + "epoch": 0.4005465746332627, + "grad_norm": 4.4339294618112035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82590 + }, + { + "epoch": 0.4005950728260988, + "grad_norm": 4.822734354092972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82600 + }, + { + "epoch": 0.4006435710189349, + "grad_norm": 2.7885607778443955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82610 + }, + { + "epoch": 0.400692069211771, + "grad_norm": 3.0135126962704817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82620 + }, + { + "epoch": 0.40074056740460706, + "grad_norm": 2.5816129891609307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82630 + }, + { + "epoch": 0.40078906559744315, + "grad_norm": 4.001351953775156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82640 + }, + { + "epoch": 0.40083756379027924, + "grad_norm": 3.965361884183949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82650 + }, + { + "epoch": 0.4008860619831153, + "grad_norm": 2.4765633952483768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82660 + }, + { + "epoch": 0.40093456017595147, + "grad_norm": 2.7085898182122037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82670 + }, + { + "epoch": 0.40098305836878756, + "grad_norm": 2.6738753149402328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82680 + }, + { + "epoch": 0.40103155656162365, + "grad_norm": 3.349481858094805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82690 + }, + { + "epoch": 0.40108005475445974, + "grad_norm": 3.44864793078159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82700 + }, + { + "epoch": 0.4011285529472958, + "grad_norm": 2.362544819334289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82710 + }, + { + "epoch": 0.4011770511401319, + "grad_norm": 2.605817599032889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82720 + }, + { + "epoch": 0.401225549332968, + "grad_norm": 2.3376492208626587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82730 + }, + { + "epoch": 0.4012740475258041, + "grad_norm": 3.062794803554425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82740 + }, + { + "epoch": 0.4013225457186402, + "grad_norm": 3.3073304166464368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82750 + }, + { + "epoch": 0.40137104391147627, + "grad_norm": 1.983501988434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82760 + }, + { + "epoch": 0.40141954210431235, + "grad_norm": 1.8709246205617092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82770 + }, + { + "epoch": 0.40146804029714844, + "grad_norm": 2.0207303350616712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82780 + }, + { + "epoch": 0.40151653848998453, + "grad_norm": 1.01909590739524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82790 + }, + { + "epoch": 0.4015650366828206, + "grad_norm": 2.876161261156085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82800 + }, + { + "epoch": 0.4016135348756567, + "grad_norm": 2.012314553212491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82810 + }, + { + "epoch": 0.4016620330684928, + "grad_norm": 1.7441352611058392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82820 + }, + { + "epoch": 0.4017105312613289, + "grad_norm": 1.7464617485529743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82830 + }, + { + "epoch": 0.401759029454165, + "grad_norm": 2.520116822779528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82840 + }, + { + "epoch": 0.40180752764700106, + "grad_norm": 2.66161350737093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82850 + }, + { + "epoch": 0.40185602583983715, + "grad_norm": 2.3358045382337878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82860 + }, + { + "epoch": 0.40190452403267324, + "grad_norm": 1.5997524315025657e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82870 + }, + { + "epoch": 0.4019530222255093, + "grad_norm": 1.7004905430439976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82880 + }, + { + "epoch": 0.4020015204183454, + "grad_norm": 2.4657524591020774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82890 + }, + { + "epoch": 0.4020500186111815, + "grad_norm": 2.2849153538118117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82900 + }, + { + "epoch": 0.4020985168040176, + "grad_norm": 1.654778657211864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82910 + }, + { + "epoch": 0.4021470149968537, + "grad_norm": 1.6348502640539664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82920 + }, + { + "epoch": 0.40219551318968977, + "grad_norm": 0.031362392008304596, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 82930 + }, + { + "epoch": 0.40224401138252586, + "grad_norm": 0.00013207666052039713, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82940 + }, + { + "epoch": 0.40229250957536195, + "grad_norm": 0.0002539401757530868, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82950 + }, + { + "epoch": 0.40234100776819803, + "grad_norm": 4.1541221435181797e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82960 + }, + { + "epoch": 0.4023895059610341, + "grad_norm": 1.9570208678487688e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82970 + }, + { + "epoch": 0.4024380041538702, + "grad_norm": 1.2931704986840487e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82980 + }, + { + "epoch": 0.4024865023467063, + "grad_norm": 1.6436617443105206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 82990 + }, + { + "epoch": 0.4025350005395424, + "grad_norm": 1.3088090781820938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83000 + }, + { + "epoch": 0.4025834987323785, + "grad_norm": 8.207169230445288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83010 + }, + { + "epoch": 0.40263199692521456, + "grad_norm": 7.677382200199645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83020 + }, + { + "epoch": 0.40268049511805065, + "grad_norm": 6.600607321161078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83030 + }, + { + "epoch": 0.40272899331088674, + "grad_norm": 9.465777111472562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83040 + }, + { + "epoch": 0.40277749150372283, + "grad_norm": 8.584804163547233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83050 + }, + { + "epoch": 0.4028259896965589, + "grad_norm": 5.658370810124325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83060 + }, + { + "epoch": 0.402874487889395, + "grad_norm": 5.2148266149742994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83070 + }, + { + "epoch": 0.4029229860822311, + "grad_norm": 4.8512838475289755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83080 + }, + { + "epoch": 0.4029714842750672, + "grad_norm": 6.360589850373799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83090 + }, + { + "epoch": 0.40301998246790327, + "grad_norm": 6.4410987761220895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83100 + }, + { + "epoch": 0.40306848066073936, + "grad_norm": 4.201366209599655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83110 + }, + { + "epoch": 0.40311697885357545, + "grad_norm": 3.956192358600674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83120 + }, + { + "epoch": 0.40316547704641154, + "grad_norm": 3.964124516642187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83130 + }, + { + "epoch": 0.4032139752392476, + "grad_norm": 5.193843662709696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83140 + }, + { + "epoch": 0.4032624734320837, + "grad_norm": 5.073982720205095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83150 + }, + { + "epoch": 0.4033109716249198, + "grad_norm": 3.3884698495967314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83160 + }, + { + "epoch": 0.4033594698177559, + "grad_norm": 5.03585488331737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83170 + }, + { + "epoch": 0.40340796801059203, + "grad_norm": 3.341537194501143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83180 + }, + { + "epoch": 0.4034564662034281, + "grad_norm": 4.396985787025187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83190 + }, + { + "epoch": 0.4035049643962642, + "grad_norm": 4.422251095093088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83200 + }, + { + "epoch": 0.4035534625891003, + "grad_norm": 3.107196107521304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83210 + }, + { + "epoch": 0.4036019607819364, + "grad_norm": 3.782566636800766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83220 + }, + { + "epoch": 0.4036504589747725, + "grad_norm": 2.89628837890632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83230 + }, + { + "epoch": 0.40369895716760856, + "grad_norm": 3.946495326090371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83240 + }, + { + "epoch": 0.40374745536044465, + "grad_norm": 3.966229996876791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83250 + }, + { + "epoch": 0.40379595355328074, + "grad_norm": 2.575707640062319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83260 + }, + { + "epoch": 0.40384445174611683, + "grad_norm": 2.6161164896620903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83270 + }, + { + "epoch": 0.4038929499389529, + "grad_norm": 2.5695287604321493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83280 + }, + { + "epoch": 0.403941448131789, + "grad_norm": 3.4759393656713655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83290 + }, + { + "epoch": 0.4039899463246251, + "grad_norm": 3.466824182396522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83300 + }, + { + "epoch": 0.4040384445174612, + "grad_norm": 2.2755671125196386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83310 + }, + { + "epoch": 0.40408694271029727, + "grad_norm": 2.323955186511739e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83320 + }, + { + "epoch": 0.40413544090313336, + "grad_norm": 2.3521281491412083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83330 + }, + { + "epoch": 0.40418393909596945, + "grad_norm": 3.096116643064306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83340 + }, + { + "epoch": 0.40423243728880554, + "grad_norm": 3.0550520477845566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83350 + }, + { + "epoch": 0.4042809354816416, + "grad_norm": 2.3829963993193815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83360 + }, + { + "epoch": 0.4043294336744777, + "grad_norm": 2.153785999325919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83370 + }, + { + "epoch": 0.4043779318673138, + "grad_norm": 1.949641045939643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83380 + }, + { + "epoch": 0.4044264300601499, + "grad_norm": 2.8293159175518667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83390 + }, + { + "epoch": 0.404474928252986, + "grad_norm": 2.716122708079638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83400 + }, + { + "epoch": 0.40452342644582207, + "grad_norm": 2.0502104689512635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83410 + }, + { + "epoch": 0.40457192463865815, + "grad_norm": 1.91258891391044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83420 + }, + { + "epoch": 0.40462042283149424, + "grad_norm": 1.7431110563848051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83430 + }, + { + "epoch": 0.40466892102433033, + "grad_norm": 2.590303665783722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83440 + }, + { + "epoch": 0.4047174192171664, + "grad_norm": 2.55559621109569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83450 + }, + { + "epoch": 0.4047659174100025, + "grad_norm": 1.732521354824712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83460 + }, + { + "epoch": 0.4048144156028386, + "grad_norm": 1.6561118627578253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83470 + }, + { + "epoch": 0.4048629137956747, + "grad_norm": 1.690986096036795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83480 + }, + { + "epoch": 0.40491141198851077, + "grad_norm": 2.4394782940362347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83490 + }, + { + "epoch": 0.40495991018134686, + "grad_norm": 2.2643398551736027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83500 + }, + { + "epoch": 0.40500840837418295, + "grad_norm": 1.5778373381181154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83510 + }, + { + "epoch": 0.40505690656701904, + "grad_norm": 1.6423167608081712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83520 + }, + { + "epoch": 0.4051054047598551, + "grad_norm": 1.455947085560183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83530 + }, + { + "epoch": 0.4051539029526912, + "grad_norm": 2.135700469807489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83540 + }, + { + "epoch": 0.4052024011455273, + "grad_norm": 2.0865213627985213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83550 + }, + { + "epoch": 0.4052508993383634, + "grad_norm": 1.5351814681707765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83560 + }, + { + "epoch": 0.4052993975311995, + "grad_norm": 1.3635169580084039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83570 + }, + { + "epoch": 0.40534789572403557, + "grad_norm": 1.4019335594639415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83580 + }, + { + "epoch": 0.40539639391687166, + "grad_norm": 2.0718277937703533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83590 + }, + { + "epoch": 0.40544489210970774, + "grad_norm": 1.9583621906349435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83600 + }, + { + "epoch": 0.40549339030254383, + "grad_norm": 1.2910985560665722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83610 + }, + { + "epoch": 0.4055418884953799, + "grad_norm": 1.3259069646665012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83620 + }, + { + "epoch": 0.405590386688216, + "grad_norm": 1.223734102495655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83630 + }, + { + "epoch": 0.4056388848810521, + "grad_norm": 1.7979712083615595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83640 + }, + { + "epoch": 0.4056873830738882, + "grad_norm": 1.774374368324061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83650 + }, + { + "epoch": 0.4057358812667243, + "grad_norm": 1.1390095551178092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83660 + }, + { + "epoch": 0.40578437945956036, + "grad_norm": 1.201521399707417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83670 + }, + { + "epoch": 0.40583287765239645, + "grad_norm": 1.1231131793465465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83680 + }, + { + "epoch": 0.40588137584523254, + "grad_norm": 1.575179339852184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83690 + }, + { + "epoch": 0.4059298740380687, + "grad_norm": 1.598815288161859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83700 + }, + { + "epoch": 0.40597837223090477, + "grad_norm": 1.084893824554456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83710 + }, + { + "epoch": 0.40602687042374086, + "grad_norm": 1.0084504538099281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83720 + }, + { + "epoch": 0.40607536861657695, + "grad_norm": 9.98308337329945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83730 + }, + { + "epoch": 0.40612386680941304, + "grad_norm": 1.6950380086200312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83740 + }, + { + "epoch": 0.4061723650022491, + "grad_norm": 1.4763164699616027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83750 + }, + { + "epoch": 0.4062208631950852, + "grad_norm": 9.23987840906193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83760 + }, + { + "epoch": 0.4062693613879213, + "grad_norm": 9.460857199883321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83770 + }, + { + "epoch": 0.4063178595807574, + "grad_norm": 9.137273195847229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83780 + }, + { + "epoch": 0.4063663577735935, + "grad_norm": 1.4100004364081542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83790 + }, + { + "epoch": 0.40641485596642957, + "grad_norm": 1.4725607115906314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83800 + }, + { + "epoch": 0.40646335415926566, + "grad_norm": 9.125877227234014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83810 + }, + { + "epoch": 0.40651185235210174, + "grad_norm": 8.731592515687225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83820 + }, + { + "epoch": 0.40656035054493783, + "grad_norm": 8.222384622058598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83830 + }, + { + "epoch": 0.4066088487377739, + "grad_norm": 1.3044749493928975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83840 + }, + { + "epoch": 0.40665734693061, + "grad_norm": 1.2630965784410364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83850 + }, + { + "epoch": 0.4067058451234461, + "grad_norm": 8.606222081652959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83860 + }, + { + "epoch": 0.4067543433162822, + "grad_norm": 7.924250553514867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83870 + }, + { + "epoch": 0.4068028415091183, + "grad_norm": 8.377915605706221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83880 + }, + { + "epoch": 0.40685133970195436, + "grad_norm": 1.3098226645524846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83890 + }, + { + "epoch": 0.40689983789479045, + "grad_norm": 1.1640478305707802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83900 + }, + { + "epoch": 0.40694833608762654, + "grad_norm": 8.158077662301366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83910 + }, + { + "epoch": 0.4069968342804626, + "grad_norm": 7.481696684408234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83920 + }, + { + "epoch": 0.4070453324732987, + "grad_norm": 7.344856953750423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83930 + }, + { + "epoch": 0.4070938306661348, + "grad_norm": 1.125528115153429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83940 + }, + { + "epoch": 0.4071423288589709, + "grad_norm": 1.0985536391672213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83950 + }, + { + "epoch": 0.407190827051807, + "grad_norm": 7.057353172967851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83960 + }, + { + "epoch": 0.40723932524464307, + "grad_norm": 7.256094249896705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83970 + }, + { + "epoch": 0.40728782343747916, + "grad_norm": 7.484687216674502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83980 + }, + { + "epoch": 0.40733632163031525, + "grad_norm": 1.1428052175688208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 83990 + }, + { + "epoch": 0.40738481982315133, + "grad_norm": 1.0653110393832321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84000 + }, + { + "epoch": 0.4074333180159874, + "grad_norm": 6.574265398739954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84010 + }, + { + "epoch": 0.4074818162088235, + "grad_norm": 6.729227948198968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84020 + }, + { + "epoch": 0.4075303144016596, + "grad_norm": 6.923488626853214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84030 + }, + { + "epoch": 0.4075788125944957, + "grad_norm": 9.585729685568367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84040 + }, + { + "epoch": 0.4076273107873318, + "grad_norm": 9.6467283583479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84050 + }, + { + "epoch": 0.40767580898016786, + "grad_norm": 6.33709930752957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84060 + }, + { + "epoch": 0.40772430717300395, + "grad_norm": 5.750349600930349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84070 + }, + { + "epoch": 0.40777280536584004, + "grad_norm": 6.367425271491811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84080 + }, + { + "epoch": 0.40782130355867613, + "grad_norm": 1.1686482821460231e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84090 + }, + { + "epoch": 0.4078698017515122, + "grad_norm": 9.794961215447984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84100 + }, + { + "epoch": 0.4079182999443483, + "grad_norm": 6.23135292698862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84110 + }, + { + "epoch": 0.4079667981371844, + "grad_norm": 5.99307895754464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84120 + }, + { + "epoch": 0.4080152963300205, + "grad_norm": 5.690116040568682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84130 + }, + { + "epoch": 0.40806379452285657, + "grad_norm": 8.532826427654072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84140 + }, + { + "epoch": 0.40811229271569266, + "grad_norm": 9.866004120340222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84150 + }, + { + "epoch": 0.40816079090852875, + "grad_norm": 5.86412852499052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84160 + }, + { + "epoch": 0.40820928910136484, + "grad_norm": 5.626002348435577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84170 + }, + { + "epoch": 0.4082577872942009, + "grad_norm": 5.758608949690824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84180 + }, + { + "epoch": 0.408306285487037, + "grad_norm": 8.417052299591887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84190 + }, + { + "epoch": 0.4083547836798731, + "grad_norm": 7.968358772814099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84200 + }, + { + "epoch": 0.40840328187270925, + "grad_norm": 5.883697440367541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84210 + }, + { + "epoch": 0.40845178006554533, + "grad_norm": 5.310188271323568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84220 + }, + { + "epoch": 0.4085002782583814, + "grad_norm": 5.546914962906158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84230 + }, + { + "epoch": 0.4085487764512175, + "grad_norm": 8.988283184407919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84240 + }, + { + "epoch": 0.4085972746440536, + "grad_norm": 7.921300948510179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84250 + }, + { + "epoch": 0.4086457728368897, + "grad_norm": 5.399877522904717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84260 + }, + { + "epoch": 0.4086942710297258, + "grad_norm": 5.020473281547311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84270 + }, + { + "epoch": 0.40874276922256186, + "grad_norm": 5.294214702189493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84280 + }, + { + "epoch": 0.40879126741539795, + "grad_norm": 7.461838436029211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84290 + }, + { + "epoch": 0.40883976560823404, + "grad_norm": 6.5563796169954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84300 + }, + { + "epoch": 0.40888826380107013, + "grad_norm": 4.6779609874647576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84310 + }, + { + "epoch": 0.4089367619939062, + "grad_norm": 5.545342105506279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84320 + }, + { + "epoch": 0.4089852601867423, + "grad_norm": 5.070928636996541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84330 + }, + { + "epoch": 0.4090337583795784, + "grad_norm": 7.055760420371371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84340 + }, + { + "epoch": 0.4090822565724145, + "grad_norm": 7.56702149828925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84350 + }, + { + "epoch": 0.40913075476525057, + "grad_norm": 4.87732791043527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84360 + }, + { + "epoch": 0.40917925295808666, + "grad_norm": 5.624893333333603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84370 + }, + { + "epoch": 0.40922775115092275, + "grad_norm": 4.922060838907782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84380 + }, + { + "epoch": 0.40927624934375884, + "grad_norm": 6.702283599224756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84390 + }, + { + "epoch": 0.4093247475365949, + "grad_norm": 6.565654757650918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84400 + }, + { + "epoch": 0.409373245729431, + "grad_norm": 4.5140816951061424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84410 + }, + { + "epoch": 0.4094217439222671, + "grad_norm": 4.3069150024166447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84420 + }, + { + "epoch": 0.4094702421151032, + "grad_norm": 4.5955999894431443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84430 + }, + { + "epoch": 0.4095187403079393, + "grad_norm": 6.914688128745183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84440 + }, + { + "epoch": 0.40956723850077537, + "grad_norm": 6.346681971081125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84450 + }, + { + "epoch": 0.40961573669361145, + "grad_norm": 4.1887633983606065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84460 + }, + { + "epoch": 0.40966423488644754, + "grad_norm": 4.266080111392512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84470 + }, + { + "epoch": 0.40971273307928363, + "grad_norm": 4.435386813383957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84480 + }, + { + "epoch": 0.4097612312721197, + "grad_norm": 6.560620704476605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84490 + }, + { + "epoch": 0.4098097294649558, + "grad_norm": 6.28500629318296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84500 + }, + { + "epoch": 0.4098582276577919, + "grad_norm": 4.636515882339154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84510 + }, + { + "epoch": 0.409906725850628, + "grad_norm": 4.0746988361206604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84520 + }, + { + "epoch": 0.4099552240434641, + "grad_norm": 3.9342663171737513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84530 + }, + { + "epoch": 0.41000372223630016, + "grad_norm": 5.793955324406852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84540 + }, + { + "epoch": 0.41005222042913625, + "grad_norm": 5.955043889116496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84550 + }, + { + "epoch": 0.41010071862197234, + "grad_norm": 4.1104317460849416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84560 + }, + { + "epoch": 0.4101492168148084, + "grad_norm": 3.9579106214659987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84570 + }, + { + "epoch": 0.4101977150076445, + "grad_norm": 4.060729850152711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84580 + }, + { + "epoch": 0.4102462132004806, + "grad_norm": 5.597308927463018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84590 + }, + { + "epoch": 0.4102947113933167, + "grad_norm": 6.092972739679681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84600 + }, + { + "epoch": 0.4103432095861528, + "grad_norm": 3.9613729541088105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84610 + }, + { + "epoch": 0.41039170777898887, + "grad_norm": 4.2709746139735216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84620 + }, + { + "epoch": 0.41044020597182496, + "grad_norm": 4.7562028271386225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84630 + }, + { + "epoch": 0.41048870416466104, + "grad_norm": 5.107327751829871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84640 + }, + { + "epoch": 0.41053720235749713, + "grad_norm": 5.360213322092022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84650 + }, + { + "epoch": 0.4105857005503332, + "grad_norm": 3.273139839166106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84660 + }, + { + "epoch": 0.4106341987431693, + "grad_norm": 3.720153642916557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84670 + }, + { + "epoch": 0.4106826969360054, + "grad_norm": 3.5669978615260334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84680 + }, + { + "epoch": 0.4107311951288415, + "grad_norm": 4.937473931931891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84690 + }, + { + "epoch": 0.4107796933216776, + "grad_norm": 5.553351911657955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84700 + }, + { + "epoch": 0.41082819151451366, + "grad_norm": 3.673899584555329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84710 + }, + { + "epoch": 0.4108766897073498, + "grad_norm": 3.689021070840681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84720 + }, + { + "epoch": 0.4109251879001859, + "grad_norm": 4.6649839191559295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84730 + }, + { + "epoch": 0.410973686093022, + "grad_norm": 4.627266037005029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84740 + }, + { + "epoch": 0.41102218428585807, + "grad_norm": 5.21802405728522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84750 + }, + { + "epoch": 0.41107068247869416, + "grad_norm": 3.529020489168033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84760 + }, + { + "epoch": 0.41111918067153025, + "grad_norm": 3.639513010966766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84770 + }, + { + "epoch": 0.41116767886436634, + "grad_norm": 3.4759511891024886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84780 + }, + { + "epoch": 0.4112161770572024, + "grad_norm": 5.021156539442018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84790 + }, + { + "epoch": 0.4112646752500385, + "grad_norm": 4.843557803724252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84800 + }, + { + "epoch": 0.4113131734428746, + "grad_norm": 3.54718451944791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84810 + }, + { + "epoch": 0.4113616716357107, + "grad_norm": 3.5562001698963286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84820 + }, + { + "epoch": 0.4114101698285468, + "grad_norm": 3.031401263342559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84830 + }, + { + "epoch": 0.41145866802138287, + "grad_norm": 4.6273694920273556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84840 + }, + { + "epoch": 0.41150716621421896, + "grad_norm": 4.234576351791475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84850 + }, + { + "epoch": 0.41155566440705504, + "grad_norm": 3.1628366059521795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84860 + }, + { + "epoch": 0.41160416259989113, + "grad_norm": 3.2263648108710186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84870 + }, + { + "epoch": 0.4116526607927272, + "grad_norm": 3.105473922460078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84880 + }, + { + "epoch": 0.4117011589855633, + "grad_norm": 4.7468228103753063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84890 + }, + { + "epoch": 0.4117496571783994, + "grad_norm": 5.033633101447776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84900 + }, + { + "epoch": 0.4117981553712355, + "grad_norm": 3.1475983064410684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84910 + }, + { + "epoch": 0.4118466535640716, + "grad_norm": 3.111638022801344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84920 + }, + { + "epoch": 0.41189515175690766, + "grad_norm": 3.3257322229474084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84930 + }, + { + "epoch": 0.41194364994974375, + "grad_norm": 4.217595233058091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84940 + }, + { + "epoch": 0.41199214814257984, + "grad_norm": 4.60756723441591e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84950 + }, + { + "epoch": 0.4120406463354159, + "grad_norm": 3.059759308143839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84960 + }, + { + "epoch": 0.412089144528252, + "grad_norm": 3.0356764568750805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84970 + }, + { + "epoch": 0.4121376427210881, + "grad_norm": 2.99722955787729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84980 + }, + { + "epoch": 0.4121861409139242, + "grad_norm": 5.253111794445431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 84990 + }, + { + "epoch": 0.4122346391067603, + "grad_norm": 6.855932497273898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85000 + }, + { + "epoch": 0.41228313729959637, + "grad_norm": 6.965772172407014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85010 + }, + { + "epoch": 0.41233163549243246, + "grad_norm": 2.7839925564876467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85020 + }, + { + "epoch": 0.41238013368526855, + "grad_norm": 2.800832419325161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85030 + }, + { + "epoch": 0.41242863187810463, + "grad_norm": 6.740152116435638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85040 + }, + { + "epoch": 0.4124771300709407, + "grad_norm": 4.0952897961687995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85050 + }, + { + "epoch": 0.4125256282637768, + "grad_norm": 3.0848562460050744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85060 + }, + { + "epoch": 0.4125741264566129, + "grad_norm": 3.0621967539445905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85070 + }, + { + "epoch": 0.412622624649449, + "grad_norm": 2.8429735721147154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85080 + }, + { + "epoch": 0.4126711228422851, + "grad_norm": 3.760394804430689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85090 + }, + { + "epoch": 0.41271962103512116, + "grad_norm": 4.151870030000282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85100 + }, + { + "epoch": 0.41276811922795725, + "grad_norm": 2.874356539450673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85110 + }, + { + "epoch": 0.41281661742079334, + "grad_norm": 3.019870575826644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85120 + }, + { + "epoch": 0.41286511561362943, + "grad_norm": 3.518356095355557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85130 + }, + { + "epoch": 0.4129136138064655, + "grad_norm": 4.2744312622744474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85140 + }, + { + "epoch": 0.4129621119993016, + "grad_norm": 4.2259648580511566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85150 + }, + { + "epoch": 0.4130106101921377, + "grad_norm": 3.006768451996322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85160 + }, + { + "epoch": 0.4130591083849738, + "grad_norm": 2.808987460412027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85170 + }, + { + "epoch": 0.41310760657780987, + "grad_norm": 2.7813300107482064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85180 + }, + { + "epoch": 0.41315610477064596, + "grad_norm": 3.7501200722545036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85190 + }, + { + "epoch": 0.41320460296348205, + "grad_norm": 3.599895137540443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85200 + }, + { + "epoch": 0.41325310115631814, + "grad_norm": 4.1552041807335627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85210 + }, + { + "epoch": 0.4133015993491542, + "grad_norm": 2.804663949973474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85220 + }, + { + "epoch": 0.41335009754199037, + "grad_norm": 2.7856498263645335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85230 + }, + { + "epoch": 0.41339859573482646, + "grad_norm": 3.7971236110934115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85240 + }, + { + "epoch": 0.41344709392766255, + "grad_norm": 3.877805170304782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85250 + }, + { + "epoch": 0.41349559212049863, + "grad_norm": 2.808645263030485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85260 + }, + { + "epoch": 0.4135440903133347, + "grad_norm": 3.075585368605971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85270 + }, + { + "epoch": 0.4135925885061708, + "grad_norm": 2.5486858135082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85280 + }, + { + "epoch": 0.4136410866990069, + "grad_norm": 3.6655137591878884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85290 + }, + { + "epoch": 0.413689584891843, + "grad_norm": 3.224376996513456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85300 + }, + { + "epoch": 0.4137380830846791, + "grad_norm": 2.771747062979557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85310 + }, + { + "epoch": 0.41378658127751516, + "grad_norm": 2.6222849669466086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85320 + }, + { + "epoch": 0.41383507947035125, + "grad_norm": 2.613114702398889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85330 + }, + { + "epoch": 0.41388357766318734, + "grad_norm": 3.253775560096983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85340 + }, + { + "epoch": 0.41393207585602343, + "grad_norm": 3.17877749012041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85350 + }, + { + "epoch": 0.4139805740488595, + "grad_norm": 2.40054760070052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85360 + }, + { + "epoch": 0.4140290722416956, + "grad_norm": 2.891603401167231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85370 + }, + { + "epoch": 0.4140775704345317, + "grad_norm": 2.416251447812101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85380 + }, + { + "epoch": 0.4141260686273678, + "grad_norm": 3.279894542629336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85390 + }, + { + "epoch": 0.41417456682020387, + "grad_norm": 3.3521104114697664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85400 + }, + { + "epoch": 0.41422306501303996, + "grad_norm": 2.457564676205948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85410 + }, + { + "epoch": 0.41427156320587605, + "grad_norm": 2.9441136462082795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85420 + }, + { + "epoch": 0.41432006139871214, + "grad_norm": 2.4564985778852133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85430 + }, + { + "epoch": 0.4143685595915482, + "grad_norm": 3.601705884648254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85440 + }, + { + "epoch": 0.4144170577843843, + "grad_norm": 3.1491839536101907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85450 + }, + { + "epoch": 0.4144655559772204, + "grad_norm": 2.550165447701147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85460 + }, + { + "epoch": 0.4145140541700565, + "grad_norm": 2.385546906680247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85470 + }, + { + "epoch": 0.4145625523628926, + "grad_norm": 2.3726057918338483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85480 + }, + { + "epoch": 0.41461105055572867, + "grad_norm": 3.183658066063799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85490 + }, + { + "epoch": 0.41465954874856475, + "grad_norm": 3.4269197612957214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85500 + }, + { + "epoch": 0.41470804694140084, + "grad_norm": 9.802788554225117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85510 + }, + { + "epoch": 0.41475654513423693, + "grad_norm": 2.447062286137225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85520 + }, + { + "epoch": 0.414805043327073, + "grad_norm": 2.2970917257225665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85530 + }, + { + "epoch": 0.4148535415199091, + "grad_norm": 3.5658658816828392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85540 + }, + { + "epoch": 0.4149020397127452, + "grad_norm": 3.135565975753707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85550 + }, + { + "epoch": 0.4149505379055813, + "grad_norm": 2.2999034854365163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85560 + }, + { + "epoch": 0.4149990360984174, + "grad_norm": 2.2106348751549376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85570 + }, + { + "epoch": 0.41504753429125346, + "grad_norm": 2.3188532338735968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85580 + }, + { + "epoch": 0.41509603248408955, + "grad_norm": 2.878437896924879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85590 + }, + { + "epoch": 0.41514453067692564, + "grad_norm": 3.3035101409950585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85600 + }, + { + "epoch": 0.4151930288697617, + "grad_norm": 2.2111851194495102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85610 + }, + { + "epoch": 0.4152415270625978, + "grad_norm": 2.2627931173246907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85620 + }, + { + "epoch": 0.4152900252554339, + "grad_norm": 2.3594250819769513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85630 + }, + { + "epoch": 0.41533852344827, + "grad_norm": 2.925731052982883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85640 + }, + { + "epoch": 0.4153870216411061, + "grad_norm": 2.7711828920473636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85650 + }, + { + "epoch": 0.41543551983394217, + "grad_norm": 2.2801972932029457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85660 + }, + { + "epoch": 0.41548401802677826, + "grad_norm": 2.182869849320923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85670 + }, + { + "epoch": 0.41553251621961435, + "grad_norm": 2.1343717548916175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85680 + }, + { + "epoch": 0.41558101441245043, + "grad_norm": 2.855003913282417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85690 + }, + { + "epoch": 0.4156295126052865, + "grad_norm": 2.742183937698428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85700 + }, + { + "epoch": 0.4156780107981226, + "grad_norm": 2.1604022037990944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85710 + }, + { + "epoch": 0.4157265089909587, + "grad_norm": 2.05739894454382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85720 + }, + { + "epoch": 0.4157750071837948, + "grad_norm": 2.3189234354958899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85730 + }, + { + "epoch": 0.41582350537663093, + "grad_norm": 2.896879607305891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85740 + }, + { + "epoch": 0.415872003569467, + "grad_norm": 2.7532263402463286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85750 + }, + { + "epoch": 0.4159205017623031, + "grad_norm": 2.1402919969659706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85760 + }, + { + "epoch": 0.4159689999551392, + "grad_norm": 2.202725966071739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85770 + }, + { + "epoch": 0.4160174981479753, + "grad_norm": 2.1156073160000233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85780 + }, + { + "epoch": 0.4160659963408114, + "grad_norm": 2.604174653697555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85790 + }, + { + "epoch": 0.41611449453364746, + "grad_norm": 2.5947053927666275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85800 + }, + { + "epoch": 0.41616299272648355, + "grad_norm": 2.621203520902782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85810 + }, + { + "epoch": 0.41621149091931964, + "grad_norm": 2.09120784688821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85820 + }, + { + "epoch": 0.4162599891121557, + "grad_norm": 2.832825884979684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85830 + }, + { + "epoch": 0.4163084873049918, + "grad_norm": 2.822843043759349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85840 + }, + { + "epoch": 0.4163569854978279, + "grad_norm": 2.9700692039114074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85850 + }, + { + "epoch": 0.416405483690664, + "grad_norm": 2.1848781273092754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85860 + }, + { + "epoch": 0.4164539818835001, + "grad_norm": 2.0558029234507558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85870 + }, + { + "epoch": 0.41650248007633617, + "grad_norm": 2.049885949872987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85880 + }, + { + "epoch": 0.41655097826917226, + "grad_norm": 2.499174911463342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85890 + }, + { + "epoch": 0.41659947646200834, + "grad_norm": 2.7344151476427214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85900 + }, + { + "epoch": 0.41664797465484443, + "grad_norm": 2.2197394855538732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85910 + }, + { + "epoch": 0.4166964728476805, + "grad_norm": 2.117959922998125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85920 + }, + { + "epoch": 0.4167449710405166, + "grad_norm": 3.0580125098822464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85930 + }, + { + "epoch": 0.4167934692333527, + "grad_norm": 2.5256312596866337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85940 + }, + { + "epoch": 0.4168419674261888, + "grad_norm": 2.701184484976693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85950 + }, + { + "epoch": 0.4168904656190249, + "grad_norm": 2.0857434890331206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85960 + }, + { + "epoch": 0.41693896381186096, + "grad_norm": 1.9915425752969895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85970 + }, + { + "epoch": 0.41698746200469705, + "grad_norm": 2.2214706518752791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85980 + }, + { + "epoch": 0.41703596019753314, + "grad_norm": 2.3674044768995373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 85990 + }, + { + "epoch": 0.41708445839036923, + "grad_norm": 2.73995141242267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86000 + }, + { + "epoch": 0.4171329565832053, + "grad_norm": 1.858952742850306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86010 + }, + { + "epoch": 0.4171814547760414, + "grad_norm": 1.9781748505920405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86020 + }, + { + "epoch": 0.4172299529688775, + "grad_norm": 1.960740263484695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86030 + }, + { + "epoch": 0.4172784511617136, + "grad_norm": 2.420800910840626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86040 + }, + { + "epoch": 0.41732694935454967, + "grad_norm": 2.2836192670183664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86050 + }, + { + "epoch": 0.41737544754738576, + "grad_norm": 1.9273385021278955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86060 + }, + { + "epoch": 0.41742394574022185, + "grad_norm": 1.8373052057540917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86070 + }, + { + "epoch": 0.41747244393305794, + "grad_norm": 2.0538563205718674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86080 + }, + { + "epoch": 0.417520942125894, + "grad_norm": 2.5474352582932625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86090 + }, + { + "epoch": 0.4175694403187301, + "grad_norm": 2.2999243753929477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86100 + }, + { + "epoch": 0.4176179385115662, + "grad_norm": 2.077950540524398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86110 + }, + { + "epoch": 0.4176664367044023, + "grad_norm": 1.886261031813774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86120 + }, + { + "epoch": 0.4177149348972384, + "grad_norm": 2.2533636467869655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86130 + }, + { + "epoch": 0.41776343309007447, + "grad_norm": 2.5316353458038066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86140 + }, + { + "epoch": 0.41781193128291055, + "grad_norm": 2.2923343578895583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86150 + }, + { + "epoch": 0.41786042947574664, + "grad_norm": 1.8322175776575023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86160 + }, + { + "epoch": 0.41790892766858273, + "grad_norm": 1.9492213709781936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86170 + }, + { + "epoch": 0.4179574258614188, + "grad_norm": 1.9064057710238558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86180 + }, + { + "epoch": 0.4180059240542549, + "grad_norm": 2.143291197853614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86190 + }, + { + "epoch": 0.418054422247091, + "grad_norm": 2.2283519740540214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86200 + }, + { + "epoch": 0.4181029204399271, + "grad_norm": 1.9129714701193734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86210 + }, + { + "epoch": 0.41815141863276317, + "grad_norm": 2.2202257810022274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86220 + }, + { + "epoch": 0.41819991682559926, + "grad_norm": 1.8781929611577652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86230 + }, + { + "epoch": 0.41824841501843535, + "grad_norm": 2.2373718877588544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86240 + }, + { + "epoch": 0.4182969132112715, + "grad_norm": 2.2539498445439676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86250 + }, + { + "epoch": 0.4183454114041076, + "grad_norm": 1.769072497381785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86260 + }, + { + "epoch": 0.41839390959694367, + "grad_norm": 1.8710852600634098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86270 + }, + { + "epoch": 0.41844240778977976, + "grad_norm": 1.8794877121308673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86280 + }, + { + "epoch": 0.41849090598261585, + "grad_norm": 2.1500653701878036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86290 + }, + { + "epoch": 0.41853940417545193, + "grad_norm": 2.13412263860846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86300 + }, + { + "epoch": 0.418587902368288, + "grad_norm": 1.8661191347746353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86310 + }, + { + "epoch": 0.4186364005611241, + "grad_norm": 1.900191506365445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86320 + }, + { + "epoch": 0.4186848987539602, + "grad_norm": 1.8641914323325182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86330 + }, + { + "epoch": 0.4187333969467963, + "grad_norm": 2.2138569022445154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86340 + }, + { + "epoch": 0.4187818951396324, + "grad_norm": 2.1057677201952174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86350 + }, + { + "epoch": 0.41883039333246846, + "grad_norm": 1.7984913824875548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86360 + }, + { + "epoch": 0.41887889152530455, + "grad_norm": 1.7576506650129886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86370 + }, + { + "epoch": 0.41892738971814064, + "grad_norm": 1.7785423267469014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86380 + }, + { + "epoch": 0.41897588791097673, + "grad_norm": 2.133313188323882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86390 + }, + { + "epoch": 0.4190243861038128, + "grad_norm": 2.0448683812901436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86400 + }, + { + "epoch": 0.4190728842966489, + "grad_norm": 1.7590895140529028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86410 + }, + { + "epoch": 0.419121382489485, + "grad_norm": 1.7296747500950005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86420 + }, + { + "epoch": 0.4191698806823211, + "grad_norm": 1.794813044853072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86430 + }, + { + "epoch": 0.41921837887515717, + "grad_norm": 2.1106099268308753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86440 + }, + { + "epoch": 0.41926687706799326, + "grad_norm": 2.162163923458138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86450 + }, + { + "epoch": 0.41931537526082935, + "grad_norm": 1.7105419658491883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86460 + }, + { + "epoch": 0.41936387345366544, + "grad_norm": 1.7124582996075333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86470 + }, + { + "epoch": 0.4194123716465015, + "grad_norm": 1.7687800379917462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86480 + }, + { + "epoch": 0.4194608698393376, + "grad_norm": 2.120746671607776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86490 + }, + { + "epoch": 0.4195093680321737, + "grad_norm": 2.2168669033817423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86500 + }, + { + "epoch": 0.4195578662250098, + "grad_norm": 1.7403949925665074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86510 + }, + { + "epoch": 0.4196063644178459, + "grad_norm": 1.7134897234427626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86520 + }, + { + "epoch": 0.41965486261068197, + "grad_norm": 1.8906300169874157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86530 + }, + { + "epoch": 0.41970336080351806, + "grad_norm": 2.0190488214666402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86540 + }, + { + "epoch": 0.41975185899635414, + "grad_norm": 2.0114286769512546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86550 + }, + { + "epoch": 0.41980035718919023, + "grad_norm": 1.7064242285869113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86560 + }, + { + "epoch": 0.4198488553820263, + "grad_norm": 1.6301696348364203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86570 + }, + { + "epoch": 0.4198973535748624, + "grad_norm": 1.6772510491591675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86580 + }, + { + "epoch": 0.4199458517676985, + "grad_norm": 2.0023338720420725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86590 + }, + { + "epoch": 0.4199943499605346, + "grad_norm": 2.0297373737321323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86600 + }, + { + "epoch": 0.4200428481533707, + "grad_norm": 1.7447874256504292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86610 + }, + { + "epoch": 0.42009134634620676, + "grad_norm": 1.9797694505996333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86620 + }, + { + "epoch": 0.42013984453904285, + "grad_norm": 1.6554703563542716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86630 + }, + { + "epoch": 0.42018834273187894, + "grad_norm": 1.993732325900055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86640 + }, + { + "epoch": 0.420236840924715, + "grad_norm": 4.5662761749554193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86650 + }, + { + "epoch": 0.4202853391175511, + "grad_norm": 1.8834731463357457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86660 + }, + { + "epoch": 0.4203338373103872, + "grad_norm": 1.6244852929503395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86670 + }, + { + "epoch": 0.4203823355032233, + "grad_norm": 1.7226953730187233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86680 + }, + { + "epoch": 0.4204308336960594, + "grad_norm": 1.950410819517856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86690 + }, + { + "epoch": 0.42047933188889547, + "grad_norm": 2.0933423172664334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86700 + }, + { + "epoch": 0.42052783008173156, + "grad_norm": 1.6703782534932543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86710 + }, + { + "epoch": 0.42057632827456765, + "grad_norm": 1.602526253918768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86720 + }, + { + "epoch": 0.42062482646740373, + "grad_norm": 1.6079349052233738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86730 + }, + { + "epoch": 0.4206733246602398, + "grad_norm": 1.9896287994924933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86740 + }, + { + "epoch": 0.4207218228530759, + "grad_norm": 1.9398235906464834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86750 + }, + { + "epoch": 0.420770321045912, + "grad_norm": 1.7030046706167923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86760 + }, + { + "epoch": 0.42081881923874814, + "grad_norm": 1.6463995677895582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86770 + }, + { + "epoch": 0.42086731743158423, + "grad_norm": 1.6417413917224621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86780 + }, + { + "epoch": 0.4209158156244203, + "grad_norm": 1.941079545986213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86790 + }, + { + "epoch": 0.4209643138172564, + "grad_norm": 1.8135797574814205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86800 + }, + { + "epoch": 0.4210128120100925, + "grad_norm": 1.6631825872082118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86810 + }, + { + "epoch": 0.4210613102029286, + "grad_norm": 1.6384979062422644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86820 + }, + { + "epoch": 0.4211098083957647, + "grad_norm": 1.6116403855903627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86830 + }, + { + "epoch": 0.42115830658860076, + "grad_norm": 1.8687535430217395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86840 + }, + { + "epoch": 0.42120680478143685, + "grad_norm": 1.865995500338613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86850 + }, + { + "epoch": 0.42125530297427294, + "grad_norm": 1.536614462338548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86860 + }, + { + "epoch": 0.421303801167109, + "grad_norm": 1.5312882339912903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86870 + }, + { + "epoch": 0.4213522993599451, + "grad_norm": 1.5404074815705826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86880 + }, + { + "epoch": 0.4214007975527812, + "grad_norm": 1.774182010194636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86890 + }, + { + "epoch": 0.4214492957456173, + "grad_norm": 1.7387993977990845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86900 + }, + { + "epoch": 0.4214977939384534, + "grad_norm": 1.5545882092737884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86910 + }, + { + "epoch": 0.42154629213128947, + "grad_norm": 1.818033581457712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86920 + }, + { + "epoch": 0.42159479032412556, + "grad_norm": 1.5205530701223324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86930 + }, + { + "epoch": 0.42164328851696165, + "grad_norm": 1.949280630242356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86940 + }, + { + "epoch": 0.42169178670979773, + "grad_norm": 1.8748643526578235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86950 + }, + { + "epoch": 0.4217402849026338, + "grad_norm": 1.5407866271743842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86960 + }, + { + "epoch": 0.4217887830954699, + "grad_norm": 1.5710260470314097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86970 + }, + { + "epoch": 0.421837281288306, + "grad_norm": 1.5352334514773247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86980 + }, + { + "epoch": 0.4218857794811421, + "grad_norm": 1.8008097413257929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 86990 + }, + { + "epoch": 0.4219342776739782, + "grad_norm": 1.759993750738431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87000 + }, + { + "epoch": 0.42198277586681426, + "grad_norm": 1.5413954201903834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87010 + }, + { + "epoch": 0.42203127405965035, + "grad_norm": 1.50050865954654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87020 + }, + { + "epoch": 0.42207977225248644, + "grad_norm": 1.5534500619196479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87030 + }, + { + "epoch": 0.42212827044532253, + "grad_norm": 1.8444897875724564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87040 + }, + { + "epoch": 0.4221767686381586, + "grad_norm": 1.7185003287067957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87050 + }, + { + "epoch": 0.4222252668309947, + "grad_norm": 1.6782885836619243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87060 + }, + { + "epoch": 0.4222737650238308, + "grad_norm": 1.5631228222900972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87070 + }, + { + "epoch": 0.4223222632166669, + "grad_norm": 1.5571589528917684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87080 + }, + { + "epoch": 0.42237076140950297, + "grad_norm": 1.80167461394376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87090 + }, + { + "epoch": 0.42241925960233906, + "grad_norm": 1.6919416623295547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87100 + }, + { + "epoch": 0.42246775779517515, + "grad_norm": 1.5121472074497433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87110 + }, + { + "epoch": 0.42251625598801124, + "grad_norm": 1.4818755289525143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87120 + }, + { + "epoch": 0.4225647541808473, + "grad_norm": 1.4505316414670233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87130 + }, + { + "epoch": 0.4226132523736834, + "grad_norm": 1.748239384369299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87140 + }, + { + "epoch": 0.4226617505665195, + "grad_norm": 1.7264952134610212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87150 + }, + { + "epoch": 0.4227102487593556, + "grad_norm": 1.496337489470534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87160 + }, + { + "epoch": 0.4227587469521917, + "grad_norm": 1.602974180059391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87170 + }, + { + "epoch": 0.42280724514502777, + "grad_norm": 1.4445750196045992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87180 + }, + { + "epoch": 0.42285574333786385, + "grad_norm": 1.8206293361799908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87190 + }, + { + "epoch": 0.42290424153069994, + "grad_norm": 1.763197303716879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87200 + }, + { + "epoch": 0.42295273972353603, + "grad_norm": 1.4306934303931484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87210 + }, + { + "epoch": 0.4230012379163721, + "grad_norm": 1.5153615606777748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87220 + }, + { + "epoch": 0.4230497361092082, + "grad_norm": 1.492545038672688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87230 + }, + { + "epoch": 0.4230982343020443, + "grad_norm": 1.6815793912883237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87240 + }, + { + "epoch": 0.4231467324948804, + "grad_norm": 1.60594865405983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87250 + }, + { + "epoch": 0.4231952306877165, + "grad_norm": 1.43344124126088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87260 + }, + { + "epoch": 0.42324372888055256, + "grad_norm": 1.4938616743620514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87270 + }, + { + "epoch": 0.4232922270733887, + "grad_norm": 1.4632449563123373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87280 + }, + { + "epoch": 0.4233407252662248, + "grad_norm": 1.7643196770222858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87290 + }, + { + "epoch": 0.4233892234590609, + "grad_norm": 1.715258832746258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87300 + }, + { + "epoch": 0.42343772165189697, + "grad_norm": 1.4191971331456443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87310 + }, + { + "epoch": 0.42348621984473306, + "grad_norm": 1.4876628995352803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87320 + }, + { + "epoch": 0.42353471803756915, + "grad_norm": 1.4353915389619942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87330 + }, + { + "epoch": 0.42358321623040524, + "grad_norm": 1.5850515922011255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87340 + }, + { + "epoch": 0.4236317144232413, + "grad_norm": 1.6929483592775796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87350 + }, + { + "epoch": 0.4236802126160774, + "grad_norm": 1.5008617992862128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87360 + }, + { + "epoch": 0.4237287108089135, + "grad_norm": 1.4208225707079691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87370 + }, + { + "epoch": 0.4237772090017496, + "grad_norm": 1.443810191403827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87380 + }, + { + "epoch": 0.4238257071945857, + "grad_norm": 1.6101176925076288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87390 + }, + { + "epoch": 0.42387420538742177, + "grad_norm": 1.6531272706288291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87400 + }, + { + "epoch": 0.42392270358025785, + "grad_norm": 1.4025542327544827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87410 + }, + { + "epoch": 0.42397120177309394, + "grad_norm": 1.4235440914944775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87420 + }, + { + "epoch": 0.42401969996593003, + "grad_norm": 1.4035327922101715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87430 + }, + { + "epoch": 0.4240681981587661, + "grad_norm": 1.5934153907437576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87440 + }, + { + "epoch": 0.4241166963516022, + "grad_norm": 1.6075216535682557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87450 + }, + { + "epoch": 0.4241651945444383, + "grad_norm": 1.4059476427519257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87460 + }, + { + "epoch": 0.4242136927372744, + "grad_norm": 1.432296272696476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87470 + }, + { + "epoch": 0.42426219093011047, + "grad_norm": 1.4458026953434455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87480 + }, + { + "epoch": 0.42431068912294656, + "grad_norm": 1.7361752213673753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87490 + }, + { + "epoch": 0.42435918731578265, + "grad_norm": 1.568478040780974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87500 + }, + { + "epoch": 0.42440768550861874, + "grad_norm": 1.3823886035879696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87510 + }, + { + "epoch": 0.4244561837014548, + "grad_norm": 1.3667691689533967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87520 + }, + { + "epoch": 0.4245046818942909, + "grad_norm": 1.3784305963326915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87530 + }, + { + "epoch": 0.424553180087127, + "grad_norm": 1.548357317915361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87540 + }, + { + "epoch": 0.4246016782799631, + "grad_norm": 1.5348115312008304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87550 + }, + { + "epoch": 0.4246501764727992, + "grad_norm": 1.3601886905689753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87560 + }, + { + "epoch": 0.42469867466563527, + "grad_norm": 1.3291850109453662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87570 + }, + { + "epoch": 0.42474717285847136, + "grad_norm": 1.3512310204077949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87580 + }, + { + "epoch": 0.42479567105130744, + "grad_norm": 1.554766413391917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87590 + }, + { + "epoch": 0.42484416924414353, + "grad_norm": 1.577722201773213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87600 + }, + { + "epoch": 0.4248926674369796, + "grad_norm": 1.519953514161898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87610 + }, + { + "epoch": 0.4249411656298157, + "grad_norm": 1.370926412391782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87620 + }, + { + "epoch": 0.4249896638226518, + "grad_norm": 1.412054047023048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87630 + }, + { + "epoch": 0.4250381620154879, + "grad_norm": 2.495671878932626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87640 + }, + { + "epoch": 0.425086660208324, + "grad_norm": 1.49197660448408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87650 + }, + { + "epoch": 0.42513515840116006, + "grad_norm": 1.2986573949547164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87660 + }, + { + "epoch": 0.42518365659399615, + "grad_norm": 1.3246058472304867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87670 + }, + { + "epoch": 0.42523215478683224, + "grad_norm": 1.2976910568340827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87680 + }, + { + "epoch": 0.4252806529796683, + "grad_norm": 1.5727272284493665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87690 + }, + { + "epoch": 0.4253291511725044, + "grad_norm": 1.4776558998619294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87700 + }, + { + "epoch": 0.4253776493653405, + "grad_norm": 1.3096939710521838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87710 + }, + { + "epoch": 0.4254261475581766, + "grad_norm": 1.3480133986831788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87720 + }, + { + "epoch": 0.4254746457510127, + "grad_norm": 1.3328396164524747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87730 + }, + { + "epoch": 0.42552314394384877, + "grad_norm": 1.4976728834881214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87740 + }, + { + "epoch": 0.42557164213668486, + "grad_norm": 1.4857438657145394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87750 + }, + { + "epoch": 0.42562014032952095, + "grad_norm": 1.3535732534819545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87760 + }, + { + "epoch": 0.42566863852235703, + "grad_norm": 2.273075239145328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87770 + }, + { + "epoch": 0.4257171367151931, + "grad_norm": 1.4584222185476392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87780 + }, + { + "epoch": 0.42576563490802927, + "grad_norm": 1.4661570446605765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87790 + }, + { + "epoch": 0.42581413310086536, + "grad_norm": 1.6016061010759586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87800 + }, + { + "epoch": 0.42586263129370144, + "grad_norm": 1.513586624923846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87810 + }, + { + "epoch": 0.42591112948653753, + "grad_norm": 1.2937675819557626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87820 + }, + { + "epoch": 0.4259596276793736, + "grad_norm": 1.2834361484692636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87830 + }, + { + "epoch": 0.4260081258722097, + "grad_norm": 1.47132979577691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87840 + }, + { + "epoch": 0.4260566240650458, + "grad_norm": 1.535396449980908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87850 + }, + { + "epoch": 0.4261051222578819, + "grad_norm": 1.2697498164015997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87860 + }, + { + "epoch": 0.426153620450718, + "grad_norm": 1.3744055138431577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87870 + }, + { + "epoch": 0.42620211864355406, + "grad_norm": 1.3090620143429987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87880 + }, + { + "epoch": 0.42625061683639015, + "grad_norm": 1.4820554383732087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87890 + }, + { + "epoch": 0.42629911502922624, + "grad_norm": 1.436498138218667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87900 + }, + { + "epoch": 0.4263476132220623, + "grad_norm": 1.3251849395601312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87910 + }, + { + "epoch": 0.4263961114148984, + "grad_norm": 1.3316632418991503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87920 + }, + { + "epoch": 0.4264446096077345, + "grad_norm": 1.2661760706578207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87930 + }, + { + "epoch": 0.4264931078005706, + "grad_norm": 1.415421166939268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87940 + }, + { + "epoch": 0.4265416059934067, + "grad_norm": 1.5816954146430362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87950 + }, + { + "epoch": 0.42659010418624277, + "grad_norm": 1.2525443082722632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87960 + }, + { + "epoch": 0.42663860237907886, + "grad_norm": 1.2446942321275856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87970 + }, + { + "epoch": 0.42668710057191495, + "grad_norm": 1.982404285172379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87980 + }, + { + "epoch": 0.42673559876475103, + "grad_norm": 1.440684229692124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 87990 + }, + { + "epoch": 0.4267840969575871, + "grad_norm": 1.4050534957732452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88000 + }, + { + "epoch": 0.4268325951504232, + "grad_norm": 1.2726448517241806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88010 + }, + { + "epoch": 0.4268810933432593, + "grad_norm": 1.243185039356831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88020 + }, + { + "epoch": 0.4269295915360954, + "grad_norm": 1.193763949913773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88030 + }, + { + "epoch": 0.4269780897289315, + "grad_norm": 1.39395893938854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88040 + }, + { + "epoch": 0.42702658792176756, + "grad_norm": 1.4210117171842285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88050 + }, + { + "epoch": 0.42707508611460365, + "grad_norm": 1.263285156483107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88060 + }, + { + "epoch": 0.42712358430743974, + "grad_norm": 1.822341175738984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88070 + }, + { + "epoch": 0.42717208250027583, + "grad_norm": 1.2450819042442163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88080 + }, + { + "epoch": 0.4272205806931119, + "grad_norm": 1.4077535581691336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88090 + }, + { + "epoch": 0.427269078885948, + "grad_norm": 1.4124714198260335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88100 + }, + { + "epoch": 0.4273175770787841, + "grad_norm": 1.2161264351107093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88110 + }, + { + "epoch": 0.4273660752716202, + "grad_norm": 1.2211944522277918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88120 + }, + { + "epoch": 0.42741457346445627, + "grad_norm": 1.215983047586633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88130 + }, + { + "epoch": 0.42746307165729236, + "grad_norm": 1.3564573464464047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88140 + }, + { + "epoch": 0.42751156985012845, + "grad_norm": 1.3767323991942249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88150 + }, + { + "epoch": 0.42756006804296454, + "grad_norm": 1.277433199220468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88160 + }, + { + "epoch": 0.4276085662358006, + "grad_norm": 1.198366845756027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88170 + }, + { + "epoch": 0.4276570644286367, + "grad_norm": 1.2047097186496103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88180 + }, + { + "epoch": 0.4277055626214728, + "grad_norm": 1.3557138345277053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88190 + }, + { + "epoch": 0.4277540608143089, + "grad_norm": 1.3825551548052317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88200 + }, + { + "epoch": 0.427802559007145, + "grad_norm": 1.1696025126184395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88210 + }, + { + "epoch": 0.42785105719998107, + "grad_norm": 1.233136828204806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88220 + }, + { + "epoch": 0.42789955539281715, + "grad_norm": 1.1971613389505364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88230 + }, + { + "epoch": 0.42794805358565324, + "grad_norm": 1.358702945708501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88240 + }, + { + "epoch": 0.42799655177848933, + "grad_norm": 1.3607831306217122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88250 + }, + { + "epoch": 0.4280450499713254, + "grad_norm": 1.1622339712857865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88260 + }, + { + "epoch": 0.4280935481641615, + "grad_norm": 1.1621111895010472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88270 + }, + { + "epoch": 0.4281420463569976, + "grad_norm": 1.1997295246146678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88280 + }, + { + "epoch": 0.4281905445498337, + "grad_norm": 1.2943471006110485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88290 + }, + { + "epoch": 0.42823904274266983, + "grad_norm": 1.3377244556522783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88300 + }, + { + "epoch": 0.4282875409355059, + "grad_norm": 1.2321527265157783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88310 + }, + { + "epoch": 0.428336039128342, + "grad_norm": 1.1471979632915463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88320 + }, + { + "epoch": 0.4283845373211781, + "grad_norm": 1.6952556336491398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88330 + }, + { + "epoch": 0.4284330355140142, + "grad_norm": 1.341167035207036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88340 + }, + { + "epoch": 0.42848153370685027, + "grad_norm": 1.3103874607622856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88350 + }, + { + "epoch": 0.42853003189968636, + "grad_norm": 1.171114547560137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88360 + }, + { + "epoch": 0.42857853009252245, + "grad_norm": 1.2668049009789684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88370 + }, + { + "epoch": 0.42862702828535854, + "grad_norm": 1.1611719230586459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88380 + }, + { + "epoch": 0.4286755264781946, + "grad_norm": 1.281762394000907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88390 + }, + { + "epoch": 0.4287240246710307, + "grad_norm": 1.3055694125796435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88400 + }, + { + "epoch": 0.4287725228638668, + "grad_norm": 1.1247328757235664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88410 + }, + { + "epoch": 0.4288210210567029, + "grad_norm": 1.1325257531780153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88420 + }, + { + "epoch": 0.428869519249539, + "grad_norm": 1.1314138248508243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88430 + }, + { + "epoch": 0.42891801744237507, + "grad_norm": 1.3468950044170924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88440 + }, + { + "epoch": 0.42896651563521115, + "grad_norm": 1.2955597128438967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88450 + }, + { + "epoch": 0.42901501382804724, + "grad_norm": 1.1385039755396065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88460 + }, + { + "epoch": 0.42906351202088333, + "grad_norm": 1.1296290836071421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88470 + }, + { + "epoch": 0.4291120102137194, + "grad_norm": 1.1657525789132706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88480 + }, + { + "epoch": 0.4291605084065555, + "grad_norm": 1.290518554242226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88490 + }, + { + "epoch": 0.4292090065993916, + "grad_norm": 1.2735382881601254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88500 + }, + { + "epoch": 0.4292575047922277, + "grad_norm": 1.0954838103316433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88510 + }, + { + "epoch": 0.4293060029850638, + "grad_norm": 1.1264631183394158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88520 + }, + { + "epoch": 0.42935450117789986, + "grad_norm": 1.1177010605933901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88530 + }, + { + "epoch": 0.42940299937073595, + "grad_norm": 1.306655974531168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88540 + }, + { + "epoch": 0.42945149756357204, + "grad_norm": 1.3221327321844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88550 + }, + { + "epoch": 0.4294999957564081, + "grad_norm": 1.0917749193595228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88560 + }, + { + "epoch": 0.4295484939492442, + "grad_norm": 1.1630240948079518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88570 + }, + { + "epoch": 0.4295969921420803, + "grad_norm": 1.1144608436097769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88580 + }, + { + "epoch": 0.4296454903349164, + "grad_norm": 1.299585363767619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88590 + }, + { + "epoch": 0.4296939885277525, + "grad_norm": 1.3049044866875192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88600 + }, + { + "epoch": 0.42974248672058857, + "grad_norm": 1.2549480743473396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88610 + }, + { + "epoch": 0.42979098491342466, + "grad_norm": 1.0983638532024997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88620 + }, + { + "epoch": 0.42983948310626074, + "grad_norm": 1.2581283215240546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88630 + }, + { + "epoch": 0.42988798129909683, + "grad_norm": 1.2648240499402164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88640 + }, + { + "epoch": 0.4299364794919329, + "grad_norm": 1.2732867560316663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88650 + }, + { + "epoch": 0.429984977684769, + "grad_norm": 1.1162901358829913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88660 + }, + { + "epoch": 0.4300334758776051, + "grad_norm": 1.0874476430444702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88670 + }, + { + "epoch": 0.4300819740704412, + "grad_norm": 1.1544653943929006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88680 + }, + { + "epoch": 0.4301304722632773, + "grad_norm": 2.0015325219446822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88690 + }, + { + "epoch": 0.43017897045611336, + "grad_norm": 1.2472106902805535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88700 + }, + { + "epoch": 0.43022746864894945, + "grad_norm": 1.0663297445034914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88710 + }, + { + "epoch": 0.43027596684178554, + "grad_norm": 1.036633605622228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88720 + }, + { + "epoch": 0.43032446503462163, + "grad_norm": 1.1875738437083783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88730 + }, + { + "epoch": 0.4303729632274577, + "grad_norm": 1.3111349517203053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88740 + }, + { + "epoch": 0.4304214614202938, + "grad_norm": 1.2311534192122053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88750 + }, + { + "epoch": 0.4304699596131299, + "grad_norm": 1.0376504633313743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88760 + }, + { + "epoch": 0.430518457805966, + "grad_norm": 1.1185311876715787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88770 + }, + { + "epoch": 0.43056695599880207, + "grad_norm": 1.0488360402405306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88780 + }, + { + "epoch": 0.43061545419163816, + "grad_norm": 1.2678691518885898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88790 + }, + { + "epoch": 0.43066395238447425, + "grad_norm": 1.2255235048996838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88800 + }, + { + "epoch": 0.4307124505773104, + "grad_norm": 1.0999642086062522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88810 + }, + { + "epoch": 0.4307609487701465, + "grad_norm": 1.0782755310856373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88820 + }, + { + "epoch": 0.43080944696298257, + "grad_norm": 1.0231886449219019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88830 + }, + { + "epoch": 0.43085794515581866, + "grad_norm": 2.56653834185272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88840 + }, + { + "epoch": 0.43090644334865474, + "grad_norm": 1.2184817421712069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88850 + }, + { + "epoch": 0.43095494154149083, + "grad_norm": 1.0383822512949337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88860 + }, + { + "epoch": 0.4310034397343269, + "grad_norm": 1.0238489522862437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88870 + }, + { + "epoch": 0.431051937927163, + "grad_norm": 1.01020951603914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88880 + }, + { + "epoch": 0.4311004361199991, + "grad_norm": 1.2035351915073988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88890 + }, + { + "epoch": 0.4311489343128352, + "grad_norm": 1.1836419844257762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88900 + }, + { + "epoch": 0.4311974325056713, + "grad_norm": 1.0343827483438872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88910 + }, + { + "epoch": 0.43124593069850736, + "grad_norm": 1.008897996257474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88920 + }, + { + "epoch": 0.43129442889134345, + "grad_norm": 1.0249848969579034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88930 + }, + { + "epoch": 0.43134292708417954, + "grad_norm": 1.1734961447018577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88940 + }, + { + "epoch": 0.4313914252770156, + "grad_norm": 1.1680842959549409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88950 + }, + { + "epoch": 0.4314399234698517, + "grad_norm": 1.0008790241045062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88960 + }, + { + "epoch": 0.4314884216626878, + "grad_norm": 1.0054655774638377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88970 + }, + { + "epoch": 0.4315369198555239, + "grad_norm": 1.0529002025805312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88980 + }, + { + "epoch": 0.43158541804836, + "grad_norm": 1.187426406090708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 88990 + }, + { + "epoch": 0.43163391624119607, + "grad_norm": 1.2029428830828692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89000 + }, + { + "epoch": 0.43168241443403216, + "grad_norm": 9.977685522244428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89010 + }, + { + "epoch": 0.43173091262686825, + "grad_norm": 1.0059719102173403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89020 + }, + { + "epoch": 0.43177941081970433, + "grad_norm": 9.786008092760312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89030 + }, + { + "epoch": 0.4318279090125404, + "grad_norm": 1.1631635743469815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89040 + }, + { + "epoch": 0.4318764072053765, + "grad_norm": 1.1334483929203998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89050 + }, + { + "epoch": 0.4319249053982126, + "grad_norm": 9.689816238278581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89060 + }, + { + "epoch": 0.4319734035910487, + "grad_norm": 9.871945394479553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89070 + }, + { + "epoch": 0.4320219017838848, + "grad_norm": 9.77412994984661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89080 + }, + { + "epoch": 0.43207039997672086, + "grad_norm": 1.1694928048200381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89090 + }, + { + "epoch": 0.43211889816955695, + "grad_norm": 1.1557877854784238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89100 + }, + { + "epoch": 0.43216739636239304, + "grad_norm": 9.585788518506888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89110 + }, + { + "epoch": 0.43221589455522913, + "grad_norm": 9.921777888166616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89120 + }, + { + "epoch": 0.4322643927480652, + "grad_norm": 1.0679826090154165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89130 + }, + { + "epoch": 0.4323128909409013, + "grad_norm": 1.1625152041006004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89140 + }, + { + "epoch": 0.4323613891337374, + "grad_norm": 1.1695890833607336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89150 + }, + { + "epoch": 0.4324098873265735, + "grad_norm": 9.673194512060945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89160 + }, + { + "epoch": 0.43245838551940957, + "grad_norm": 1.2031821938762732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89170 + }, + { + "epoch": 0.43250688371224566, + "grad_norm": 9.312238091752079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89180 + }, + { + "epoch": 0.43255538190508175, + "grad_norm": 1.1101075614305955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89190 + }, + { + "epoch": 0.43260388009791784, + "grad_norm": 1.1774151431609425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89200 + }, + { + "epoch": 0.4326523782907539, + "grad_norm": 9.313505699992675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89210 + }, + { + "epoch": 0.43270087648359, + "grad_norm": 9.314843651964111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89220 + }, + { + "epoch": 0.4327493746764261, + "grad_norm": 1.0138483474975146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89230 + }, + { + "epoch": 0.4327978728692622, + "grad_norm": 1.1281866107992755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89240 + }, + { + "epoch": 0.4328463710620983, + "grad_norm": 1.1262882537721453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89250 + }, + { + "epoch": 0.43289486925493437, + "grad_norm": 9.9121109542466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89260 + }, + { + "epoch": 0.43294336744777046, + "grad_norm": 9.363652253568944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89270 + }, + { + "epoch": 0.43299186564060654, + "grad_norm": 9.425522051742519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89280 + }, + { + "epoch": 0.43304036383344263, + "grad_norm": 1.1283813705631474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89290 + }, + { + "epoch": 0.4330888620262787, + "grad_norm": 1.1130993016195134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89300 + }, + { + "epoch": 0.4331373602191148, + "grad_norm": 9.792276500775188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89310 + }, + { + "epoch": 0.4331858584119509, + "grad_norm": 9.341031415033285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89320 + }, + { + "epoch": 0.43323435660478704, + "grad_norm": 9.886250751378611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89330 + }, + { + "epoch": 0.43328285479762313, + "grad_norm": 1.1917399689309605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89340 + }, + { + "epoch": 0.4333313529904592, + "grad_norm": 1.1024523871583369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89350 + }, + { + "epoch": 0.4333798511832953, + "grad_norm": 9.139125722867902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89360 + }, + { + "epoch": 0.4334283493761314, + "grad_norm": 9.126945599291503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89370 + }, + { + "epoch": 0.4334768475689675, + "grad_norm": 1.5561025179522403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89380 + }, + { + "epoch": 0.43352534576180357, + "grad_norm": 1.075897060331954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89390 + }, + { + "epoch": 0.43357384395463966, + "grad_norm": 1.1007573164079076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89400 + }, + { + "epoch": 0.43362234214747575, + "grad_norm": 9.739888184867596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89410 + }, + { + "epoch": 0.43367084034031184, + "grad_norm": 8.901801606953086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89420 + }, + { + "epoch": 0.4337193385331479, + "grad_norm": 9.062342343213459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89430 + }, + { + "epoch": 0.433767836725984, + "grad_norm": 1.0957718643567205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89440 + }, + { + "epoch": 0.4338163349188201, + "grad_norm": 1.0783632120592301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89450 + }, + { + "epoch": 0.4338648331116562, + "grad_norm": 8.851041854995856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89460 + }, + { + "epoch": 0.4339133313044923, + "grad_norm": 9.350291918508447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89470 + }, + { + "epoch": 0.43396182949732837, + "grad_norm": 9.246338805724008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89480 + }, + { + "epoch": 0.43401032769016445, + "grad_norm": 1.0696449948000009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89490 + }, + { + "epoch": 0.43405882588300054, + "grad_norm": 1.0779039882891084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89500 + }, + { + "epoch": 0.43410732407583663, + "grad_norm": 8.706062004648629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89510 + }, + { + "epoch": 0.4341558222686727, + "grad_norm": 1.350266387589727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89520 + }, + { + "epoch": 0.4342043204615088, + "grad_norm": 9.051947813532024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89530 + }, + { + "epoch": 0.4342528186543449, + "grad_norm": 1.0717300824580889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89540 + }, + { + "epoch": 0.434301316847181, + "grad_norm": 1.0519379145534913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89550 + }, + { + "epoch": 0.4343498150400171, + "grad_norm": 1.7912756788973638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89560 + }, + { + "epoch": 0.43439831323285316, + "grad_norm": 8.70624461413172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89570 + }, + { + "epoch": 0.43444681142568925, + "grad_norm": 8.725001521270315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89580 + }, + { + "epoch": 0.43449530961852534, + "grad_norm": 1.0688326312902063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89590 + }, + { + "epoch": 0.4345438078113614, + "grad_norm": 1.0968693686663755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89600 + }, + { + "epoch": 0.4345923060041975, + "grad_norm": 8.868267542538888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89610 + }, + { + "epoch": 0.4346408041970336, + "grad_norm": 8.637202597583382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89620 + }, + { + "epoch": 0.4346893023898697, + "grad_norm": 8.63945359697027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89630 + }, + { + "epoch": 0.4347378005827058, + "grad_norm": 1.0469497624399082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89640 + }, + { + "epoch": 0.43478629877554187, + "grad_norm": 1.040682633401957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89650 + }, + { + "epoch": 0.43483479696837796, + "grad_norm": 8.601060130786209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89660 + }, + { + "epoch": 0.43488329516121405, + "grad_norm": 8.588839506273871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89670 + }, + { + "epoch": 0.43493179335405013, + "grad_norm": 8.539916507288581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89680 + }, + { + "epoch": 0.4349802915468862, + "grad_norm": 1.0506486347594546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89690 + }, + { + "epoch": 0.4350287897397223, + "grad_norm": 1.066094341695134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89700 + }, + { + "epoch": 0.4350772879325584, + "grad_norm": 8.413655194772218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89710 + }, + { + "epoch": 0.4351257861253945, + "grad_norm": 8.24720132186485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89720 + }, + { + "epoch": 0.4351742843182306, + "grad_norm": 8.441080012744351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89730 + }, + { + "epoch": 0.43522278251106666, + "grad_norm": 1.0432073338506598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89740 + }, + { + "epoch": 0.43527128070390275, + "grad_norm": 1.0399265448768347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89750 + }, + { + "epoch": 0.43531977889673884, + "grad_norm": 8.771719706146541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89760 + }, + { + "epoch": 0.43536827708957493, + "grad_norm": 8.290261632737383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89770 + }, + { + "epoch": 0.435416775282411, + "grad_norm": 8.525102401790718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89780 + }, + { + "epoch": 0.4354652734752471, + "grad_norm": 1.0322570176413137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89790 + }, + { + "epoch": 0.4355137716680832, + "grad_norm": 1.077077698141693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89800 + }, + { + "epoch": 0.4355622698609193, + "grad_norm": 8.326245648504482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89810 + }, + { + "epoch": 0.43561076805375537, + "grad_norm": 8.36903382150922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89820 + }, + { + "epoch": 0.43565926624659146, + "grad_norm": 8.153840269642387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89830 + }, + { + "epoch": 0.4357077644394276, + "grad_norm": 1.0322879262503193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89840 + }, + { + "epoch": 0.4357562626322637, + "grad_norm": 1.0155332574868225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89850 + }, + { + "epoch": 0.4358047608250998, + "grad_norm": 8.142238527852896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89860 + }, + { + "epoch": 0.43585325901793587, + "grad_norm": 8.373462634381212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89870 + }, + { + "epoch": 0.43590175721077196, + "grad_norm": 8.359137382285553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89880 + }, + { + "epoch": 0.43595025540360804, + "grad_norm": 1.0204481526443487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89890 + }, + { + "epoch": 0.43599875359644413, + "grad_norm": 1.019168607285792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89900 + }, + { + "epoch": 0.4360472517892802, + "grad_norm": 8.357977065998057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89910 + }, + { + "epoch": 0.4360957499821163, + "grad_norm": 8.012470686935558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89920 + }, + { + "epoch": 0.4361442481749524, + "grad_norm": 8.099492276869569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89930 + }, + { + "epoch": 0.4361927463677885, + "grad_norm": 1.0010439410734762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89940 + }, + { + "epoch": 0.4362412445606246, + "grad_norm": 1.0007965300928845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89950 + }, + { + "epoch": 0.43628974275346066, + "grad_norm": 7.980471394830602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89960 + }, + { + "epoch": 0.43633824094629675, + "grad_norm": 7.645667920996857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89970 + }, + { + "epoch": 0.43638673913913284, + "grad_norm": 7.908449362048486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89980 + }, + { + "epoch": 0.43643523733196893, + "grad_norm": 9.825112812222869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 89990 + }, + { + "epoch": 0.436483735524805, + "grad_norm": 9.541535916923749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90000 + }, + { + "epoch": 0.4365322337176411, + "grad_norm": 7.818754710342546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90010 + }, + { + "epoch": 0.4365807319104772, + "grad_norm": 8.072250068380526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90020 + }, + { + "epoch": 0.4366292301033133, + "grad_norm": 8.029762454953016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90030 + }, + { + "epoch": 0.43667772829614937, + "grad_norm": 9.685998492159342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90040 + }, + { + "epoch": 0.43672622648898546, + "grad_norm": 9.762638342181162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90050 + }, + { + "epoch": 0.43677472468182155, + "grad_norm": 7.929661194339133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90060 + }, + { + "epoch": 0.43682322287465764, + "grad_norm": 7.804177926118427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90070 + }, + { + "epoch": 0.4368717210674937, + "grad_norm": 7.952715463943605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90080 + }, + { + "epoch": 0.4369202192603298, + "grad_norm": 9.961118507817446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90090 + }, + { + "epoch": 0.4369687174531659, + "grad_norm": 9.613790297180458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90100 + }, + { + "epoch": 0.437017215646002, + "grad_norm": 8.015167907160503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90110 + }, + { + "epoch": 0.4370657138388381, + "grad_norm": 7.745887842247612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90120 + }, + { + "epoch": 0.43711421203167417, + "grad_norm": 8.262913553380713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90130 + }, + { + "epoch": 0.43716271022451025, + "grad_norm": 9.608829287799381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90140 + }, + { + "epoch": 0.43721120841734634, + "grad_norm": 9.541182777184076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90150 + }, + { + "epoch": 0.43725970661018243, + "grad_norm": 4.6556948518627905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90160 + }, + { + "epoch": 0.4373082048030185, + "grad_norm": 7.592193185246288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90170 + }, + { + "epoch": 0.4373567029958546, + "grad_norm": 7.551972913688587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90180 + }, + { + "epoch": 0.4374052011886907, + "grad_norm": 9.070006257161367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90190 + }, + { + "epoch": 0.4374536993815268, + "grad_norm": 9.508055853757469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90200 + }, + { + "epoch": 0.43750219757436287, + "grad_norm": 7.620985797984758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90210 + }, + { + "epoch": 0.43755069576719896, + "grad_norm": 8.329532619200108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90220 + }, + { + "epoch": 0.43759919396003505, + "grad_norm": 7.443091476488917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90230 + }, + { + "epoch": 0.43764769215287114, + "grad_norm": 9.302923587029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90240 + }, + { + "epoch": 0.4376961903457072, + "grad_norm": 9.254830501959077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90250 + }, + { + "epoch": 0.4377446885385433, + "grad_norm": 7.419274794528974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90260 + }, + { + "epoch": 0.4377931867313794, + "grad_norm": 7.620212016945516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90270 + }, + { + "epoch": 0.4378416849242155, + "grad_norm": 7.403628643487536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90280 + }, + { + "epoch": 0.4378901831170516, + "grad_norm": 9.254860344753979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90290 + }, + { + "epoch": 0.43793868130988767, + "grad_norm": 9.204062223489018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90300 + }, + { + "epoch": 0.43798717950272376, + "grad_norm": 8.6517694342092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90310 + }, + { + "epoch": 0.43803567769555984, + "grad_norm": 7.217891351274375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90320 + }, + { + "epoch": 0.43808417588839593, + "grad_norm": 7.38892751428466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90330 + }, + { + "epoch": 0.438132674081232, + "grad_norm": 9.332391215366442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90340 + }, + { + "epoch": 0.43818117227406816, + "grad_norm": 9.15590305794467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90350 + }, + { + "epoch": 0.43822967046690425, + "grad_norm": 7.535199841868234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90360 + }, + { + "epoch": 0.43827816865974034, + "grad_norm": 7.596810291943257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90370 + }, + { + "epoch": 0.43832666685257643, + "grad_norm": 7.256983991510424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90380 + }, + { + "epoch": 0.4383751650454125, + "grad_norm": 9.194849326377152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90390 + }, + { + "epoch": 0.4384236632382486, + "grad_norm": 9.296019953808354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90400 + }, + { + "epoch": 0.4384721614310847, + "grad_norm": 7.121440148694091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90410 + }, + { + "epoch": 0.4385206596239208, + "grad_norm": 7.106817889734884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90420 + }, + { + "epoch": 0.43856915781675687, + "grad_norm": 7.236675259036929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90430 + }, + { + "epoch": 0.43861765600959296, + "grad_norm": 1.0429919683474509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90440 + }, + { + "epoch": 0.43866615420242905, + "grad_norm": 8.893417913213852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90450 + }, + { + "epoch": 0.43871465239526514, + "grad_norm": 7.429704851347196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90460 + }, + { + "epoch": 0.4387631505881012, + "grad_norm": 8.029010700738581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90470 + }, + { + "epoch": 0.4388116487809373, + "grad_norm": 7.345953889625889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90480 + }, + { + "epoch": 0.4388601469737734, + "grad_norm": 8.85361686187025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90490 + }, + { + "epoch": 0.4389086451666095, + "grad_norm": 9.386071297967646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90500 + }, + { + "epoch": 0.4389571433594456, + "grad_norm": 7.207586349977646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90510 + }, + { + "epoch": 0.43900564155228167, + "grad_norm": 7.24389295214678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90520 + }, + { + "epoch": 0.43905413974511776, + "grad_norm": 6.947910691224024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90530 + }, + { + "epoch": 0.43910263793795384, + "grad_norm": 8.944149954004388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90540 + }, + { + "epoch": 0.43915113613078993, + "grad_norm": 8.764075687395234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90550 + }, + { + "epoch": 0.439199634323626, + "grad_norm": 6.972252464265694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90560 + }, + { + "epoch": 0.4392481325164621, + "grad_norm": 7.166455873175437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90570 + }, + { + "epoch": 0.4392966307092982, + "grad_norm": 7.137136037727032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90580 + }, + { + "epoch": 0.4393451289021343, + "grad_norm": 8.949737662078405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90590 + }, + { + "epoch": 0.4393936270949704, + "grad_norm": 8.750713931249265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90600 + }, + { + "epoch": 0.43944212528780646, + "grad_norm": 6.901878890630542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90610 + }, + { + "epoch": 0.43949062348064255, + "grad_norm": 7.020910430810545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90620 + }, + { + "epoch": 0.43953912167347864, + "grad_norm": 7.133716195539819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90630 + }, + { + "epoch": 0.4395876198663147, + "grad_norm": 8.721244171283615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90640 + }, + { + "epoch": 0.4396361180591508, + "grad_norm": 8.742391344185307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90650 + }, + { + "epoch": 0.4396846162519869, + "grad_norm": 7.249706612810769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90660 + }, + { + "epoch": 0.439733114444823, + "grad_norm": 7.52294511130458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90670 + }, + { + "epoch": 0.4397816126376591, + "grad_norm": 6.886572379016798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90680 + }, + { + "epoch": 0.43983011083049517, + "grad_norm": 8.834410181179919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90690 + }, + { + "epoch": 0.43987860902333126, + "grad_norm": 8.725656641672686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90700 + }, + { + "epoch": 0.43992710721616735, + "grad_norm": 6.87302446067406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90710 + }, + { + "epoch": 0.43997560540900343, + "grad_norm": 7.053954220737069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90720 + }, + { + "epoch": 0.4400241036018395, + "grad_norm": 4.1733849798220035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90730 + }, + { + "epoch": 0.4400726017946756, + "grad_norm": 8.293088171740237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90740 + }, + { + "epoch": 0.4401210999875117, + "grad_norm": 8.925866978870545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90750 + }, + { + "epoch": 0.4401695981803478, + "grad_norm": 6.825389675668703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90760 + }, + { + "epoch": 0.4402180963731839, + "grad_norm": 6.551358211481784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90770 + }, + { + "epoch": 0.44026659456601996, + "grad_norm": 6.482663650331233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90780 + }, + { + "epoch": 0.44031509275885605, + "grad_norm": 8.909185567063105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90790 + }, + { + "epoch": 0.44036359095169214, + "grad_norm": 8.583626254221599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90800 + }, + { + "epoch": 0.44041208914452823, + "grad_norm": 6.616259184966111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90810 + }, + { + "epoch": 0.4404605873373643, + "grad_norm": 6.697229082419653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90820 + }, + { + "epoch": 0.4405090855302004, + "grad_norm": 6.65799646526466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90830 + }, + { + "epoch": 0.4405575837230365, + "grad_norm": 8.476805390955633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90840 + }, + { + "epoch": 0.4406060819158726, + "grad_norm": 8.495071313063818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90850 + }, + { + "epoch": 0.4406545801087087, + "grad_norm": 6.521150908156415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90860 + }, + { + "epoch": 0.4407030783015448, + "grad_norm": 6.498264326637582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90870 + }, + { + "epoch": 0.4407515764943809, + "grad_norm": 7.135948720815577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90880 + }, + { + "epoch": 0.440800074687217, + "grad_norm": 4.647939022106584e-06, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 90890 + }, + { + "epoch": 0.4408485728800531, + "grad_norm": 3.317089067422785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90900 + }, + { + "epoch": 0.44089707107288917, + "grad_norm": 1.935690306709148e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90910 + }, + { + "epoch": 0.44094556926572526, + "grad_norm": 1.4321668459160719e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90920 + }, + { + "epoch": 0.44099406745856135, + "grad_norm": 0.0002755977329798043, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 90930 + }, + { + "epoch": 0.44104256565139743, + "grad_norm": 7.477389590349048e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90940 + }, + { + "epoch": 0.4410910638442335, + "grad_norm": 3.1395615224028006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90950 + }, + { + "epoch": 0.4411395620370696, + "grad_norm": 1.7803880837163888e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90960 + }, + { + "epoch": 0.4411880602299057, + "grad_norm": 1.1509023352118675e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90970 + }, + { + "epoch": 0.4412365584227418, + "grad_norm": 9.378548384120222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90980 + }, + { + "epoch": 0.4412850566155779, + "grad_norm": 1.002517092274502e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 90990 + }, + { + "epoch": 0.44133355480841396, + "grad_norm": 8.23654409032315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91000 + }, + { + "epoch": 0.44138205300125005, + "grad_norm": 6.010277502355166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91010 + }, + { + "epoch": 0.44143055119408614, + "grad_norm": 6.139645847724751e-06, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 91020 + }, + { + "epoch": 0.44147904938692223, + "grad_norm": 1.2997279554838315e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91030 + }, + { + "epoch": 0.4415275475797583, + "grad_norm": 0.13959696888923645, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 91040 + }, + { + "epoch": 0.4415760457725944, + "grad_norm": 0.002440987154841423, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 91050 + }, + { + "epoch": 0.4416245439654305, + "grad_norm": 0.0002878751256503165, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 91060 + }, + { + "epoch": 0.4416730421582666, + "grad_norm": 0.0001223428116645664, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91070 + }, + { + "epoch": 0.44172154035110267, + "grad_norm": 0.00011883502884302288, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91080 + }, + { + "epoch": 0.44177003854393876, + "grad_norm": 0.0025684568099677563, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91090 + }, + { + "epoch": 0.44181853673677485, + "grad_norm": 3.319809911772609e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91100 + }, + { + "epoch": 0.44186703492961094, + "grad_norm": 4.024224836030044e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91110 + }, + { + "epoch": 0.441915533122447, + "grad_norm": 1.7964253856916912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91120 + }, + { + "epoch": 0.4419640313152831, + "grad_norm": 1.6897174646146595e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91130 + }, + { + "epoch": 0.4420125295081192, + "grad_norm": 1.6562913515372202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91140 + }, + { + "epoch": 0.4420610277009553, + "grad_norm": 1.6718349797884002e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91150 + }, + { + "epoch": 0.4421095258937914, + "grad_norm": 1.1518156497913878e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91160 + }, + { + "epoch": 0.44215802408662747, + "grad_norm": 1.1252522199356463e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91170 + }, + { + "epoch": 0.44220652227946355, + "grad_norm": 1.3242510249256156e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91180 + }, + { + "epoch": 0.44225502047229964, + "grad_norm": 1.666878415562678e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91190 + }, + { + "epoch": 0.44230351866513573, + "grad_norm": 1.2502989193308167e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91200 + }, + { + "epoch": 0.4423520168579718, + "grad_norm": 8.94042295840336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91210 + }, + { + "epoch": 0.4424005150508079, + "grad_norm": 8.854811312630773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91220 + }, + { + "epoch": 0.442449013243644, + "grad_norm": 7.448446922353469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91230 + }, + { + "epoch": 0.4424975114364801, + "grad_norm": 8.502802302245982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91240 + }, + { + "epoch": 0.4425460096293162, + "grad_norm": 8.167128726199735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91250 + }, + { + "epoch": 0.44259450782215226, + "grad_norm": 6.691323505947366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91260 + }, + { + "epoch": 0.44264300601498835, + "grad_norm": 6.566247520822799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91270 + }, + { + "epoch": 0.44269150420782444, + "grad_norm": 6.623955869144993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91280 + }, + { + "epoch": 0.4427400024006605, + "grad_norm": 6.746309281879803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91290 + }, + { + "epoch": 0.4427885005934966, + "grad_norm": 6.511449100798927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91300 + }, + { + "epoch": 0.4428369987863327, + "grad_norm": 8.219391929742415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91310 + }, + { + "epoch": 0.4428854969791688, + "grad_norm": 5.3300354920793325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91320 + }, + { + "epoch": 0.4429339951720049, + "grad_norm": 5.5838927437434904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91330 + }, + { + "epoch": 0.44298249336484097, + "grad_norm": 6.139985089248512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91340 + }, + { + "epoch": 0.44303099155767706, + "grad_norm": 5.603197678283323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91350 + }, + { + "epoch": 0.44307948975051314, + "grad_norm": 4.647496552934172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91360 + }, + { + "epoch": 0.4431279879433493, + "grad_norm": 7.881601959525142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91370 + }, + { + "epoch": 0.4431764861361854, + "grad_norm": 4.677819561038632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91380 + }, + { + "epoch": 0.44322498432902147, + "grad_norm": 5.467014489113353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91390 + }, + { + "epoch": 0.44327348252185755, + "grad_norm": 4.7414096115971915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91400 + }, + { + "epoch": 0.44332198071469364, + "grad_norm": 4.627105226973072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91410 + }, + { + "epoch": 0.44337047890752973, + "grad_norm": 6.1488708524848334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91420 + }, + { + "epoch": 0.4434189771003658, + "grad_norm": 5.259284080239013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91430 + }, + { + "epoch": 0.4434674752932019, + "grad_norm": 5.384847554523731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91440 + }, + { + "epoch": 0.443515973486038, + "grad_norm": 6.005394880048698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91450 + }, + { + "epoch": 0.4435644716788741, + "grad_norm": 4.79313894174993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91460 + }, + { + "epoch": 0.44361296987171017, + "grad_norm": 5.116360171086853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91470 + }, + { + "epoch": 0.44366146806454626, + "grad_norm": 4.946455646859249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91480 + }, + { + "epoch": 0.44370996625738235, + "grad_norm": 4.9252939788857475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91490 + }, + { + "epoch": 0.44375846445021844, + "grad_norm": 5.1882752813980915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91500 + }, + { + "epoch": 0.4438069626430545, + "grad_norm": 0.0002644760243128985, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91510 + }, + { + "epoch": 0.4438554608358906, + "grad_norm": 4.348821676103398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91520 + }, + { + "epoch": 0.4439039590287267, + "grad_norm": 3.6997757888457272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91530 + }, + { + "epoch": 0.4439524572215628, + "grad_norm": 3.843323611363303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91540 + }, + { + "epoch": 0.4440009554143989, + "grad_norm": 3.7987028917996213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91550 + }, + { + "epoch": 0.44404945360723497, + "grad_norm": 4.6627656047348864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91560 + }, + { + "epoch": 0.44409795180007106, + "grad_norm": 3.2153723168448778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91570 + }, + { + "epoch": 0.44414644999290714, + "grad_norm": 2.9250970783323282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91580 + }, + { + "epoch": 0.44419494818574323, + "grad_norm": 3.519390475048567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91590 + }, + { + "epoch": 0.4442434463785793, + "grad_norm": 3.6130272746959236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91600 + }, + { + "epoch": 0.4442919445714154, + "grad_norm": 2.776271912807715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91610 + }, + { + "epoch": 0.4443404427642515, + "grad_norm": 2.8734884836012498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91620 + }, + { + "epoch": 0.4443889409570876, + "grad_norm": 2.818819666572381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91630 + }, + { + "epoch": 0.4444374391499237, + "grad_norm": 3.756784280994907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91640 + }, + { + "epoch": 0.44448593734275976, + "grad_norm": 2.8920323984493734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91650 + }, + { + "epoch": 0.44453443553559585, + "grad_norm": 2.6688371690397616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91660 + }, + { + "epoch": 0.44458293372843194, + "grad_norm": 2.3797224457666744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91670 + }, + { + "epoch": 0.444631431921268, + "grad_norm": 2.6607149266055785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91680 + }, + { + "epoch": 0.4446799301141041, + "grad_norm": 2.875376367228455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91690 + }, + { + "epoch": 0.4447284283069402, + "grad_norm": 2.752822638285579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91700 + }, + { + "epoch": 0.4447769264997763, + "grad_norm": 2.958562390631414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91710 + }, + { + "epoch": 0.4448254246926124, + "grad_norm": 2.35238553614181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91720 + }, + { + "epoch": 0.44487392288544847, + "grad_norm": 2.127344941982301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91730 + }, + { + "epoch": 0.44492242107828456, + "grad_norm": 2.5909414489433402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91740 + }, + { + "epoch": 0.44497091927112065, + "grad_norm": 2.384025265200762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91750 + }, + { + "epoch": 0.44501941746395673, + "grad_norm": 2.2100321075413376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91760 + }, + { + "epoch": 0.4450679156567928, + "grad_norm": 2.4110725007631117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91770 + }, + { + "epoch": 0.4451164138496289, + "grad_norm": 1.8497322571420227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91780 + }, + { + "epoch": 0.445164912042465, + "grad_norm": 2.699408014450455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91790 + }, + { + "epoch": 0.4452134102353011, + "grad_norm": 2.395451474512811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91800 + }, + { + "epoch": 0.4452619084281372, + "grad_norm": 1.7957139561985969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91810 + }, + { + "epoch": 0.44531040662097326, + "grad_norm": 1.9521935428201687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91820 + }, + { + "epoch": 0.44535890481380935, + "grad_norm": 1.6702311995686614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91830 + }, + { + "epoch": 0.44540740300664544, + "grad_norm": 2.2909259769221535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91840 + }, + { + "epoch": 0.44545590119948153, + "grad_norm": 2.321154624951305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91850 + }, + { + "epoch": 0.4455043993923176, + "grad_norm": 1.7001095784507925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91860 + }, + { + "epoch": 0.4455528975851537, + "grad_norm": 1.6472234847242362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91870 + }, + { + "epoch": 0.4456013957779898, + "grad_norm": 1.7357012893626234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91880 + }, + { + "epoch": 0.44564989397082594, + "grad_norm": 1.7320609231319395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91890 + }, + { + "epoch": 0.445698392163662, + "grad_norm": 2.1920420749665936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91900 + }, + { + "epoch": 0.4457468903564981, + "grad_norm": 1.463465423512389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91910 + }, + { + "epoch": 0.4457953885493342, + "grad_norm": 1.4693324601466884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91920 + }, + { + "epoch": 0.4458438867421703, + "grad_norm": 1.615547489564051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91930 + }, + { + "epoch": 0.4458923849350064, + "grad_norm": 1.7924077155839768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91940 + }, + { + "epoch": 0.44594088312784247, + "grad_norm": 1.779806666490913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91950 + }, + { + "epoch": 0.44598938132067856, + "grad_norm": 1.497596713306848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91960 + }, + { + "epoch": 0.44603787951351465, + "grad_norm": 1.4493914477498038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91970 + }, + { + "epoch": 0.44608637770635073, + "grad_norm": 1.427176243851136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91980 + }, + { + "epoch": 0.4461348758991868, + "grad_norm": 1.7448624021199066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 91990 + }, + { + "epoch": 0.4461833740920229, + "grad_norm": 1.5232375289997435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92000 + }, + { + "epoch": 0.446231872284859, + "grad_norm": 1.407643367201672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92010 + }, + { + "epoch": 0.4462803704776951, + "grad_norm": 1.3047850870862021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92020 + }, + { + "epoch": 0.4463288686705312, + "grad_norm": 1.321547529187228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92030 + }, + { + "epoch": 0.44637736686336726, + "grad_norm": 1.685063352852012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92040 + }, + { + "epoch": 0.44642586505620335, + "grad_norm": 1.5623736544512212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92050 + }, + { + "epoch": 0.44647436324903944, + "grad_norm": 1.2625649787878501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92060 + }, + { + "epoch": 0.44652286144187553, + "grad_norm": 1.31511558265629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92070 + }, + { + "epoch": 0.4465713596347116, + "grad_norm": 1.4086484725339687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92080 + }, + { + "epoch": 0.4466198578275477, + "grad_norm": 1.6804527831482119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92090 + }, + { + "epoch": 0.4466683560203838, + "grad_norm": 1.3704893717658706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92100 + }, + { + "epoch": 0.4467168542132199, + "grad_norm": 2.446459802740719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92110 + }, + { + "epoch": 0.44676535240605597, + "grad_norm": 1.2841137504437938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92120 + }, + { + "epoch": 0.44681385059889206, + "grad_norm": 1.125603716900514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92130 + }, + { + "epoch": 0.44686234879172815, + "grad_norm": 1.397276605530351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92140 + }, + { + "epoch": 0.44691084698456424, + "grad_norm": 1.4945309203540091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92150 + }, + { + "epoch": 0.4469593451774003, + "grad_norm": 1.138194079430832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92160 + }, + { + "epoch": 0.4470078433702364, + "grad_norm": 1.3894085668653133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92170 + }, + { + "epoch": 0.4470563415630725, + "grad_norm": 1.155329755420098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92180 + }, + { + "epoch": 0.4471048397559086, + "grad_norm": 1.3178379276723717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92190 + }, + { + "epoch": 0.4471533379487447, + "grad_norm": 1.574854309183138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92200 + }, + { + "epoch": 0.44720183614158077, + "grad_norm": 1.0119623539139866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92210 + }, + { + "epoch": 0.44725033433441685, + "grad_norm": 9.416309580956295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92220 + }, + { + "epoch": 0.44729883252725294, + "grad_norm": 1.0546166322455974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92230 + }, + { + "epoch": 0.44734733072008903, + "grad_norm": 1.2390718211463536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92240 + }, + { + "epoch": 0.4473958289129251, + "grad_norm": 1.2205006214571767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92250 + }, + { + "epoch": 0.4474443271057612, + "grad_norm": 1.0356301345382235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92260 + }, + { + "epoch": 0.4474928252985973, + "grad_norm": 1.0882399692491163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92270 + }, + { + "epoch": 0.4475413234914334, + "grad_norm": 1.1120807812403655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92280 + }, + { + "epoch": 0.4475898216842695, + "grad_norm": 1.083833240045351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92290 + }, + { + "epoch": 0.44763831987710556, + "grad_norm": 1.1991955943813082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92300 + }, + { + "epoch": 0.44768681806994165, + "grad_norm": 8.802076649772062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92310 + }, + { + "epoch": 0.44773531626277774, + "grad_norm": 9.320485787611688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92320 + }, + { + "epoch": 0.4477838144556138, + "grad_norm": 1.1301785889372695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92330 + }, + { + "epoch": 0.4478323126484499, + "grad_norm": 1.4378871355802403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92340 + }, + { + "epoch": 0.447880810841286, + "grad_norm": 1.065027277036279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92350 + }, + { + "epoch": 0.4479293090341221, + "grad_norm": 8.868223630997818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92360 + }, + { + "epoch": 0.4479778072269582, + "grad_norm": 7.826508294783707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92370 + }, + { + "epoch": 0.44802630541979427, + "grad_norm": 9.353443033432995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92380 + }, + { + "epoch": 0.44807480361263036, + "grad_norm": 1.0148303317691898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92390 + }, + { + "epoch": 0.4481233018054665, + "grad_norm": 1.0198726840826566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92400 + }, + { + "epoch": 0.4481717999983026, + "grad_norm": 8.644508397992468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92410 + }, + { + "epoch": 0.4482202981911387, + "grad_norm": 7.66234620641626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92420 + }, + { + "epoch": 0.44826879638397477, + "grad_norm": 0.00115307979285717, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 92430 + }, + { + "epoch": 0.44831729457681085, + "grad_norm": 9.442629379918799e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 92440 + }, + { + "epoch": 0.44836579276964694, + "grad_norm": 0.004771435167640448, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 92450 + }, + { + "epoch": 0.44841429096248303, + "grad_norm": 0.1324959248304367, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 92460 + }, + { + "epoch": 0.4484627891553191, + "grad_norm": 0.003848625812679529, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 92470 + }, + { + "epoch": 0.4485112873481552, + "grad_norm": 0.00037395587423816323, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92480 + }, + { + "epoch": 0.4485597855409913, + "grad_norm": 3.193623706465587e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92490 + }, + { + "epoch": 0.4486082837338274, + "grad_norm": 2.257289452245459e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92500 + }, + { + "epoch": 0.4486567819266635, + "grad_norm": 4.83282892673742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92510 + }, + { + "epoch": 0.44870528011949956, + "grad_norm": 1.3808640687784646e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92520 + }, + { + "epoch": 0.44875377831233565, + "grad_norm": 1.5209545381367207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92530 + }, + { + "epoch": 0.44880227650517174, + "grad_norm": 1.205753596877912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92540 + }, + { + "epoch": 0.4488507746980078, + "grad_norm": 1.170080213341862e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92550 + }, + { + "epoch": 0.4488992728908439, + "grad_norm": 3.630188075476326e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92560 + }, + { + "epoch": 0.44894777108368, + "grad_norm": 7.910772183095105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92570 + }, + { + "epoch": 0.4489962692765161, + "grad_norm": 9.432992555957753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92580 + }, + { + "epoch": 0.4490447674693522, + "grad_norm": 8.121774953906424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92590 + }, + { + "epoch": 0.44909326566218827, + "grad_norm": 8.44014994072495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92600 + }, + { + "epoch": 0.44914176385502436, + "grad_norm": 6.648905127804028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92610 + }, + { + "epoch": 0.44919026204786044, + "grad_norm": 5.669253368978389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92620 + }, + { + "epoch": 0.44923876024069653, + "grad_norm": 6.961851340747671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92630 + }, + { + "epoch": 0.4492872584335326, + "grad_norm": 8.417974640906323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92640 + }, + { + "epoch": 0.4493357566263687, + "grad_norm": 6.0393695093807764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92650 + }, + { + "epoch": 0.4493842548192048, + "grad_norm": 5.126377800479531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92660 + }, + { + "epoch": 0.4494327530120409, + "grad_norm": 5.0670823839027435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92670 + }, + { + "epoch": 0.449481251204877, + "grad_norm": 4.402869762998307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92680 + }, + { + "epoch": 0.44952974939771306, + "grad_norm": 6.490651230706135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92690 + }, + { + "epoch": 0.44957824759054915, + "grad_norm": 5.4978982007014565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92700 + }, + { + "epoch": 0.44962674578338524, + "grad_norm": 4.102880211576121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92710 + }, + { + "epoch": 0.44967524397622133, + "grad_norm": 4.16833199778921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92720 + }, + { + "epoch": 0.4497237421690574, + "grad_norm": 9.184348164126277e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92730 + }, + { + "epoch": 0.4497722403618935, + "grad_norm": 4.0466993596055545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92740 + }, + { + "epoch": 0.4498207385547296, + "grad_norm": 4.04873890147428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92750 + }, + { + "epoch": 0.4498692367475657, + "grad_norm": 3.935565018764464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92760 + }, + { + "epoch": 0.44991773494040177, + "grad_norm": 3.7478246213140665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92770 + }, + { + "epoch": 0.44996623313323786, + "grad_norm": 1.0486562132427935e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92780 + }, + { + "epoch": 0.45001473132607395, + "grad_norm": 3.360695245646639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92790 + }, + { + "epoch": 0.45006322951891004, + "grad_norm": 3.549557277437998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92800 + }, + { + "epoch": 0.4501117277117461, + "grad_norm": 3.6222847938915947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92810 + }, + { + "epoch": 0.4501602259045822, + "grad_norm": 3.0594369491154794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92820 + }, + { + "epoch": 0.4502087240974183, + "grad_norm": 2.8066385766578605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92830 + }, + { + "epoch": 0.4502572222902544, + "grad_norm": 3.390193796803942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92840 + }, + { + "epoch": 0.4503057204830905, + "grad_norm": 2.717813003982883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92850 + }, + { + "epoch": 0.45035421867592657, + "grad_norm": 2.919571443271707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92860 + }, + { + "epoch": 0.45040271686876265, + "grad_norm": 2.713954245336936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92870 + }, + { + "epoch": 0.45045121506159874, + "grad_norm": 3.410310682738782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92880 + }, + { + "epoch": 0.45049971325443483, + "grad_norm": 3.040215005967184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92890 + }, + { + "epoch": 0.4505482114472709, + "grad_norm": 2.9519353574869456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92900 + }, + { + "epoch": 0.45059670964010706, + "grad_norm": 2.8328706775937462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92910 + }, + { + "epoch": 0.45064520783294315, + "grad_norm": 2.3782056359777926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92920 + }, + { + "epoch": 0.45069370602577924, + "grad_norm": 2.4031189695961075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92930 + }, + { + "epoch": 0.4507422042186153, + "grad_norm": 2.7684409360517748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92940 + }, + { + "epoch": 0.4507907024114514, + "grad_norm": 2.5084618755499832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92950 + }, + { + "epoch": 0.4508392006042875, + "grad_norm": 2.3000145574769704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92960 + }, + { + "epoch": 0.4508876987971236, + "grad_norm": 2.117935082424083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92970 + }, + { + "epoch": 0.4509361969899597, + "grad_norm": 2.1564999315160094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92980 + }, + { + "epoch": 0.45098469518279577, + "grad_norm": 2.2919057300896384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 92990 + }, + { + "epoch": 0.45103319337563186, + "grad_norm": 2.4675573513377458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93000 + }, + { + "epoch": 0.45108169156846795, + "grad_norm": 2.12808049582236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93010 + }, + { + "epoch": 0.45113018976130403, + "grad_norm": 1.9878686998708872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93020 + }, + { + "epoch": 0.4511786879541401, + "grad_norm": 2.355857986913179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93030 + }, + { + "epoch": 0.4512271861469762, + "grad_norm": 2.151092758140294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93040 + }, + { + "epoch": 0.4512756843398123, + "grad_norm": 1.9889605482603656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93050 + }, + { + "epoch": 0.4513241825326484, + "grad_norm": 2.284749825776089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93060 + }, + { + "epoch": 0.4513726807254845, + "grad_norm": 3.2600860322418157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93070 + }, + { + "epoch": 0.45142117891832056, + "grad_norm": 2.882686430893955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93080 + }, + { + "epoch": 0.45146967711115665, + "grad_norm": 5.486395821208134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93090 + }, + { + "epoch": 0.45151817530399274, + "grad_norm": 2.216885377492872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93100 + }, + { + "epoch": 0.45156667349682883, + "grad_norm": 3.317504706501495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93110 + }, + { + "epoch": 0.4516151716896649, + "grad_norm": 3.245025254727807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93120 + }, + { + "epoch": 0.451663669882501, + "grad_norm": 1.9262802197772544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93130 + }, + { + "epoch": 0.4517121680753371, + "grad_norm": 2.0956701973773306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93140 + }, + { + "epoch": 0.4517606662681732, + "grad_norm": 2.2918504782865057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93150 + }, + { + "epoch": 0.45180916446100927, + "grad_norm": 6.341327207337599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93160 + }, + { + "epoch": 0.45185766265384536, + "grad_norm": 3.2345058116334258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93170 + }, + { + "epoch": 0.45190616084668145, + "grad_norm": 2.294432533744839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93180 + }, + { + "epoch": 0.45195465903951754, + "grad_norm": 1.6927214119277778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93190 + }, + { + "epoch": 0.4520031572323536, + "grad_norm": 1.9733870431082323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93200 + }, + { + "epoch": 0.4520516554251897, + "grad_norm": 2.029924644375569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93210 + }, + { + "epoch": 0.4521001536180258, + "grad_norm": 2.177840542572085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93220 + }, + { + "epoch": 0.4521486518108619, + "grad_norm": 1.8927980818261858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93230 + }, + { + "epoch": 0.452197150003698, + "grad_norm": 1.8875623482017545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93240 + }, + { + "epoch": 0.45224564819653407, + "grad_norm": 1.8438993265590398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93250 + }, + { + "epoch": 0.45229414638937016, + "grad_norm": 1.7711137161313673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93260 + }, + { + "epoch": 0.45234264458220624, + "grad_norm": 1.6887516949282144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93270 + }, + { + "epoch": 0.45239114277504233, + "grad_norm": 1.9248634544055676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93280 + }, + { + "epoch": 0.4524396409678784, + "grad_norm": 1.718939415695786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93290 + }, + { + "epoch": 0.4524881391607145, + "grad_norm": 1.5415267853313708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93300 + }, + { + "epoch": 0.4525366373535506, + "grad_norm": 2.0036253772559576e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 93310 + }, + { + "epoch": 0.4525851355463867, + "grad_norm": 1.6038582089095144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93320 + }, + { + "epoch": 0.4526336337392228, + "grad_norm": 2.045537485173554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93330 + }, + { + "epoch": 0.45268213193205886, + "grad_norm": 2.3108048026188044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93340 + }, + { + "epoch": 0.45273063012489495, + "grad_norm": 2.5663960059318924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93350 + }, + { + "epoch": 0.45277912831773104, + "grad_norm": 2.1683936211047694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93360 + }, + { + "epoch": 0.4528276265105671, + "grad_norm": 2.3224949927680427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93370 + }, + { + "epoch": 0.4528761247034032, + "grad_norm": 2.1748439849034185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93380 + }, + { + "epoch": 0.4529246228962393, + "grad_norm": 2.1050691429991275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93390 + }, + { + "epoch": 0.4529731210890754, + "grad_norm": 2.1705220660805935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93400 + }, + { + "epoch": 0.4530216192819115, + "grad_norm": 1.9862939097947674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93410 + }, + { + "epoch": 0.4530701174747476, + "grad_norm": 1.681849425949622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93420 + }, + { + "epoch": 0.4531186156675837, + "grad_norm": 2.0394900275277905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93430 + }, + { + "epoch": 0.4531671138604198, + "grad_norm": 2.1707621726818616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93440 + }, + { + "epoch": 0.4532156120532559, + "grad_norm": 1.7016249103107839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93450 + }, + { + "epoch": 0.453264110246092, + "grad_norm": 2.803588131428114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93460 + }, + { + "epoch": 0.45331260843892807, + "grad_norm": 1.8420994365442311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93470 + }, + { + "epoch": 0.45336110663176415, + "grad_norm": 4.61477702629054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93480 + }, + { + "epoch": 0.45340960482460024, + "grad_norm": 1.7774295884009916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93490 + }, + { + "epoch": 0.45345810301743633, + "grad_norm": 1.6283107697745436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93500 + }, + { + "epoch": 0.4535066012102724, + "grad_norm": 1.7296115402132273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93510 + }, + { + "epoch": 0.4535550994031085, + "grad_norm": 1.7823891766965971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93520 + }, + { + "epoch": 0.4536035975959446, + "grad_norm": 1.6669212072883965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93530 + }, + { + "epoch": 0.4536520957887807, + "grad_norm": 1.5620653357473202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93540 + }, + { + "epoch": 0.4537005939816168, + "grad_norm": 1.5938907154122717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93550 + }, + { + "epoch": 0.45374909217445286, + "grad_norm": 2.080420244965353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93560 + }, + { + "epoch": 0.45379759036728895, + "grad_norm": 1.7201817854584078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93570 + }, + { + "epoch": 0.45384608856012504, + "grad_norm": 1.3786788031211472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93580 + }, + { + "epoch": 0.4538945867529611, + "grad_norm": 1.4175810747474316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93590 + }, + { + "epoch": 0.4539430849457972, + "grad_norm": 1.4308346862890176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93600 + }, + { + "epoch": 0.4539915831386333, + "grad_norm": 9.116462024394423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93610 + }, + { + "epoch": 0.4540400813314694, + "grad_norm": 7.462572284566704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93620 + }, + { + "epoch": 0.4540885795243055, + "grad_norm": 8.301749403472058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93630 + }, + { + "epoch": 0.45413707771714157, + "grad_norm": 1.391746650369896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93640 + }, + { + "epoch": 0.45418557590997766, + "grad_norm": 1.2063384247085196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93650 + }, + { + "epoch": 0.45423407410281375, + "grad_norm": 1.395593244524207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93660 + }, + { + "epoch": 0.45428257229564983, + "grad_norm": 2.647523160703713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93670 + }, + { + "epoch": 0.4543310704884859, + "grad_norm": 1.3804722129862057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93680 + }, + { + "epoch": 0.454379568681322, + "grad_norm": 1.3019914604228688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93690 + }, + { + "epoch": 0.4544280668741581, + "grad_norm": 1.32756463244732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93700 + }, + { + "epoch": 0.4544765650669942, + "grad_norm": 1.7983583120440017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93710 + }, + { + "epoch": 0.4545250632598303, + "grad_norm": 1.2832722404709784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93720 + }, + { + "epoch": 0.45457356145266636, + "grad_norm": 1.2756727301166393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93730 + }, + { + "epoch": 0.45462205964550245, + "grad_norm": 1.2948401035828283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93740 + }, + { + "epoch": 0.45467055783833854, + "grad_norm": 1.2185359992145095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93750 + }, + { + "epoch": 0.45471905603117463, + "grad_norm": 1.3562014373746933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93760 + }, + { + "epoch": 0.4547675542240107, + "grad_norm": 3.4875170058512595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93770 + }, + { + "epoch": 0.4548160524168468, + "grad_norm": 1.3730006003243034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93780 + }, + { + "epoch": 0.4548645506096829, + "grad_norm": 1.251754724762577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93790 + }, + { + "epoch": 0.454913048802519, + "grad_norm": 1.2577438610605896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93800 + }, + { + "epoch": 0.45496154699535507, + "grad_norm": 1.2110235729778651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93810 + }, + { + "epoch": 0.45501004518819116, + "grad_norm": 1.1828516335299355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93820 + }, + { + "epoch": 0.45505854338102725, + "grad_norm": 1.1293798252154374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93830 + }, + { + "epoch": 0.45510704157386334, + "grad_norm": 1.1494176987980609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93840 + }, + { + "epoch": 0.4551555397666994, + "grad_norm": 1.2858752143074526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93850 + }, + { + "epoch": 0.4552040379595355, + "grad_norm": 1.2345687991910381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93860 + }, + { + "epoch": 0.4552525361523716, + "grad_norm": 1.0772229188660276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93870 + }, + { + "epoch": 0.4553010343452077, + "grad_norm": 1.0822448075487046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93880 + }, + { + "epoch": 0.4553495325380438, + "grad_norm": 1.1079376918132766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93890 + }, + { + "epoch": 0.45539803073087987, + "grad_norm": 1.0544023325564922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93900 + }, + { + "epoch": 0.45544652892371595, + "grad_norm": 1.1561115798031096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93910 + }, + { + "epoch": 0.45549502711655204, + "grad_norm": 1.090431055672525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93920 + }, + { + "epoch": 0.4555435253093882, + "grad_norm": 1.1231043117732042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93930 + }, + { + "epoch": 0.4555920235022243, + "grad_norm": 1.090045543605811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93940 + }, + { + "epoch": 0.45564052169506036, + "grad_norm": 9.963886213881779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93950 + }, + { + "epoch": 0.45568901988789645, + "grad_norm": 9.947417538569425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93960 + }, + { + "epoch": 0.45573751808073254, + "grad_norm": 1.1030923587895813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93970 + }, + { + "epoch": 0.45578601627356863, + "grad_norm": 1.0006706361309625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93980 + }, + { + "epoch": 0.4558345144664047, + "grad_norm": 9.440784651815193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 93990 + }, + { + "epoch": 0.4558830126592408, + "grad_norm": 1.146274712482409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94000 + }, + { + "epoch": 0.4559315108520769, + "grad_norm": 1.0526355254114605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94010 + }, + { + "epoch": 0.455980009044913, + "grad_norm": 1.0388515647719032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94020 + }, + { + "epoch": 0.45602850723774907, + "grad_norm": 1.1471427114884136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94030 + }, + { + "epoch": 0.45607700543058516, + "grad_norm": 9.029652119352249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94040 + }, + { + "epoch": 0.45612550362342125, + "grad_norm": 9.290770321968012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94050 + }, + { + "epoch": 0.45617400181625734, + "grad_norm": 9.129671525442973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94060 + }, + { + "epoch": 0.4562225000090934, + "grad_norm": 1.1278779084022972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94070 + }, + { + "epoch": 0.4562709982019295, + "grad_norm": 1.2362370398477651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94080 + }, + { + "epoch": 0.4563194963947656, + "grad_norm": 8.909838129511627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94090 + }, + { + "epoch": 0.4563679945876017, + "grad_norm": 8.822228778626595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94100 + }, + { + "epoch": 0.4564164927804378, + "grad_norm": 9.28864722027356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94110 + }, + { + "epoch": 0.45646499097327387, + "grad_norm": 1.1004373163814307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94120 + }, + { + "epoch": 0.45651348916610995, + "grad_norm": 4.92393064632779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94130 + }, + { + "epoch": 0.45656198735894604, + "grad_norm": 8.678203471390589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94140 + }, + { + "epoch": 0.45661048555178213, + "grad_norm": 8.346264621650334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94150 + }, + { + "epoch": 0.4566589837446182, + "grad_norm": 8.964486823970219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94160 + }, + { + "epoch": 0.4567074819374543, + "grad_norm": 8.615709816695016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94170 + }, + { + "epoch": 0.4567559801302904, + "grad_norm": 1.3599344583781203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94180 + }, + { + "epoch": 0.4568044783231265, + "grad_norm": 8.143181844388891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94190 + }, + { + "epoch": 0.45685297651596257, + "grad_norm": 8.341540365108813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94200 + }, + { + "epoch": 0.45690147470879866, + "grad_norm": 8.263259587693028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94210 + }, + { + "epoch": 0.45694997290163475, + "grad_norm": 9.308204198532621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94220 + }, + { + "epoch": 0.45699847109447084, + "grad_norm": 8.444964691989298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94230 + }, + { + "epoch": 0.4570469692873069, + "grad_norm": 7.98075291186251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94240 + }, + { + "epoch": 0.457095467480143, + "grad_norm": 8.032808977986861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94250 + }, + { + "epoch": 0.4571439656729791, + "grad_norm": 7.493603675357008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94260 + }, + { + "epoch": 0.4571924638658152, + "grad_norm": 8.113316312119423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94270 + }, + { + "epoch": 0.4572409620586513, + "grad_norm": 7.046129439913784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94280 + }, + { + "epoch": 0.45728946025148737, + "grad_norm": 7.472826268895005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94290 + }, + { + "epoch": 0.45733795844432346, + "grad_norm": 7.908931820566067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94300 + }, + { + "epoch": 0.45738645663715954, + "grad_norm": 7.72822204453405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94310 + }, + { + "epoch": 0.45743495482999563, + "grad_norm": 8.069037562563608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94320 + }, + { + "epoch": 0.4574834530228317, + "grad_norm": 7.156000378927274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94330 + }, + { + "epoch": 0.4575319512156678, + "grad_norm": 7.80452012349997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94340 + }, + { + "epoch": 0.4575804494085039, + "grad_norm": 7.39595066079346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94350 + }, + { + "epoch": 0.45762894760134, + "grad_norm": 7.4486359835646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94360 + }, + { + "epoch": 0.4576774457941761, + "grad_norm": 8.494091616739752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94370 + }, + { + "epoch": 0.45772594398701216, + "grad_norm": 7.077387635945342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94380 + }, + { + "epoch": 0.45777444217984825, + "grad_norm": 6.909594389981066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94390 + }, + { + "epoch": 0.45782294037268434, + "grad_norm": 6.727907475578832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94400 + }, + { + "epoch": 0.4578714385655204, + "grad_norm": 6.558184395544231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94410 + }, + { + "epoch": 0.4579199367583565, + "grad_norm": 6.934892553545069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94420 + }, + { + "epoch": 0.4579684349511926, + "grad_norm": 6.337277795864793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94430 + }, + { + "epoch": 0.45801693314402875, + "grad_norm": 6.565962848981144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94440 + }, + { + "epoch": 0.45806543133686484, + "grad_norm": 6.599199764423247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94450 + }, + { + "epoch": 0.4581139295297009, + "grad_norm": 6.021802505529195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94460 + }, + { + "epoch": 0.458162427722537, + "grad_norm": 6.583250637959281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94470 + }, + { + "epoch": 0.4582109259153731, + "grad_norm": 6.574101689693634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94480 + }, + { + "epoch": 0.4582594241082092, + "grad_norm": 7.116807410056936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94490 + }, + { + "epoch": 0.4583079223010453, + "grad_norm": 5.809283720736858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94500 + }, + { + "epoch": 0.45835642049388137, + "grad_norm": 6.604135478482931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94510 + }, + { + "epoch": 0.45840491868671746, + "grad_norm": 5.941029144196364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94520 + }, + { + "epoch": 0.45845341687955354, + "grad_norm": 6.147953968138609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94530 + }, + { + "epoch": 0.45850191507238963, + "grad_norm": 6.364867886077263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94540 + }, + { + "epoch": 0.4585504132652257, + "grad_norm": 6.128850600362057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94550 + }, + { + "epoch": 0.4585989114580618, + "grad_norm": 6.728062089678133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94560 + }, + { + "epoch": 0.4586474096508979, + "grad_norm": 5.797912763227941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94570 + }, + { + "epoch": 0.458695907843734, + "grad_norm": 5.957509188192489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94580 + }, + { + "epoch": 0.4587444060365701, + "grad_norm": 5.642183964482683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94590 + }, + { + "epoch": 0.45879290422940616, + "grad_norm": 5.79604716222093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94600 + }, + { + "epoch": 0.45884140242224225, + "grad_norm": 6.000536245664989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94610 + }, + { + "epoch": 0.45888990061507834, + "grad_norm": 5.628338612950756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94620 + }, + { + "epoch": 0.4589383988079144, + "grad_norm": 5.803347562505223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94630 + }, + { + "epoch": 0.4589868970007505, + "grad_norm": 5.663168280989339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94640 + }, + { + "epoch": 0.4590353951935866, + "grad_norm": 6.132625571808603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94650 + }, + { + "epoch": 0.4590838933864227, + "grad_norm": 5.892711669730488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94660 + }, + { + "epoch": 0.4591323915792588, + "grad_norm": 5.626173447126348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94670 + }, + { + "epoch": 0.45918088977209487, + "grad_norm": 5.425561653282784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94680 + }, + { + "epoch": 0.45922938796493096, + "grad_norm": 5.630254804600554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94690 + }, + { + "epoch": 0.45927788615776705, + "grad_norm": 5.390420483308844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94700 + }, + { + "epoch": 0.45932638435060313, + "grad_norm": 5.096185304864775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94710 + }, + { + "epoch": 0.4593748825434392, + "grad_norm": 5.276278329802153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94720 + }, + { + "epoch": 0.4594233807362753, + "grad_norm": 5.423934794634988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94730 + }, + { + "epoch": 0.4594718789291114, + "grad_norm": 5.244158387540665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94740 + }, + { + "epoch": 0.4595203771219475, + "grad_norm": 5.849101967214665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94750 + }, + { + "epoch": 0.4595688753147836, + "grad_norm": 4.728386215901992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94760 + }, + { + "epoch": 0.45961737350761966, + "grad_norm": 5.228036457083363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94770 + }, + { + "epoch": 0.45966587170045575, + "grad_norm": 5.239695610725903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94780 + }, + { + "epoch": 0.45971436989329184, + "grad_norm": 4.7564699912072683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94790 + }, + { + "epoch": 0.45976286808612793, + "grad_norm": 5.238055678091769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94800 + }, + { + "epoch": 0.459811366278964, + "grad_norm": 4.878640424976766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94810 + }, + { + "epoch": 0.4598598644718001, + "grad_norm": 5.251187076282804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94820 + }, + { + "epoch": 0.4599083626646362, + "grad_norm": 4.963450805917091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94830 + }, + { + "epoch": 0.4599568608574723, + "grad_norm": 5.3431193691722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94840 + }, + { + "epoch": 0.46000535905030837, + "grad_norm": 4.7504781264251505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94850 + }, + { + "epoch": 0.46005385724314446, + "grad_norm": 4.6789938323854585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94860 + }, + { + "epoch": 0.46010235543598055, + "grad_norm": 4.711311305527488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94870 + }, + { + "epoch": 0.46015085362881664, + "grad_norm": 4.961841000294953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94880 + }, + { + "epoch": 0.4601993518216527, + "grad_norm": 4.925834673485951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94890 + }, + { + "epoch": 0.4602478500144888, + "grad_norm": 4.4509172880680126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94900 + }, + { + "epoch": 0.4602963482073249, + "grad_norm": 4.716530384030193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94910 + }, + { + "epoch": 0.460344846400161, + "grad_norm": 4.697059239333612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94920 + }, + { + "epoch": 0.4603933445929971, + "grad_norm": 4.4614711214308045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94930 + }, + { + "epoch": 0.46044184278583317, + "grad_norm": 5.081760150460468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94940 + }, + { + "epoch": 0.46049034097866925, + "grad_norm": 5.240150358076789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94950 + }, + { + "epoch": 0.4605388391715054, + "grad_norm": 4.434580205270322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94960 + }, + { + "epoch": 0.4605873373643415, + "grad_norm": 4.345888271473086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94970 + }, + { + "epoch": 0.4606358355571776, + "grad_norm": 4.428902968811599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94980 + }, + { + "epoch": 0.46068433375001366, + "grad_norm": 4.901665988654713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 94990 + }, + { + "epoch": 0.46073283194284975, + "grad_norm": 4.241922511027951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95000 + }, + { + "epoch": 0.46078133013568584, + "grad_norm": 4.5125298697712424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95010 + }, + { + "epoch": 0.46082982832852193, + "grad_norm": 4.631254739706492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95020 + }, + { + "epoch": 0.460878326521358, + "grad_norm": 4.1736072375897493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95030 + }, + { + "epoch": 0.4609268247141941, + "grad_norm": 4.3719086306737154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95040 + }, + { + "epoch": 0.4609753229070302, + "grad_norm": 4.1489343516332156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95050 + }, + { + "epoch": 0.4610238210998663, + "grad_norm": 4.5801397163813817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95060 + }, + { + "epoch": 0.46107231929270237, + "grad_norm": 4.302365823605214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95070 + }, + { + "epoch": 0.46112081748553846, + "grad_norm": 4.622851861313393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95080 + }, + { + "epoch": 0.46116931567837455, + "grad_norm": 4.869173153565498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95090 + }, + { + "epoch": 0.46121781387121064, + "grad_norm": 4.371229636035423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95100 + }, + { + "epoch": 0.4612663120640467, + "grad_norm": 4.1536176809131575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95110 + }, + { + "epoch": 0.4613148102568828, + "grad_norm": 4.4113011199442553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95120 + }, + { + "epoch": 0.4613633084497189, + "grad_norm": 4.3588391918092384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95130 + }, + { + "epoch": 0.461411806642555, + "grad_norm": 3.7369730421232816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95140 + }, + { + "epoch": 0.4614603048353911, + "grad_norm": 3.924214979633689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95150 + }, + { + "epoch": 0.46150880302822717, + "grad_norm": 4.061259630816494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95160 + }, + { + "epoch": 0.46155730122106325, + "grad_norm": 1.1193330465175677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95170 + }, + { + "epoch": 0.46160579941389934, + "grad_norm": 4.0150175095732266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95180 + }, + { + "epoch": 0.46165429760673543, + "grad_norm": 3.9518528183180024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95190 + }, + { + "epoch": 0.4617027957995715, + "grad_norm": 3.805726009886712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95200 + }, + { + "epoch": 0.4617512939924076, + "grad_norm": 3.683426257339306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95210 + }, + { + "epoch": 0.4617997921852437, + "grad_norm": 3.818154254986439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95220 + }, + { + "epoch": 0.4618482903780798, + "grad_norm": 3.5729686942431726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95230 + }, + { + "epoch": 0.4618967885709159, + "grad_norm": 4.0221098629444896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95240 + }, + { + "epoch": 0.46194528676375196, + "grad_norm": 3.6822319771090406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95250 + }, + { + "epoch": 0.46199378495658805, + "grad_norm": 3.358933042818535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95260 + }, + { + "epoch": 0.46204228314942414, + "grad_norm": 3.751722772449284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95270 + }, + { + "epoch": 0.4620907813422602, + "grad_norm": 3.5951029531133827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95280 + }, + { + "epoch": 0.4621392795350963, + "grad_norm": 3.6170033013149805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95290 + }, + { + "epoch": 0.4621877777279324, + "grad_norm": 3.442265210651385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95300 + }, + { + "epoch": 0.4622362759207685, + "grad_norm": 3.6721041851706104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95310 + }, + { + "epoch": 0.4622847741136046, + "grad_norm": 3.3998517778854875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95320 + }, + { + "epoch": 0.46233327230644067, + "grad_norm": 3.6496734878710413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95330 + }, + { + "epoch": 0.46238177049927676, + "grad_norm": 3.468273348516959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95340 + }, + { + "epoch": 0.46243026869211284, + "grad_norm": 3.26433962527517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95350 + }, + { + "epoch": 0.46247876688494893, + "grad_norm": 3.955447880343854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95360 + }, + { + "epoch": 0.462527265077785, + "grad_norm": 4.0335854123441095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95370 + }, + { + "epoch": 0.4625757632706211, + "grad_norm": 3.343220100759936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95380 + }, + { + "epoch": 0.4626242614634572, + "grad_norm": 3.3089574458244897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95390 + }, + { + "epoch": 0.4626727596562933, + "grad_norm": 3.3595188142498955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95400 + }, + { + "epoch": 0.4627212578491294, + "grad_norm": 3.15803589501229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95410 + }, + { + "epoch": 0.46276975604196546, + "grad_norm": 3.414589286876435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95420 + }, + { + "epoch": 0.46281825423480155, + "grad_norm": 3.4032976259368297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95430 + }, + { + "epoch": 0.46286675242763764, + "grad_norm": 3.475304595212947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95440 + }, + { + "epoch": 0.46291525062047373, + "grad_norm": 3.1043421699905593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95450 + }, + { + "epoch": 0.4629637488133098, + "grad_norm": 3.680956410789804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95460 + }, + { + "epoch": 0.46301224700614596, + "grad_norm": 4.100603518963908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95470 + }, + { + "epoch": 0.46306074519898205, + "grad_norm": 3.224706688342849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95480 + }, + { + "epoch": 0.46310924339181814, + "grad_norm": 3.2343947964363906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95490 + }, + { + "epoch": 0.4631577415846542, + "grad_norm": 3.1893259233584104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95500 + }, + { + "epoch": 0.4632062397774903, + "grad_norm": 3.1045786386130203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95510 + }, + { + "epoch": 0.4632547379703264, + "grad_norm": 3.478892267594347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95520 + }, + { + "epoch": 0.4633032361631625, + "grad_norm": 3.094421856530971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95530 + }, + { + "epoch": 0.4633517343559986, + "grad_norm": 4.933561399411701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95540 + }, + { + "epoch": 0.46340023254883467, + "grad_norm": 3.0498929959321686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95550 + }, + { + "epoch": 0.46344873074167076, + "grad_norm": 3.1467533290197025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95560 + }, + { + "epoch": 0.46349722893450684, + "grad_norm": 2.8633692750190676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95570 + }, + { + "epoch": 0.46354572712734293, + "grad_norm": 3.011160742971697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95580 + }, + { + "epoch": 0.463594225320179, + "grad_norm": 3.0597146860600333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95590 + }, + { + "epoch": 0.4636427235130151, + "grad_norm": 2.7960280135630455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95600 + }, + { + "epoch": 0.4636912217058512, + "grad_norm": 3.124790737274452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95610 + }, + { + "epoch": 0.4637397198986873, + "grad_norm": 3.1528929866908584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95620 + }, + { + "epoch": 0.4637882180915234, + "grad_norm": 2.790096971239109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95630 + }, + { + "epoch": 0.46383671628435946, + "grad_norm": 3.060054609704821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95640 + }, + { + "epoch": 0.46388521447719555, + "grad_norm": 3.2739274047344225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95650 + }, + { + "epoch": 0.46393371267003164, + "grad_norm": 3.068282694584923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95660 + }, + { + "epoch": 0.4639822108628677, + "grad_norm": 3.096087652920687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95670 + }, + { + "epoch": 0.4640307090557038, + "grad_norm": 2.6796263341566373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95680 + }, + { + "epoch": 0.4640792072485399, + "grad_norm": 2.648400823090924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95690 + }, + { + "epoch": 0.464127705441376, + "grad_norm": 2.7446137096376333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95700 + }, + { + "epoch": 0.4641762036342121, + "grad_norm": 2.8892227987853403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95710 + }, + { + "epoch": 0.46422470182704817, + "grad_norm": 2.9261579470585275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95720 + }, + { + "epoch": 0.46427320001988426, + "grad_norm": 1.8550094864622224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95730 + }, + { + "epoch": 0.46432169821272035, + "grad_norm": 2.531561165142193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95740 + }, + { + "epoch": 0.46437019640555643, + "grad_norm": 2.9320986527636705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95750 + }, + { + "epoch": 0.4644186945983925, + "grad_norm": 2.677999475508841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95760 + }, + { + "epoch": 0.4644671927912286, + "grad_norm": 7.235126986415708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95770 + }, + { + "epoch": 0.4645156909840647, + "grad_norm": 2.7317872763887863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95780 + }, + { + "epoch": 0.4645641891769008, + "grad_norm": 2.7323699214321095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95790 + }, + { + "epoch": 0.4646126873697369, + "grad_norm": 2.7536489710655587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95800 + }, + { + "epoch": 0.46466118556257296, + "grad_norm": 2.9499534548449446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95810 + }, + { + "epoch": 0.46470968375540905, + "grad_norm": 2.721505154568149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95820 + }, + { + "epoch": 0.46475818194824514, + "grad_norm": 2.7987519501948555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95830 + }, + { + "epoch": 0.46480668014108123, + "grad_norm": 2.3986586938917753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95840 + }, + { + "epoch": 0.4648551783339173, + "grad_norm": 3.168111959439557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95850 + }, + { + "epoch": 0.4649036765267534, + "grad_norm": 2.804308962822688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95860 + }, + { + "epoch": 0.4649521747195895, + "grad_norm": 2.5921318069777044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95870 + }, + { + "epoch": 0.4650006729124256, + "grad_norm": 2.765708018159785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95880 + }, + { + "epoch": 0.46504917110526167, + "grad_norm": 2.733130486376467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95890 + }, + { + "epoch": 0.46509766929809776, + "grad_norm": 2.6535005304140213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95900 + }, + { + "epoch": 0.46514616749093385, + "grad_norm": 2.783159800401336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95910 + }, + { + "epoch": 0.46519466568376994, + "grad_norm": 2.7018870696338126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95920 + }, + { + "epoch": 0.465243163876606, + "grad_norm": 2.5184743890349637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95930 + }, + { + "epoch": 0.4652916620694421, + "grad_norm": 2.540740808854025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95940 + }, + { + "epoch": 0.4653401602622782, + "grad_norm": 2.3937687387842743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95950 + }, + { + "epoch": 0.4653886584551143, + "grad_norm": 2.7555961423786357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95960 + }, + { + "epoch": 0.4654371566479504, + "grad_norm": 2.586829168649274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95970 + }, + { + "epoch": 0.4654856548407865, + "grad_norm": 2.70867133167485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95980 + }, + { + "epoch": 0.4655341530336226, + "grad_norm": 2.4559781763855426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 95990 + }, + { + "epoch": 0.4655826512264587, + "grad_norm": 3.119249072369712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96000 + }, + { + "epoch": 0.4656311494192948, + "grad_norm": 2.751648651155847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96010 + }, + { + "epoch": 0.4656796476121309, + "grad_norm": 2.4757810024311766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96020 + }, + { + "epoch": 0.46572814580496696, + "grad_norm": 2.439759612116177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96030 + }, + { + "epoch": 0.46577664399780305, + "grad_norm": 2.5539162606946775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96040 + }, + { + "epoch": 0.46582514219063914, + "grad_norm": 2.2132091714865965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96050 + }, + { + "epoch": 0.46587364038347523, + "grad_norm": 2.5531289793434553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96060 + }, + { + "epoch": 0.4659221385763113, + "grad_norm": 2.4748553073550283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96070 + }, + { + "epoch": 0.4659706367691474, + "grad_norm": 2.645364531872474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96080 + }, + { + "epoch": 0.4660191349619835, + "grad_norm": 2.2940091071177449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96090 + }, + { + "epoch": 0.4660676331548196, + "grad_norm": 2.1531620575387933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96100 + }, + { + "epoch": 0.46611613134765567, + "grad_norm": 2.8538286755974696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96110 + }, + { + "epoch": 0.46616462954049176, + "grad_norm": 2.3521614878063701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96120 + }, + { + "epoch": 0.46621312773332785, + "grad_norm": 2.1651891302099102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96130 + }, + { + "epoch": 0.46626162592616394, + "grad_norm": 2.694163185879006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96140 + }, + { + "epoch": 0.466310124119, + "grad_norm": 2.4192047476390144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96150 + }, + { + "epoch": 0.4663586223118361, + "grad_norm": 2.3409270966112672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96160 + }, + { + "epoch": 0.4664071205046722, + "grad_norm": 2.418366022993723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96170 + }, + { + "epoch": 0.4664556186975083, + "grad_norm": 2.6143288778257556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96180 + }, + { + "epoch": 0.4665041168903444, + "grad_norm": 1.9885258950580464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96190 + }, + { + "epoch": 0.46655261508318047, + "grad_norm": 1.8781855715133133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96200 + }, + { + "epoch": 0.46660111327601655, + "grad_norm": 2.531050711240823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96210 + }, + { + "epoch": 0.46664961146885264, + "grad_norm": 2.6832188382286404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96220 + }, + { + "epoch": 0.46669810966168873, + "grad_norm": 2.419999134417594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96230 + }, + { + "epoch": 0.4667466078545248, + "grad_norm": 2.046592584292739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96240 + }, + { + "epoch": 0.4667951060473609, + "grad_norm": 2.2568637803033198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96250 + }, + { + "epoch": 0.466843604240197, + "grad_norm": 2.404773624675727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96260 + }, + { + "epoch": 0.4668921024330331, + "grad_norm": 2.287505793674427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96270 + }, + { + "epoch": 0.4669406006258692, + "grad_norm": 2.3802797954886046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96280 + }, + { + "epoch": 0.46698909881870526, + "grad_norm": 2.0043378867740103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96290 + }, + { + "epoch": 0.46703759701154135, + "grad_norm": 1.9810889284599398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96300 + }, + { + "epoch": 0.46708609520437744, + "grad_norm": 2.1193811505781923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96310 + }, + { + "epoch": 0.4671345933972135, + "grad_norm": 2.2296204349459003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96320 + }, + { + "epoch": 0.4671830915900496, + "grad_norm": 2.2967012114349927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96330 + }, + { + "epoch": 0.4672315897828857, + "grad_norm": 1.8904364651461947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96340 + }, + { + "epoch": 0.4672800879757218, + "grad_norm": 1.9168707865446777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96350 + }, + { + "epoch": 0.4673285861685579, + "grad_norm": 2.1573194430857257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96360 + }, + { + "epoch": 0.46737708436139397, + "grad_norm": 2.150093791897234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96370 + }, + { + "epoch": 0.46742558255423006, + "grad_norm": 2.148033502180624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96380 + }, + { + "epoch": 0.46747408074706615, + "grad_norm": 1.7850605615876702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96390 + }, + { + "epoch": 0.46752257893990223, + "grad_norm": 1.8290617731508974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96400 + }, + { + "epoch": 0.4675710771327383, + "grad_norm": 2.1090257007472246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96410 + }, + { + "epoch": 0.4676195753255744, + "grad_norm": 1.9895867353625363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96420 + }, + { + "epoch": 0.4676680735184105, + "grad_norm": 2.0716953486044076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96430 + }, + { + "epoch": 0.4677165717112466, + "grad_norm": 1.972355363477618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96440 + }, + { + "epoch": 0.4677650699040827, + "grad_norm": 2.0953636692411237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96450 + }, + { + "epoch": 0.46781356809691876, + "grad_norm": 2.2212364569895726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96460 + }, + { + "epoch": 0.46786206628975485, + "grad_norm": 2.0787467747140909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96470 + }, + { + "epoch": 0.46791056448259094, + "grad_norm": 2.0005097667308291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96480 + }, + { + "epoch": 0.4679590626754271, + "grad_norm": 1.952078463318685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96490 + }, + { + "epoch": 0.4680075608682632, + "grad_norm": 1.8894517950229783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96500 + }, + { + "epoch": 0.46805605906109926, + "grad_norm": 2.1487740298198332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96510 + }, + { + "epoch": 0.46810455725393535, + "grad_norm": 2.0646297116400092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96520 + }, + { + "epoch": 0.46815305544677144, + "grad_norm": 2.058831967133301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96530 + }, + { + "epoch": 0.4682015536396075, + "grad_norm": 1.9133651107949845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96540 + }, + { + "epoch": 0.4682500518324436, + "grad_norm": 1.8989464933838462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96550 + }, + { + "epoch": 0.4682985500252797, + "grad_norm": 1.9894335423487064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96560 + }, + { + "epoch": 0.4683470482181158, + "grad_norm": 2.208306710826946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96570 + }, + { + "epoch": 0.4683955464109519, + "grad_norm": 2.028590131430974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96580 + }, + { + "epoch": 0.46844404460378797, + "grad_norm": 2.0048764781677164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96590 + }, + { + "epoch": 0.46849254279662406, + "grad_norm": 1.8165167148254113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96600 + }, + { + "epoch": 0.46854104098946014, + "grad_norm": 1.8889605257754738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96610 + }, + { + "epoch": 0.46858953918229623, + "grad_norm": 2.008159043498381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96620 + }, + { + "epoch": 0.4686380373751323, + "grad_norm": 1.8920248123777128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96630 + }, + { + "epoch": 0.4686865355679684, + "grad_norm": 1.736421779696684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96640 + }, + { + "epoch": 0.4687350337608045, + "grad_norm": 1.8600017881453823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96650 + }, + { + "epoch": 0.4687835319536406, + "grad_norm": 2.0268022637992544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96660 + }, + { + "epoch": 0.4688320301464767, + "grad_norm": 1.951961650092926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96670 + }, + { + "epoch": 0.46888052833931276, + "grad_norm": 1.9990763178157067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96680 + }, + { + "epoch": 0.46892902653214885, + "grad_norm": 1.6504729671851237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96690 + }, + { + "epoch": 0.46897752472498494, + "grad_norm": 1.685839663423394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96700 + }, + { + "epoch": 0.46902602291782103, + "grad_norm": 1.8971933002376318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96710 + }, + { + "epoch": 0.4690745211106571, + "grad_norm": 2.0578028170348261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96720 + }, + { + "epoch": 0.4691230193034932, + "grad_norm": 1.8810736435170838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96730 + }, + { + "epoch": 0.4691715174963293, + "grad_norm": 1.6175980022126168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96740 + }, + { + "epoch": 0.4692200156891654, + "grad_norm": 1.6773236666267621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96750 + }, + { + "epoch": 0.46926851388200147, + "grad_norm": 1.9379058358026668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96760 + }, + { + "epoch": 0.46931701207483756, + "grad_norm": 1.9204902912406396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96770 + }, + { + "epoch": 0.46936551026767365, + "grad_norm": 1.944722640701002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96780 + }, + { + "epoch": 0.46941400846050974, + "grad_norm": 1.6458278651043656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96790 + }, + { + "epoch": 0.4694625066533458, + "grad_norm": 1.6135489033786143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96800 + }, + { + "epoch": 0.4695110048461819, + "grad_norm": 1.9385456084819452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96810 + }, + { + "epoch": 0.469559503039018, + "grad_norm": 4.7420002147191553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96820 + }, + { + "epoch": 0.4696080012318541, + "grad_norm": 1.871211736670375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96830 + }, + { + "epoch": 0.4696564994246902, + "grad_norm": 1.8702607462728338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96840 + }, + { + "epoch": 0.46970499761752627, + "grad_norm": 1.7745816194292274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96850 + }, + { + "epoch": 0.46975349581036235, + "grad_norm": 1.9129855388655415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96860 + }, + { + "epoch": 0.46980199400319844, + "grad_norm": 1.9135720208396378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96870 + }, + { + "epoch": 0.46985049219603453, + "grad_norm": 1.8790942135638034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96880 + }, + { + "epoch": 0.4698989903888706, + "grad_norm": 1.5936394959226163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96890 + }, + { + "epoch": 0.4699474885817067, + "grad_norm": 1.5247820783770294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96900 + }, + { + "epoch": 0.4699959867745428, + "grad_norm": 1.8329485840240523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96910 + }, + { + "epoch": 0.4700444849673789, + "grad_norm": 1.8492201547815057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96920 + }, + { + "epoch": 0.47009298316021497, + "grad_norm": 1.8617359387462784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96930 + }, + { + "epoch": 0.47014148135305106, + "grad_norm": 1.5974023881426547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96940 + }, + { + "epoch": 0.47018997954588715, + "grad_norm": 1.6066428543126676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96950 + }, + { + "epoch": 0.47023847773872324, + "grad_norm": 2.0690684721103025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96960 + }, + { + "epoch": 0.4702869759315593, + "grad_norm": 1.9085067037849512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96970 + }, + { + "epoch": 0.4703354741243954, + "grad_norm": 1.7447445088691893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96980 + }, + { + "epoch": 0.4703839723172315, + "grad_norm": 1.660439039596895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 96990 + }, + { + "epoch": 0.47043247051006765, + "grad_norm": 1.5565672129014274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97000 + }, + { + "epoch": 0.47048096870290373, + "grad_norm": 1.9557731434360903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97010 + }, + { + "epoch": 0.4705294668957398, + "grad_norm": 1.8458574402302474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97020 + }, + { + "epoch": 0.4705779650885759, + "grad_norm": 1.770082036500753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97030 + }, + { + "epoch": 0.470626463281412, + "grad_norm": 1.5667708908040368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97040 + }, + { + "epoch": 0.4706749614742481, + "grad_norm": 1.6888013476545893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97050 + }, + { + "epoch": 0.4707234596670842, + "grad_norm": 1.907583708771199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97060 + }, + { + "epoch": 0.47077195785992026, + "grad_norm": 1.7551811026805808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97070 + }, + { + "epoch": 0.47082045605275635, + "grad_norm": 1.8688710667902342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97080 + }, + { + "epoch": 0.47086895424559244, + "grad_norm": 1.531040112467963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97090 + }, + { + "epoch": 0.47091745243842853, + "grad_norm": 1.592561318375374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97100 + }, + { + "epoch": 0.4709659506312646, + "grad_norm": 1.8335778406708414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97110 + }, + { + "epoch": 0.4710144488241007, + "grad_norm": 1.7305740129813785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97120 + }, + { + "epoch": 0.4710629470169368, + "grad_norm": 1.7555294107296504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97130 + }, + { + "epoch": 0.4711114452097729, + "grad_norm": 1.6240663569533353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97140 + }, + { + "epoch": 0.47115994340260897, + "grad_norm": 1.4920331636858464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97150 + }, + { + "epoch": 0.47120844159544506, + "grad_norm": 1.718859579113996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97160 + }, + { + "epoch": 0.47125693978828115, + "grad_norm": 1.7865974655251193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97170 + }, + { + "epoch": 0.47130543798111724, + "grad_norm": 1.7641536942392122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97180 + }, + { + "epoch": 0.4713539361739533, + "grad_norm": 1.4344814758260327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97190 + }, + { + "epoch": 0.4714024343667894, + "grad_norm": 1.4523683944389632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97200 + }, + { + "epoch": 0.4714509325596255, + "grad_norm": 1.617485025917631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97210 + }, + { + "epoch": 0.4714994307524616, + "grad_norm": 1.7278856034863566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97220 + }, + { + "epoch": 0.4715479289452977, + "grad_norm": 1.8055283135254285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97230 + }, + { + "epoch": 0.47159642713813377, + "grad_norm": 1.4552311711213406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97240 + }, + { + "epoch": 0.47164492533096986, + "grad_norm": 1.3763457218374242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97250 + }, + { + "epoch": 0.47169342352380594, + "grad_norm": 1.6783512535312184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97260 + }, + { + "epoch": 0.47174192171664203, + "grad_norm": 1.7514236105853342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97270 + }, + { + "epoch": 0.4717904199094781, + "grad_norm": 1.637513236119048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97280 + }, + { + "epoch": 0.4718389181023142, + "grad_norm": 1.3787590091851598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97290 + }, + { + "epoch": 0.4718874162951503, + "grad_norm": 1.5259796271038795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97300 + }, + { + "epoch": 0.4719359144879864, + "grad_norm": 1.8860032469092403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97310 + }, + { + "epoch": 0.4719844126808225, + "grad_norm": 1.8020143954800005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97320 + }, + { + "epoch": 0.47203291087365856, + "grad_norm": 1.6478533382269234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97330 + }, + { + "epoch": 0.47208140906649465, + "grad_norm": 1.4067276765672432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97340 + }, + { + "epoch": 0.47212990725933074, + "grad_norm": 1.4518838042931748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97350 + }, + { + "epoch": 0.4721784054521668, + "grad_norm": 1.6148192116816062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97360 + }, + { + "epoch": 0.4722269036450029, + "grad_norm": 1.7939844099146285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97370 + }, + { + "epoch": 0.472275401837839, + "grad_norm": 1.6288028348299122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97380 + }, + { + "epoch": 0.4723239000306751, + "grad_norm": 1.3623653671857028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97390 + }, + { + "epoch": 0.4723723982235112, + "grad_norm": 1.3462567949318327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97400 + }, + { + "epoch": 0.47242089641634727, + "grad_norm": 1.6389370216529642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97410 + }, + { + "epoch": 0.47246939460918336, + "grad_norm": 1.5633129635261866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97420 + }, + { + "epoch": 0.47251789280201945, + "grad_norm": 1.576465109565106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97430 + }, + { + "epoch": 0.47256639099485553, + "grad_norm": 1.4201360443166777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97440 + }, + { + "epoch": 0.4726148891876916, + "grad_norm": 1.4655979896360805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97450 + }, + { + "epoch": 0.4726633873805277, + "grad_norm": 1.5480655690680578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97460 + }, + { + "epoch": 0.4727118855733638, + "grad_norm": 1.7459421997045865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97470 + }, + { + "epoch": 0.4727603837661999, + "grad_norm": 1.6110344347453065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97480 + }, + { + "epoch": 0.472808881959036, + "grad_norm": 1.3507771257081913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97490 + }, + { + "epoch": 0.47285738015187206, + "grad_norm": 1.520390071618749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97500 + }, + { + "epoch": 0.47290587834470815, + "grad_norm": 1.7334859592210705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97510 + }, + { + "epoch": 0.4729543765375443, + "grad_norm": 1.595350767047421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97520 + }, + { + "epoch": 0.4730028747303804, + "grad_norm": 2.7518510137269914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97530 + }, + { + "epoch": 0.4730513729232165, + "grad_norm": 1.4698035499804973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97540 + }, + { + "epoch": 0.47309987111605256, + "grad_norm": 1.3587458624897408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97550 + }, + { + "epoch": 0.47314836930888865, + "grad_norm": 1.6716256823201547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97560 + }, + { + "epoch": 0.47319686750172474, + "grad_norm": 1.5793914087680605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97570 + }, + { + "epoch": 0.4732453656945608, + "grad_norm": 1.6570649563618645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97580 + }, + { + "epoch": 0.4732938638873969, + "grad_norm": 1.3059613479526888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97590 + }, + { + "epoch": 0.473342362080233, + "grad_norm": 1.3274042487410043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97600 + }, + { + "epoch": 0.4733908602730691, + "grad_norm": 1.5174438772191934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97610 + }, + { + "epoch": 0.4734393584659052, + "grad_norm": 1.6128589663821913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97620 + }, + { + "epoch": 0.47348785665874127, + "grad_norm": 1.5057568703014113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97630 + }, + { + "epoch": 0.47353635485157736, + "grad_norm": 1.4536819037402893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97640 + }, + { + "epoch": 0.47358485304441345, + "grad_norm": 1.3578994639829034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97650 + }, + { + "epoch": 0.47363335123724953, + "grad_norm": 1.5460472013728577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97660 + }, + { + "epoch": 0.4736818494300856, + "grad_norm": 1.5611246340085927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97670 + }, + { + "epoch": 0.4737303476229217, + "grad_norm": 1.4862956732031307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97680 + }, + { + "epoch": 0.4737788458157578, + "grad_norm": 1.2709253383036412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97690 + }, + { + "epoch": 0.4738273440085939, + "grad_norm": 1.2601485366303677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97700 + }, + { + "epoch": 0.47387584220143, + "grad_norm": 1.5185823087904282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97710 + }, + { + "epoch": 0.47392434039426606, + "grad_norm": 1.46858141647499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97720 + }, + { + "epoch": 0.47397283858710215, + "grad_norm": 1.5288127030999021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97730 + }, + { + "epoch": 0.47402133677993824, + "grad_norm": 1.3160197909201088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97740 + }, + { + "epoch": 0.47406983497277433, + "grad_norm": 1.2697718432264082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97750 + }, + { + "epoch": 0.4741183331656104, + "grad_norm": 1.4974155249092291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97760 + }, + { + "epoch": 0.4741668313584465, + "grad_norm": 1.519944134997786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97770 + }, + { + "epoch": 0.4742153295512826, + "grad_norm": 1.4191620323344978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97780 + }, + { + "epoch": 0.4742638277441187, + "grad_norm": 1.3562770107000688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97790 + }, + { + "epoch": 0.47431232593695477, + "grad_norm": 1.2499377533004008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97800 + }, + { + "epoch": 0.47436082412979086, + "grad_norm": 1.5027396216282796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97810 + }, + { + "epoch": 0.47440932232262695, + "grad_norm": 1.5538995512542897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97820 + }, + { + "epoch": 0.47445782051546304, + "grad_norm": 1.5058802205203392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97830 + }, + { + "epoch": 0.4745063187082991, + "grad_norm": 1.303137366903684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97840 + }, + { + "epoch": 0.4745548169011352, + "grad_norm": 1.230036019705949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97850 + }, + { + "epoch": 0.4746033150939713, + "grad_norm": 1.4161975059323595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97860 + }, + { + "epoch": 0.4746518132868074, + "grad_norm": 1.483332709995011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97870 + }, + { + "epoch": 0.4747003114796435, + "grad_norm": 1.4458139219186705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97880 + }, + { + "epoch": 0.47474880967247957, + "grad_norm": 1.2856101250235952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97890 + }, + { + "epoch": 0.47479730786531565, + "grad_norm": 1.2897868373329402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97900 + }, + { + "epoch": 0.47484580605815174, + "grad_norm": 1.4836957973329845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97910 + }, + { + "epoch": 0.47489430425098783, + "grad_norm": 1.330896424178718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97920 + }, + { + "epoch": 0.4749428024438239, + "grad_norm": 1.467311960823281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97930 + }, + { + "epoch": 0.47499130063666, + "grad_norm": 1.2523827308541513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97940 + }, + { + "epoch": 0.4750397988294961, + "grad_norm": 1.2729759646390448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97950 + }, + { + "epoch": 0.4750882970223322, + "grad_norm": 1.6582801265485614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97960 + }, + { + "epoch": 0.4751367952151683, + "grad_norm": 1.4703086037570756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97970 + }, + { + "epoch": 0.47518529340800436, + "grad_norm": 1.356904562044292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97980 + }, + { + "epoch": 0.47523379160084045, + "grad_norm": 1.2671722515733563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 97990 + }, + { + "epoch": 0.47528228979367654, + "grad_norm": 1.222924908006462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98000 + }, + { + "epoch": 0.4753307879865126, + "grad_norm": 1.3527910880384297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98010 + }, + { + "epoch": 0.4753792861793487, + "grad_norm": 1.4037742346317827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98020 + }, + { + "epoch": 0.47542778437218486, + "grad_norm": 1.3822568689647596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98030 + }, + { + "epoch": 0.47547628256502095, + "grad_norm": 1.203715669362282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98040 + }, + { + "epoch": 0.47552478075785704, + "grad_norm": 1.1885062178862427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98050 + }, + { + "epoch": 0.4755732789506931, + "grad_norm": 1.3450637936784915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98060 + }, + { + "epoch": 0.4756217771435292, + "grad_norm": 1.3352617145301338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98070 + }, + { + "epoch": 0.4756702753363653, + "grad_norm": 1.343003930287523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98080 + }, + { + "epoch": 0.4757187735292014, + "grad_norm": 1.1770787011755601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98090 + }, + { + "epoch": 0.4757672717220375, + "grad_norm": 1.1976558766946255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98100 + }, + { + "epoch": 0.47581576991487357, + "grad_norm": 1.4052417895982217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98110 + }, + { + "epoch": 0.47586426810770965, + "grad_norm": 1.473246982186538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98120 + }, + { + "epoch": 0.47591276630054574, + "grad_norm": 1.305261463357965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98130 + }, + { + "epoch": 0.47596126449338183, + "grad_norm": 1.429283287279759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98140 + }, + { + "epoch": 0.4760097626862179, + "grad_norm": 1.193675984723086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98150 + }, + { + "epoch": 0.476058260879054, + "grad_norm": 1.3158054912310035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98160 + }, + { + "epoch": 0.4761067590718901, + "grad_norm": 1.3340807925033005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98170 + }, + { + "epoch": 0.4761552572647262, + "grad_norm": 1.4319707020149508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98180 + }, + { + "epoch": 0.47620375545756227, + "grad_norm": 1.3277029609071178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98190 + }, + { + "epoch": 0.47625225365039836, + "grad_norm": 0.010620735585689545, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 98200 + }, + { + "epoch": 0.47630075184323445, + "grad_norm": 0.0015901103615760803, + "learning_rate": 0.0002, + "loss": 0.3258, + "step": 98210 + }, + { + "epoch": 0.47634925003607054, + "grad_norm": 0.000511295860633254, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 98220 + }, + { + "epoch": 0.4763977482289066, + "grad_norm": 0.0001325487974099815, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 98230 + }, + { + "epoch": 0.4764462464217427, + "grad_norm": 0.00018399654072709382, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98240 + }, + { + "epoch": 0.4764947446145788, + "grad_norm": 0.00012029452773276716, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98250 + }, + { + "epoch": 0.4765432428074149, + "grad_norm": 4.625397195923142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98260 + }, + { + "epoch": 0.476591741000251, + "grad_norm": 3.87771287932992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98270 + }, + { + "epoch": 0.47664023919308707, + "grad_norm": 2.733412475208752e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 98280 + }, + { + "epoch": 0.47668873738592316, + "grad_norm": 4.4636912207352e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98290 + }, + { + "epoch": 0.47673723557875924, + "grad_norm": 4.578325024340302e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98300 + }, + { + "epoch": 0.47678573377159533, + "grad_norm": 2.5379640646860935e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98310 + }, + { + "epoch": 0.4768342319644314, + "grad_norm": 2.2437192455981858e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98320 + }, + { + "epoch": 0.4768827301572675, + "grad_norm": 2.0575595044647343e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98330 + }, + { + "epoch": 0.4769312283501036, + "grad_norm": 6.721608224324882e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98340 + }, + { + "epoch": 0.4769797265429397, + "grad_norm": 3.2943506084848195e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98350 + }, + { + "epoch": 0.4770282247357758, + "grad_norm": 1.5184517906163819e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98360 + }, + { + "epoch": 0.47707672292861186, + "grad_norm": 1.3809903975925408e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98370 + }, + { + "epoch": 0.47712522112144795, + "grad_norm": 1.2608878932951484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98380 + }, + { + "epoch": 0.47717371931428404, + "grad_norm": 2.3719692762824707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98390 + }, + { + "epoch": 0.47722221750712013, + "grad_norm": 2.1412417481769808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98400 + }, + { + "epoch": 0.4772707156999562, + "grad_norm": 1.0825354365806561e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98410 + }, + { + "epoch": 0.4773192138927923, + "grad_norm": 1.0460134944878519e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98420 + }, + { + "epoch": 0.4773677120856284, + "grad_norm": 9.853770279732998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98430 + }, + { + "epoch": 0.4774162102784645, + "grad_norm": 1.7098078387789428e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98440 + }, + { + "epoch": 0.47746470847130057, + "grad_norm": 2.4638391550979577e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98450 + }, + { + "epoch": 0.47751320666413666, + "grad_norm": 8.001132300705649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98460 + }, + { + "epoch": 0.47756170485697275, + "grad_norm": 7.1976232902670745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98470 + }, + { + "epoch": 0.47761020304980883, + "grad_norm": 7.825965440133587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98480 + }, + { + "epoch": 0.4776587012426449, + "grad_norm": 1.4394012396223843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98490 + }, + { + "epoch": 0.477707199435481, + "grad_norm": 1.317107853537891e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98500 + }, + { + "epoch": 0.4777556976283171, + "grad_norm": 6.325631602521753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98510 + }, + { + "epoch": 0.4778041958211532, + "grad_norm": 6.3279162532126065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98520 + }, + { + "epoch": 0.4778526940139893, + "grad_norm": 6.8079539232712705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98530 + }, + { + "epoch": 0.4779011922068254, + "grad_norm": 1.1578102203202434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98540 + }, + { + "epoch": 0.4779496903996615, + "grad_norm": 1.0796381502586883e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98550 + }, + { + "epoch": 0.4779981885924976, + "grad_norm": 5.613308530882932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98560 + }, + { + "epoch": 0.4780466867853337, + "grad_norm": 5.697102096746676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98570 + }, + { + "epoch": 0.4780951849781698, + "grad_norm": 5.404455805546604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98580 + }, + { + "epoch": 0.47814368317100586, + "grad_norm": 9.220665560860652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98590 + }, + { + "epoch": 0.47819218136384195, + "grad_norm": 9.164935363514815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98600 + }, + { + "epoch": 0.47824067955667804, + "grad_norm": 4.56438601759146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98610 + }, + { + "epoch": 0.4782891777495141, + "grad_norm": 5.121904450788861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98620 + }, + { + "epoch": 0.4783376759423502, + "grad_norm": 4.765855919686146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98630 + }, + { + "epoch": 0.4783861741351863, + "grad_norm": 8.328457624884322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98640 + }, + { + "epoch": 0.4784346723280224, + "grad_norm": 7.912258297437802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98650 + }, + { + "epoch": 0.4784831705208585, + "grad_norm": 4.2273231883882545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98660 + }, + { + "epoch": 0.47853166871369457, + "grad_norm": 3.977625055995304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98670 + }, + { + "epoch": 0.47858016690653066, + "grad_norm": 4.026651822641725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98680 + }, + { + "epoch": 0.47862866509936675, + "grad_norm": 7.253686817421112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98690 + }, + { + "epoch": 0.47867716329220283, + "grad_norm": 7.0693326961190905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98700 + }, + { + "epoch": 0.4787256614850389, + "grad_norm": 3.742344688362209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98710 + }, + { + "epoch": 0.478774159677875, + "grad_norm": 3.6211170026945183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98720 + }, + { + "epoch": 0.4788226578707111, + "grad_norm": 3.3813296340667875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98730 + }, + { + "epoch": 0.4788711560635472, + "grad_norm": 6.3768170548428316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98740 + }, + { + "epoch": 0.4789196542563833, + "grad_norm": 6.3742659222043585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98750 + }, + { + "epoch": 0.47896815244921936, + "grad_norm": 3.1891111120785354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98760 + }, + { + "epoch": 0.47901665064205545, + "grad_norm": 1.3653313544637058e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98770 + }, + { + "epoch": 0.47906514883489154, + "grad_norm": 4.555402847472578e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 98780 + }, + { + "epoch": 0.47911364702772763, + "grad_norm": 0.00020334753207862377, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98790 + }, + { + "epoch": 0.4791621452205637, + "grad_norm": 0.00012598212924785912, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98800 + }, + { + "epoch": 0.4792106434133998, + "grad_norm": 3.358635149197653e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 98810 + }, + { + "epoch": 0.4792591416062359, + "grad_norm": 0.0012355037033557892, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 98820 + }, + { + "epoch": 0.479307639799072, + "grad_norm": 5.258618330117315e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 98830 + }, + { + "epoch": 0.47935613799190807, + "grad_norm": 0.0003587016253732145, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98840 + }, + { + "epoch": 0.47940463618474416, + "grad_norm": 0.00025613681646063924, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98850 + }, + { + "epoch": 0.47945313437758025, + "grad_norm": 6.541165203088894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98860 + }, + { + "epoch": 0.47950163257041634, + "grad_norm": 4.348560105427168e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98870 + }, + { + "epoch": 0.4795501307632524, + "grad_norm": 3.657584238681011e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98880 + }, + { + "epoch": 0.4795986289560885, + "grad_norm": 0.00012953609984833747, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 98890 + }, + { + "epoch": 0.4796471271489246, + "grad_norm": 0.03670140355825424, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 98900 + }, + { + "epoch": 0.4796956253417607, + "grad_norm": 0.002177106449380517, + "learning_rate": 0.0002, + "loss": 0.0054, + "step": 98910 + }, + { + "epoch": 0.4797441235345968, + "grad_norm": 0.0007923998637124896, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 98920 + }, + { + "epoch": 0.47979262172743287, + "grad_norm": 0.00027336410130374134, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 98930 + }, + { + "epoch": 0.47984111992026895, + "grad_norm": 0.00028624472906813025, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98940 + }, + { + "epoch": 0.47988961811310504, + "grad_norm": 0.0023048571310937405, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 98950 + }, + { + "epoch": 0.47993811630594113, + "grad_norm": 0.00026162073481827974, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 98960 + }, + { + "epoch": 0.4799866144987772, + "grad_norm": 0.0003279481898061931, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 98970 + }, + { + "epoch": 0.4800351126916133, + "grad_norm": 0.0002982252335641533, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 98980 + }, + { + "epoch": 0.4800836108844494, + "grad_norm": 0.0001834139256970957, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 98990 + }, + { + "epoch": 0.4801321090772855, + "grad_norm": 9.599570330465212e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99000 + }, + { + "epoch": 0.4801806072701216, + "grad_norm": 4.822049959329888e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99010 + }, + { + "epoch": 0.48022910546295766, + "grad_norm": 5.055691144661978e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99020 + }, + { + "epoch": 0.48027760365579375, + "grad_norm": 0.00016170938033610582, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99030 + }, + { + "epoch": 0.48032610184862984, + "grad_norm": 5.885774226044305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99040 + }, + { + "epoch": 0.480374600041466, + "grad_norm": 5.183819666854106e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99050 + }, + { + "epoch": 0.48042309823430207, + "grad_norm": 2.5107499823207036e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99060 + }, + { + "epoch": 0.48047159642713816, + "grad_norm": 2.3958571546245366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99070 + }, + { + "epoch": 0.48052009461997425, + "grad_norm": 1.8660233763512224e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99080 + }, + { + "epoch": 0.48056859281281034, + "grad_norm": 2.2317735783872195e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99090 + }, + { + "epoch": 0.4806170910056464, + "grad_norm": 2.274583312100731e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99100 + }, + { + "epoch": 0.4806655891984825, + "grad_norm": 0.000577298691496253, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99110 + }, + { + "epoch": 0.4807140873913186, + "grad_norm": 1.877459180832375e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99120 + }, + { + "epoch": 0.4807625855841547, + "grad_norm": 1.1908527994819451e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99130 + }, + { + "epoch": 0.4808110837769908, + "grad_norm": 1.496641561971046e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99140 + }, + { + "epoch": 0.48085958196982687, + "grad_norm": 1.4712000847794116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99150 + }, + { + "epoch": 0.48090808016266295, + "grad_norm": 9.18797104532132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99160 + }, + { + "epoch": 0.48095657835549904, + "grad_norm": 8.904647984309122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99170 + }, + { + "epoch": 0.48100507654833513, + "grad_norm": 1.013761902868282e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99180 + }, + { + "epoch": 0.4810535747411712, + "grad_norm": 1.1238812476221938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99190 + }, + { + "epoch": 0.4811020729340073, + "grad_norm": 1.0117847523360979e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99200 + }, + { + "epoch": 0.4811505711268434, + "grad_norm": 9.213994417223148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99210 + }, + { + "epoch": 0.4811990693196795, + "grad_norm": 8.30057524581207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99220 + }, + { + "epoch": 0.4812475675125156, + "grad_norm": 9.496741768089123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99230 + }, + { + "epoch": 0.48129606570535166, + "grad_norm": 1.0567695426288992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99240 + }, + { + "epoch": 0.48134456389818775, + "grad_norm": 1.7980610209633596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99250 + }, + { + "epoch": 0.48139306209102384, + "grad_norm": 8.345745300175622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99260 + }, + { + "epoch": 0.4814415602838599, + "grad_norm": 7.178550731623545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99270 + }, + { + "epoch": 0.481490058476696, + "grad_norm": 3.844344973913394e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99280 + }, + { + "epoch": 0.4815385566695321, + "grad_norm": 8.755958333495073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99290 + }, + { + "epoch": 0.4815870548623682, + "grad_norm": 8.54668269312242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99300 + }, + { + "epoch": 0.4816355530552043, + "grad_norm": 9.363699973619077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99310 + }, + { + "epoch": 0.48168405124804037, + "grad_norm": 7.011689831415424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99320 + }, + { + "epoch": 0.48173254944087646, + "grad_norm": 6.080757430027006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99330 + }, + { + "epoch": 0.48178104763371254, + "grad_norm": 9.916625458572526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99340 + }, + { + "epoch": 0.48182954582654863, + "grad_norm": 9.344529644295108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99350 + }, + { + "epoch": 0.4818780440193847, + "grad_norm": 1.0678052603907418e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99360 + }, + { + "epoch": 0.4819265422122208, + "grad_norm": 6.397626293619396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99370 + }, + { + "epoch": 0.4819750404050569, + "grad_norm": 5.858386884938227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99380 + }, + { + "epoch": 0.482023538597893, + "grad_norm": 8.821138180792332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99390 + }, + { + "epoch": 0.4820720367907291, + "grad_norm": 7.102233666955726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99400 + }, + { + "epoch": 0.48212053498356516, + "grad_norm": 4.733749392471509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99410 + }, + { + "epoch": 0.48216903317640125, + "grad_norm": 5.437937488750322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99420 + }, + { + "epoch": 0.48221753136923734, + "grad_norm": 5.075889475847362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99430 + }, + { + "epoch": 0.48226602956207343, + "grad_norm": 5.743605925090378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99440 + }, + { + "epoch": 0.4823145277549095, + "grad_norm": 5.550513378693722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99450 + }, + { + "epoch": 0.4823630259477456, + "grad_norm": 4.838308541366132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99460 + }, + { + "epoch": 0.4824115241405817, + "grad_norm": 4.562918093142798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99470 + }, + { + "epoch": 0.4824600223334178, + "grad_norm": 4.971619091520552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99480 + }, + { + "epoch": 0.48250852052625387, + "grad_norm": 6.164505521155661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99490 + }, + { + "epoch": 0.48255701871908996, + "grad_norm": 5.315876023814781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99500 + }, + { + "epoch": 0.48260551691192605, + "grad_norm": 3.979161192546599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99510 + }, + { + "epoch": 0.48265401510476214, + "grad_norm": 4.811583494301885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99520 + }, + { + "epoch": 0.4827025132975982, + "grad_norm": 3.5358520108275115e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99530 + }, + { + "epoch": 0.4827510114904343, + "grad_norm": 5.0660596571106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99540 + }, + { + "epoch": 0.4827995096832704, + "grad_norm": 5.21556057719863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99550 + }, + { + "epoch": 0.48284800787610654, + "grad_norm": 4.027092472824734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99560 + }, + { + "epoch": 0.48289650606894263, + "grad_norm": 8.912410521588754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99570 + }, + { + "epoch": 0.4829450042617787, + "grad_norm": 3.706335292008589e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99580 + }, + { + "epoch": 0.4829935024546148, + "grad_norm": 4.3297272895870265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99590 + }, + { + "epoch": 0.4830420006474509, + "grad_norm": 4.5388169382931665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99600 + }, + { + "epoch": 0.483090498840287, + "grad_norm": 3.1374850095744478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99610 + }, + { + "epoch": 0.4831389970331231, + "grad_norm": 6.33218087386922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99620 + }, + { + "epoch": 0.48318749522595916, + "grad_norm": 4.008224550489103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99630 + }, + { + "epoch": 0.48323599341879525, + "grad_norm": 5.49428841623012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99640 + }, + { + "epoch": 0.48328449161163134, + "grad_norm": 3.967822976846946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99650 + }, + { + "epoch": 0.48333298980446743, + "grad_norm": 3.1512211080553243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99660 + }, + { + "epoch": 0.4833814879973035, + "grad_norm": 3.691003030326101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99670 + }, + { + "epoch": 0.4834299861901396, + "grad_norm": 7.3391233854636084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99680 + }, + { + "epoch": 0.4834784843829757, + "grad_norm": 4.355898454377893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99690 + }, + { + "epoch": 0.4835269825758118, + "grad_norm": 3.5951850350102177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99700 + }, + { + "epoch": 0.48357548076864787, + "grad_norm": 2.826045829351642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99710 + }, + { + "epoch": 0.48362397896148396, + "grad_norm": 5.799704013043083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99720 + }, + { + "epoch": 0.48367247715432005, + "grad_norm": 3.2558041311858688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99730 + }, + { + "epoch": 0.48372097534715613, + "grad_norm": 4.158406227361411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99740 + }, + { + "epoch": 0.4837694735399922, + "grad_norm": 3.761550487979548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99750 + }, + { + "epoch": 0.4838179717328283, + "grad_norm": 3.136524810543051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99760 + }, + { + "epoch": 0.4838664699256644, + "grad_norm": 7.013869890215574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99770 + }, + { + "epoch": 0.4839149681185005, + "grad_norm": 2.7620726541499607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99780 + }, + { + "epoch": 0.4839634663113366, + "grad_norm": 5.634104581986321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99790 + }, + { + "epoch": 0.48401196450417266, + "grad_norm": 3.7669988159905188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99800 + }, + { + "epoch": 0.48406046269700875, + "grad_norm": 2.613262495287927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99810 + }, + { + "epoch": 0.48410896088984484, + "grad_norm": 5.095670985610923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99820 + }, + { + "epoch": 0.48415745908268093, + "grad_norm": 2.1196437955950387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99830 + }, + { + "epoch": 0.484205957275517, + "grad_norm": 5.352486368792597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99840 + }, + { + "epoch": 0.4842544554683531, + "grad_norm": 3.4143415632570395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99850 + }, + { + "epoch": 0.4843029536611892, + "grad_norm": 2.6132884158869274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99860 + }, + { + "epoch": 0.4843514518540253, + "grad_norm": 2.2133547190605896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99870 + }, + { + "epoch": 0.48439995004686137, + "grad_norm": 2.259285565742175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99880 + }, + { + "epoch": 0.48444844823969746, + "grad_norm": 3.1062443213158986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99890 + }, + { + "epoch": 0.48449694643253355, + "grad_norm": 3.089843175985152e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99900 + }, + { + "epoch": 0.48454544462536964, + "grad_norm": 2.9292232284205966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99910 + }, + { + "epoch": 0.4845939428182057, + "grad_norm": 2.631230472616153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99920 + }, + { + "epoch": 0.4846424410110418, + "grad_norm": 3.964767074648989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99930 + }, + { + "epoch": 0.4846909392038779, + "grad_norm": 4.6220182412071154e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 99940 + }, + { + "epoch": 0.484739437396714, + "grad_norm": 1.4168421330396086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99950 + }, + { + "epoch": 0.4847879355895501, + "grad_norm": 4.901981355942553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99960 + }, + { + "epoch": 0.48483643378238617, + "grad_norm": 4.859492491959827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99970 + }, + { + "epoch": 0.48488493197522226, + "grad_norm": 1.577288639964536e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99980 + }, + { + "epoch": 0.48493343016805834, + "grad_norm": 1.0441952326800674e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 99990 + }, + { + "epoch": 0.48498192836089443, + "grad_norm": 9.945339115802199e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100000 + }, + { + "epoch": 0.4850304265537305, + "grad_norm": 3.0296952900243923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100010 + }, + { + "epoch": 0.4850789247465666, + "grad_norm": 5.310352207743563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100020 + }, + { + "epoch": 0.4851274229394027, + "grad_norm": 3.81025483875419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100030 + }, + { + "epoch": 0.4851759211322388, + "grad_norm": 8.399696525884792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100040 + }, + { + "epoch": 0.4852244193250749, + "grad_norm": 7.330181688303128e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100050 + }, + { + "epoch": 0.48527291751791096, + "grad_norm": 8.733311005926225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100060 + }, + { + "epoch": 0.48532141571074705, + "grad_norm": 4.63633296021726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100070 + }, + { + "epoch": 0.4853699139035832, + "grad_norm": 5.429352313512936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100080 + }, + { + "epoch": 0.4854184120964193, + "grad_norm": 5.923508069827221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100090 + }, + { + "epoch": 0.48546691028925537, + "grad_norm": 6.504060365841724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100100 + }, + { + "epoch": 0.48551540848209146, + "grad_norm": 4.627038379112491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100110 + }, + { + "epoch": 0.48556390667492755, + "grad_norm": 3.547853111740551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100120 + }, + { + "epoch": 0.48561240486776364, + "grad_norm": 3.3439757771702716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100130 + }, + { + "epoch": 0.4856609030605997, + "grad_norm": 5.005327693652362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100140 + }, + { + "epoch": 0.4857094012534358, + "grad_norm": 5.69055873711477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100150 + }, + { + "epoch": 0.4857578994462719, + "grad_norm": 2.660118980202242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100160 + }, + { + "epoch": 0.485806397639108, + "grad_norm": 2.9221109798527323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100170 + }, + { + "epoch": 0.4858548958319441, + "grad_norm": 2.442383902234724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100180 + }, + { + "epoch": 0.48590339402478017, + "grad_norm": 4.633170647139195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100190 + }, + { + "epoch": 0.48595189221761625, + "grad_norm": 4.344362878327956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100200 + }, + { + "epoch": 0.48600039041045234, + "grad_norm": 2.718906216614414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100210 + }, + { + "epoch": 0.48604888860328843, + "grad_norm": 1.926804725371767e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100220 + }, + { + "epoch": 0.4860973867961245, + "grad_norm": 2.803393044814584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100230 + }, + { + "epoch": 0.4861458849889606, + "grad_norm": 3.83705355488928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100240 + }, + { + "epoch": 0.4861943831817967, + "grad_norm": 3.5925820611737436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100250 + }, + { + "epoch": 0.4862428813746328, + "grad_norm": 2.1298876617947826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100260 + }, + { + "epoch": 0.4862913795674689, + "grad_norm": 2.0787358607776696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100270 + }, + { + "epoch": 0.48633987776030496, + "grad_norm": 2.3465556751034455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100280 + }, + { + "epoch": 0.48638837595314105, + "grad_norm": 3.789962420341908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100290 + }, + { + "epoch": 0.48643687414597714, + "grad_norm": 3.684442390294862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100300 + }, + { + "epoch": 0.4864853723388132, + "grad_norm": 2.055934146483196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100310 + }, + { + "epoch": 0.4865338705316493, + "grad_norm": 2.0986017261748202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100320 + }, + { + "epoch": 0.4865823687244854, + "grad_norm": 1.9222404716856545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100330 + }, + { + "epoch": 0.4866308669173215, + "grad_norm": 3.550267820173758e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100340 + }, + { + "epoch": 0.4866793651101576, + "grad_norm": 3.6696378629130777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100350 + }, + { + "epoch": 0.48672786330299367, + "grad_norm": 1.9584119854698656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100360 + }, + { + "epoch": 0.48677636149582976, + "grad_norm": 2.2726455881638685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100370 + }, + { + "epoch": 0.48682485968866585, + "grad_norm": 1.8465906350684236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100380 + }, + { + "epoch": 0.48687335788150193, + "grad_norm": 3.2453281164634973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100390 + }, + { + "epoch": 0.486921856074338, + "grad_norm": 3.4535678423708305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100400 + }, + { + "epoch": 0.4869703542671741, + "grad_norm": 0.35638073086738586, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 100410 + }, + { + "epoch": 0.4870188524600102, + "grad_norm": 5.876703653484583e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 100420 + }, + { + "epoch": 0.4870673506528463, + "grad_norm": 7.5078351073898375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100430 + }, + { + "epoch": 0.4871158488456824, + "grad_norm": 1.6089690689113922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100440 + }, + { + "epoch": 0.48716434703851846, + "grad_norm": 5.8482197346165776e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100450 + }, + { + "epoch": 0.48721284523135455, + "grad_norm": 5.889576641493477e-06, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 100460 + }, + { + "epoch": 0.48726134342419064, + "grad_norm": 0.025689730420708656, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100470 + }, + { + "epoch": 0.48730984161702673, + "grad_norm": 9.551749099045992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100480 + }, + { + "epoch": 0.4873583398098628, + "grad_norm": 0.00018044511671178043, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100490 + }, + { + "epoch": 0.4874068380026989, + "grad_norm": 7.0374553615693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100500 + }, + { + "epoch": 0.487455336195535, + "grad_norm": 6.816575478296727e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100510 + }, + { + "epoch": 0.4875038343883711, + "grad_norm": 5.972523922537221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100520 + }, + { + "epoch": 0.48755233258120717, + "grad_norm": 6.389091595337959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100530 + }, + { + "epoch": 0.48760083077404326, + "grad_norm": 2.3014043108560145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100540 + }, + { + "epoch": 0.48764932896687935, + "grad_norm": 1.290706313739065e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100550 + }, + { + "epoch": 0.48769782715971544, + "grad_norm": 7.298829132196261e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 100560 + }, + { + "epoch": 0.4877463253525515, + "grad_norm": 2.693426540645305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100570 + }, + { + "epoch": 0.4877948235453876, + "grad_norm": 2.8454073799366597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100580 + }, + { + "epoch": 0.48784332173822376, + "grad_norm": 8.571575563109946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100590 + }, + { + "epoch": 0.48789181993105984, + "grad_norm": 2.9379034458543174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100600 + }, + { + "epoch": 0.48794031812389593, + "grad_norm": 2.695044940992375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100610 + }, + { + "epoch": 0.487988816316732, + "grad_norm": 3.6879821436741622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100620 + }, + { + "epoch": 0.4880373145095681, + "grad_norm": 2.5847348297247663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100630 + }, + { + "epoch": 0.4880858127024042, + "grad_norm": 5.9998087635904085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100640 + }, + { + "epoch": 0.4881343108952403, + "grad_norm": 6.797092737542698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100650 + }, + { + "epoch": 0.4881828090880764, + "grad_norm": 2.2887868453835836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100660 + }, + { + "epoch": 0.48823130728091246, + "grad_norm": 2.7778619369200896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100670 + }, + { + "epoch": 0.48827980547374855, + "grad_norm": 5.229252565186471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100680 + }, + { + "epoch": 0.48832830366658464, + "grad_norm": 6.743162430211669e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100690 + }, + { + "epoch": 0.48837680185942073, + "grad_norm": 4.798261670657666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100700 + }, + { + "epoch": 0.4884253000522568, + "grad_norm": 2.107032059939229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100710 + }, + { + "epoch": 0.4884737982450929, + "grad_norm": 2.206145836680662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100720 + }, + { + "epoch": 0.488522296437929, + "grad_norm": 1.9214824078517267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100730 + }, + { + "epoch": 0.4885707946307651, + "grad_norm": 4.26606720793643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100740 + }, + { + "epoch": 0.48861929282360117, + "grad_norm": 4.035879555885913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100750 + }, + { + "epoch": 0.48866779101643726, + "grad_norm": 1.9659094050439307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100760 + }, + { + "epoch": 0.48871628920927335, + "grad_norm": 1.744288852023601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100770 + }, + { + "epoch": 0.48876478740210944, + "grad_norm": 1.805497049645055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100780 + }, + { + "epoch": 0.4888132855949455, + "grad_norm": 5.00084343002527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100790 + }, + { + "epoch": 0.4888617837877816, + "grad_norm": 3.8942125684116036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100800 + }, + { + "epoch": 0.4889102819806177, + "grad_norm": 1.7558891158842016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100810 + }, + { + "epoch": 0.4889587801734538, + "grad_norm": 2.009798890867387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100820 + }, + { + "epoch": 0.4890072783662899, + "grad_norm": 2.230171276096371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100830 + }, + { + "epoch": 0.48905577655912597, + "grad_norm": 6.038769697624957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100840 + }, + { + "epoch": 0.48910427475196205, + "grad_norm": 3.5012456010008464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100850 + }, + { + "epoch": 0.48915277294479814, + "grad_norm": 1.9170634004694875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100860 + }, + { + "epoch": 0.48920127113763423, + "grad_norm": 1.4412345308301155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100870 + }, + { + "epoch": 0.4892497693304703, + "grad_norm": 1.5071582311065868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100880 + }, + { + "epoch": 0.4892982675233064, + "grad_norm": 3.831030426226789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100890 + }, + { + "epoch": 0.4893467657161425, + "grad_norm": 5.3088233471498825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100900 + }, + { + "epoch": 0.4893952639089786, + "grad_norm": 1.5529526535829064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100910 + }, + { + "epoch": 0.48944376210181467, + "grad_norm": 1.7382278656441486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100920 + }, + { + "epoch": 0.48949226029465076, + "grad_norm": 1.7620502603676869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100930 + }, + { + "epoch": 0.48954075848748685, + "grad_norm": 3.115869276371086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100940 + }, + { + "epoch": 0.48958925668032294, + "grad_norm": 2.994810301970574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100950 + }, + { + "epoch": 0.489637754873159, + "grad_norm": 1.3697679150936892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100960 + }, + { + "epoch": 0.4896862530659951, + "grad_norm": 1.484686094954668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100970 + }, + { + "epoch": 0.4897347512588312, + "grad_norm": 1.379353534503025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100980 + }, + { + "epoch": 0.4897832494516673, + "grad_norm": 3.011349008374964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 100990 + }, + { + "epoch": 0.4898317476445034, + "grad_norm": 2.9978034490341088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101000 + }, + { + "epoch": 0.48988024583733947, + "grad_norm": 1.2223656540299999e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101010 + }, + { + "epoch": 0.48992874403017556, + "grad_norm": 1.0713143865359598e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101020 + }, + { + "epoch": 0.48997724222301164, + "grad_norm": 1.0787516657728702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101030 + }, + { + "epoch": 0.49002574041584773, + "grad_norm": 3.215902779629687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101040 + }, + { + "epoch": 0.4900742386086838, + "grad_norm": 2.487463007128099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101050 + }, + { + "epoch": 0.4901227368015199, + "grad_norm": 1.0858484529308043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101060 + }, + { + "epoch": 0.490171234994356, + "grad_norm": 1.3060694072919432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101070 + }, + { + "epoch": 0.4902197331871921, + "grad_norm": 1.2817286005883943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101080 + }, + { + "epoch": 0.4902682313800282, + "grad_norm": 2.304898998772842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101090 + }, + { + "epoch": 0.4903167295728643, + "grad_norm": 2.53365305979969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101100 + }, + { + "epoch": 0.4903652277657004, + "grad_norm": 1.7047328810804174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101110 + }, + { + "epoch": 0.4904137259585365, + "grad_norm": 1.4313228575701942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101120 + }, + { + "epoch": 0.4904622241513726, + "grad_norm": 1.3303532568897936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101130 + }, + { + "epoch": 0.49051072234420867, + "grad_norm": 2.3222562504088273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101140 + }, + { + "epoch": 0.49055922053704476, + "grad_norm": 2.0726183720398694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101150 + }, + { + "epoch": 0.49060771872988085, + "grad_norm": 1.369915025861701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101160 + }, + { + "epoch": 0.49065621692271694, + "grad_norm": 1.1169042863912182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101170 + }, + { + "epoch": 0.490704715115553, + "grad_norm": 1.0693787544369115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101180 + }, + { + "epoch": 0.4907532133083891, + "grad_norm": 2.16454145629541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101190 + }, + { + "epoch": 0.4908017115012252, + "grad_norm": 2.043915856120293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101200 + }, + { + "epoch": 0.4908502096940613, + "grad_norm": 1.0075685850097216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101210 + }, + { + "epoch": 0.4908987078868974, + "grad_norm": 8.782130294093804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101220 + }, + { + "epoch": 0.49094720607973347, + "grad_norm": 9.71673330241174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101230 + }, + { + "epoch": 0.49099570427256956, + "grad_norm": 2.373375082243001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101240 + }, + { + "epoch": 0.49104420246540564, + "grad_norm": 1.9597282516770065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101250 + }, + { + "epoch": 0.49109270065824173, + "grad_norm": 8.232311188294261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101260 + }, + { + "epoch": 0.4911411988510778, + "grad_norm": 1.0336751756767626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101270 + }, + { + "epoch": 0.4911896970439139, + "grad_norm": 1.0412416031613247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101280 + }, + { + "epoch": 0.49123819523675, + "grad_norm": 1.8826225414159126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101290 + }, + { + "epoch": 0.4912866934295861, + "grad_norm": 1.9179512946720934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101300 + }, + { + "epoch": 0.4913351916224222, + "grad_norm": 7.843927392059413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101310 + }, + { + "epoch": 0.49138368981525826, + "grad_norm": 8.863778475642903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101320 + }, + { + "epoch": 0.49143218800809435, + "grad_norm": 7.893102633715898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101330 + }, + { + "epoch": 0.49148068620093044, + "grad_norm": 1.6852110320542124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101340 + }, + { + "epoch": 0.4915291843937665, + "grad_norm": 1.7003910670609912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101350 + }, + { + "epoch": 0.4915776825866026, + "grad_norm": 1.0084355608341866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101360 + }, + { + "epoch": 0.4916261807794387, + "grad_norm": 7.700295441281924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101370 + }, + { + "epoch": 0.4916746789722748, + "grad_norm": 8.319112225763092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101380 + }, + { + "epoch": 0.4917231771651109, + "grad_norm": 1.8283404870089726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101390 + }, + { + "epoch": 0.49177167535794697, + "grad_norm": 1.7991255845117848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101400 + }, + { + "epoch": 0.49182017355078306, + "grad_norm": 1.1692536645568907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101410 + }, + { + "epoch": 0.49186867174361915, + "grad_norm": 9.934899480867898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101420 + }, + { + "epoch": 0.49191716993645523, + "grad_norm": 8.268279430012626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101430 + }, + { + "epoch": 0.4919656681292913, + "grad_norm": 1.6188204199352185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101440 + }, + { + "epoch": 0.4920141663221274, + "grad_norm": 2.120874569300213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101450 + }, + { + "epoch": 0.4920626645149635, + "grad_norm": 7.060560847094166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101460 + }, + { + "epoch": 0.4921111627077996, + "grad_norm": 7.421835448440106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101470 + }, + { + "epoch": 0.4921596609006357, + "grad_norm": 1.0468478421898908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101480 + }, + { + "epoch": 0.49220815909347176, + "grad_norm": 1.5980384659997071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101490 + }, + { + "epoch": 0.49225665728630785, + "grad_norm": 1.4218569504009793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101500 + }, + { + "epoch": 0.49230515547914394, + "grad_norm": 7.655162903574819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101510 + }, + { + "epoch": 0.49235365367198003, + "grad_norm": 7.171836386987707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101520 + }, + { + "epoch": 0.4924021518648161, + "grad_norm": 7.564557904515823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101530 + }, + { + "epoch": 0.4924506500576522, + "grad_norm": 1.2437357099770452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101540 + }, + { + "epoch": 0.4924991482504883, + "grad_norm": 1.6702445009286748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101550 + }, + { + "epoch": 0.4925476464433244, + "grad_norm": 6.326660013655783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101560 + }, + { + "epoch": 0.49259614463616047, + "grad_norm": 9.869776249615825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101570 + }, + { + "epoch": 0.49264464282899656, + "grad_norm": 5.830695499753347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101580 + }, + { + "epoch": 0.49269314102183265, + "grad_norm": 1.3827503835273092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101590 + }, + { + "epoch": 0.49274163921466874, + "grad_norm": 1.3121108395353076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101600 + }, + { + "epoch": 0.4927901374075049, + "grad_norm": 6.836781949459692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101610 + }, + { + "epoch": 0.49283863560034097, + "grad_norm": 2.3836821583245182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101620 + }, + { + "epoch": 0.49288713379317706, + "grad_norm": 8.609661676928226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101630 + }, + { + "epoch": 0.49293563198601315, + "grad_norm": 1.675786961641279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101640 + }, + { + "epoch": 0.49298413017884923, + "grad_norm": 1.2709704151347978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101650 + }, + { + "epoch": 0.4930326283716853, + "grad_norm": 8.910254223337688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101660 + }, + { + "epoch": 0.4930811265645214, + "grad_norm": 5.999320364935556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101670 + }, + { + "epoch": 0.4931296247573575, + "grad_norm": 7.192468842731614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101680 + }, + { + "epoch": 0.4931781229501936, + "grad_norm": 1.288220801143325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101690 + }, + { + "epoch": 0.4932266211430297, + "grad_norm": 1.2960251751792384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101700 + }, + { + "epoch": 0.49327511933586576, + "grad_norm": 5.681966399606608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101710 + }, + { + "epoch": 0.49332361752870185, + "grad_norm": 6.158524570309964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101720 + }, + { + "epoch": 0.49337211572153794, + "grad_norm": 9.876878266368294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101730 + }, + { + "epoch": 0.49342061391437403, + "grad_norm": 1.2505610129665001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101740 + }, + { + "epoch": 0.4934691121072101, + "grad_norm": 1.0841899893421214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101750 + }, + { + "epoch": 0.4935176103000462, + "grad_norm": 7.917300308690756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101760 + }, + { + "epoch": 0.4935661084928823, + "grad_norm": 5.6568831041659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101770 + }, + { + "epoch": 0.4936146066857184, + "grad_norm": 6.811301886955334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101780 + }, + { + "epoch": 0.49366310487855447, + "grad_norm": 1.115472173296439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101790 + }, + { + "epoch": 0.49371160307139056, + "grad_norm": 1.1741992693714565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101800 + }, + { + "epoch": 0.49376010126422665, + "grad_norm": 5.680722097167745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101810 + }, + { + "epoch": 0.49380859945706274, + "grad_norm": 5.725563596570282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101820 + }, + { + "epoch": 0.4938570976498988, + "grad_norm": 5.498415589499928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101830 + }, + { + "epoch": 0.4939055958427349, + "grad_norm": 1.1216253597012837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101840 + }, + { + "epoch": 0.493954094035571, + "grad_norm": 1.1849447218992282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101850 + }, + { + "epoch": 0.4940025922284071, + "grad_norm": 6.19426373305032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101860 + }, + { + "epoch": 0.4940510904212432, + "grad_norm": 5.156728661859233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101870 + }, + { + "epoch": 0.49409958861407927, + "grad_norm": 4.78532342640392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101880 + }, + { + "epoch": 0.49414808680691535, + "grad_norm": 9.8798818726209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101890 + }, + { + "epoch": 0.49419658499975144, + "grad_norm": 1.0097521681018407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101900 + }, + { + "epoch": 0.49424508319258753, + "grad_norm": 4.918503577755473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101910 + }, + { + "epoch": 0.4942935813854236, + "grad_norm": 5.503548550223059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101920 + }, + { + "epoch": 0.4943420795782597, + "grad_norm": 5.022816367272753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101930 + }, + { + "epoch": 0.4943905777710958, + "grad_norm": 8.82373797139735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101940 + }, + { + "epoch": 0.4944390759639319, + "grad_norm": 1.0047112937172642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101950 + }, + { + "epoch": 0.494487574156768, + "grad_norm": 7.33379920347943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101960 + }, + { + "epoch": 0.49453607234960406, + "grad_norm": 7.021060355327791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101970 + }, + { + "epoch": 0.49458457054244015, + "grad_norm": 5.450531261885772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101980 + }, + { + "epoch": 0.49463306873527624, + "grad_norm": 9.153605446954316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 101990 + }, + { + "epoch": 0.4946815669281123, + "grad_norm": 9.38511107051454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102000 + }, + { + "epoch": 0.4947300651209484, + "grad_norm": 4.2303190639358945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102010 + }, + { + "epoch": 0.4947785633137845, + "grad_norm": 3.962754249187128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102020 + }, + { + "epoch": 0.4948270615066206, + "grad_norm": 5.972739245407865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102030 + }, + { + "epoch": 0.4948755596994567, + "grad_norm": 7.823919077054597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102040 + }, + { + "epoch": 0.49492405789229277, + "grad_norm": 1.0448618468217319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102050 + }, + { + "epoch": 0.49497255608512886, + "grad_norm": 2.598708533696481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102060 + }, + { + "epoch": 0.49502105427796494, + "grad_norm": 4.5671137627323333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102070 + }, + { + "epoch": 0.49506955247080103, + "grad_norm": 4.205950006053172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102080 + }, + { + "epoch": 0.4951180506636371, + "grad_norm": 7.746673418296268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102090 + }, + { + "epoch": 0.4951665488564732, + "grad_norm": 8.254019121523015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102100 + }, + { + "epoch": 0.4952150470493093, + "grad_norm": 4.086010676473961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102110 + }, + { + "epoch": 0.49526354524214544, + "grad_norm": 4.73748571039323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102120 + }, + { + "epoch": 0.49531204343498153, + "grad_norm": 4.6031803435653273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102130 + }, + { + "epoch": 0.4953605416278176, + "grad_norm": 8.974683964879659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102140 + }, + { + "epoch": 0.4954090398206537, + "grad_norm": 8.266461009043269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102150 + }, + { + "epoch": 0.4954575380134898, + "grad_norm": 6.22630125235446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102160 + }, + { + "epoch": 0.4955060362063259, + "grad_norm": 4.4624638917412085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102170 + }, + { + "epoch": 0.49555453439916197, + "grad_norm": 4.0227874364973104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102180 + }, + { + "epoch": 0.49560303259199806, + "grad_norm": 7.380171496151888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102190 + }, + { + "epoch": 0.49565153078483415, + "grad_norm": 9.96297899291676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102200 + }, + { + "epoch": 0.49570002897767024, + "grad_norm": 4.023588928703248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102210 + }, + { + "epoch": 0.4957485271705063, + "grad_norm": 5.501817668118747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102220 + }, + { + "epoch": 0.4957970253633424, + "grad_norm": 4.4640933083428536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102230 + }, + { + "epoch": 0.4958455235561785, + "grad_norm": 7.927641831884102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102240 + }, + { + "epoch": 0.4958940217490146, + "grad_norm": 6.959448342058749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102250 + }, + { + "epoch": 0.4959425199418507, + "grad_norm": 3.9711707700007537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102260 + }, + { + "epoch": 0.49599101813468677, + "grad_norm": 6.398277605512703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102270 + }, + { + "epoch": 0.49603951632752286, + "grad_norm": 5.105802301841322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102280 + }, + { + "epoch": 0.49608801452035894, + "grad_norm": 2.377110149609507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102290 + }, + { + "epoch": 0.49613651271319503, + "grad_norm": 3.4428735489200335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102300 + }, + { + "epoch": 0.4961850109060311, + "grad_norm": 1.1527241667863564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102310 + }, + { + "epoch": 0.4962335090988672, + "grad_norm": 8.774912316766859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102320 + }, + { + "epoch": 0.4962820072917033, + "grad_norm": 7.995188866516401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102330 + }, + { + "epoch": 0.4963305054845394, + "grad_norm": 1.0499519476070418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102340 + }, + { + "epoch": 0.4963790036773755, + "grad_norm": 1.546928729112551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102350 + }, + { + "epoch": 0.49642750187021156, + "grad_norm": 8.206904453800234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102360 + }, + { + "epoch": 0.49647600006304765, + "grad_norm": 2.03526519726438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102370 + }, + { + "epoch": 0.49652449825588374, + "grad_norm": 9.173969033327012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102380 + }, + { + "epoch": 0.49657299644871983, + "grad_norm": 1.1776890005421592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102390 + }, + { + "epoch": 0.4966214946415559, + "grad_norm": 2.2617855393036734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102400 + }, + { + "epoch": 0.496669992834392, + "grad_norm": 8.033898666326422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102410 + }, + { + "epoch": 0.4967184910272281, + "grad_norm": 5.26404448919493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102420 + }, + { + "epoch": 0.4967669892200642, + "grad_norm": 5.58723741050926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102430 + }, + { + "epoch": 0.49681548741290027, + "grad_norm": 9.701004728412954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102440 + }, + { + "epoch": 0.49686398560573636, + "grad_norm": 3.5067509429609345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102450 + }, + { + "epoch": 0.49691248379857245, + "grad_norm": 6.306456725724274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102460 + }, + { + "epoch": 0.49696098199140853, + "grad_norm": 3.8402973245865724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102470 + }, + { + "epoch": 0.4970094801842446, + "grad_norm": 4.908330311081954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102480 + }, + { + "epoch": 0.4970579783770807, + "grad_norm": 3.206248493370367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102490 + }, + { + "epoch": 0.4971064765699168, + "grad_norm": 3.1182878501567757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102500 + }, + { + "epoch": 0.4971549747627529, + "grad_norm": 3.9158209119705134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102510 + }, + { + "epoch": 0.497203472955589, + "grad_norm": 3.2705023045309645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102520 + }, + { + "epoch": 0.49725197114842506, + "grad_norm": 9.828909242060035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102530 + }, + { + "epoch": 0.49730046934126115, + "grad_norm": 2.7797042889687873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102540 + }, + { + "epoch": 0.49734896753409724, + "grad_norm": 2.725678882598004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102550 + }, + { + "epoch": 0.49739746572693333, + "grad_norm": 4.7028379412950017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102560 + }, + { + "epoch": 0.4974459639197694, + "grad_norm": 8.249380698543973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102570 + }, + { + "epoch": 0.4974944621126055, + "grad_norm": 4.3240405034339346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102580 + }, + { + "epoch": 0.4975429603054416, + "grad_norm": 4.401451860758243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102590 + }, + { + "epoch": 0.4975914584982777, + "grad_norm": 3.8333715224325715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102600 + }, + { + "epoch": 0.49763995669111377, + "grad_norm": 3.547568496742315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102610 + }, + { + "epoch": 0.49768845488394986, + "grad_norm": 3.859642561110377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102620 + }, + { + "epoch": 0.497736953076786, + "grad_norm": 3.248975133374188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102630 + }, + { + "epoch": 0.4977854512696221, + "grad_norm": 3.1043265380503726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102640 + }, + { + "epoch": 0.4978339494624582, + "grad_norm": 2.666829459485598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102650 + }, + { + "epoch": 0.49788244765529427, + "grad_norm": 3.288410823643062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102660 + }, + { + "epoch": 0.49793094584813036, + "grad_norm": 4.3015199935325654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102670 + }, + { + "epoch": 0.49797944404096645, + "grad_norm": 3.9365789916701033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102680 + }, + { + "epoch": 0.49802794223380253, + "grad_norm": 2.816226754021045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102690 + }, + { + "epoch": 0.4980764404266386, + "grad_norm": 3.2147480055755295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102700 + }, + { + "epoch": 0.4981249386194747, + "grad_norm": 3.5226736372351297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102710 + }, + { + "epoch": 0.4981734368123108, + "grad_norm": 3.8780720501563337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102720 + }, + { + "epoch": 0.4982219350051469, + "grad_norm": 4.3802398863590497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102730 + }, + { + "epoch": 0.498270433197983, + "grad_norm": 3.5978550272375287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102740 + }, + { + "epoch": 0.49831893139081906, + "grad_norm": 3.197190210357803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102750 + }, + { + "epoch": 0.49836742958365515, + "grad_norm": 3.1817924650567875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102760 + }, + { + "epoch": 0.49841592777649124, + "grad_norm": 4.447985304523172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102770 + }, + { + "epoch": 0.49846442596932733, + "grad_norm": 6.034842954250053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102780 + }, + { + "epoch": 0.4985129241621634, + "grad_norm": 2.796261071580375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102790 + }, + { + "epoch": 0.4985614223549995, + "grad_norm": 2.6533126629146864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102800 + }, + { + "epoch": 0.4986099205478356, + "grad_norm": 4.3990843323626905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102810 + }, + { + "epoch": 0.4986584187406717, + "grad_norm": 2.795491980123188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102820 + }, + { + "epoch": 0.49870691693350777, + "grad_norm": 2.835129180311924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102830 + }, + { + "epoch": 0.49875541512634386, + "grad_norm": 2.596888464267977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102840 + }, + { + "epoch": 0.49880391331917995, + "grad_norm": 3.013857678979548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102850 + }, + { + "epoch": 0.49885241151201604, + "grad_norm": 5.869155756954569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102860 + }, + { + "epoch": 0.4989009097048521, + "grad_norm": 3.0212049750844017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102870 + }, + { + "epoch": 0.4989494078976882, + "grad_norm": 3.0530750905199966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102880 + }, + { + "epoch": 0.4989979060905243, + "grad_norm": 2.978102315864817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102890 + }, + { + "epoch": 0.4990464042833604, + "grad_norm": 2.7473558361634787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102900 + }, + { + "epoch": 0.4990949024761965, + "grad_norm": 1.532656256131304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102910 + }, + { + "epoch": 0.49914340066903257, + "grad_norm": 2.4248200247711793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102920 + }, + { + "epoch": 0.49919189886186865, + "grad_norm": 3.291165739938151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102930 + }, + { + "epoch": 0.49924039705470474, + "grad_norm": 3.4704282825259725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102940 + }, + { + "epoch": 0.49928889524754083, + "grad_norm": 2.8307979960118246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102950 + }, + { + "epoch": 0.4993373934403769, + "grad_norm": 8.216373430514068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102960 + }, + { + "epoch": 0.499385891633213, + "grad_norm": 2.530560720970243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102970 + }, + { + "epoch": 0.4994343898260491, + "grad_norm": 2.3041351937536092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102980 + }, + { + "epoch": 0.4994828880188852, + "grad_norm": 2.7745255692934734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 102990 + }, + { + "epoch": 0.4995313862117213, + "grad_norm": 1.2561794164867024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103000 + }, + { + "epoch": 0.49957988440455736, + "grad_norm": 2.4596545245003654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103010 + }, + { + "epoch": 0.49962838259739345, + "grad_norm": 2.524327555875061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103020 + }, + { + "epoch": 0.49967688079022954, + "grad_norm": 2.513178003482608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103030 + }, + { + "epoch": 0.4997253789830656, + "grad_norm": 2.7529569024409284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103040 + }, + { + "epoch": 0.4997738771759017, + "grad_norm": 2.4020832256610447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103050 + }, + { + "epoch": 0.4998223753687378, + "grad_norm": 3.0451525390162715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103060 + }, + { + "epoch": 0.4998708735615739, + "grad_norm": 3.557371144324861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103070 + }, + { + "epoch": 0.49991937175441, + "grad_norm": 3.0265425721154315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103080 + }, + { + "epoch": 0.49996786994724607, + "grad_norm": 2.2678979405554855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103090 + }, + { + "epoch": 0.5000163681400822, + "grad_norm": 2.425180696263851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103100 + }, + { + "epoch": 0.5000648663329182, + "grad_norm": 2.697696572795394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103110 + }, + { + "epoch": 0.5001133645257544, + "grad_norm": 2.156204459424771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103120 + }, + { + "epoch": 0.5001618627185904, + "grad_norm": 2.502618769995024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103130 + }, + { + "epoch": 0.5002103609114266, + "grad_norm": 2.470559650191717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103140 + }, + { + "epoch": 0.5002588591042626, + "grad_norm": 3.955998408855521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103150 + }, + { + "epoch": 0.5003073572970987, + "grad_norm": 2.051916254686148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103160 + }, + { + "epoch": 0.5003558554899348, + "grad_norm": 3.2206850164584466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103170 + }, + { + "epoch": 0.5004043536827709, + "grad_norm": 3.350600366047729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103180 + }, + { + "epoch": 0.500452851875607, + "grad_norm": 3.5555336808101856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103190 + }, + { + "epoch": 0.5005013500684431, + "grad_norm": 2.4227307449109503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103200 + }, + { + "epoch": 0.5005498482612791, + "grad_norm": 2.3215501698814478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103210 + }, + { + "epoch": 0.5005983464541153, + "grad_norm": 2.798055902530905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103220 + }, + { + "epoch": 0.5006468446469513, + "grad_norm": 1.9957747099397238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103230 + }, + { + "epoch": 0.5006953428397874, + "grad_norm": 2.2779083508339681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103240 + }, + { + "epoch": 0.5007438410326235, + "grad_norm": 2.2786393572005181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103250 + }, + { + "epoch": 0.5007923392254596, + "grad_norm": 3.247316726628924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103260 + }, + { + "epoch": 0.5008408374182957, + "grad_norm": 2.2386636544524663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103270 + }, + { + "epoch": 0.5008893356111318, + "grad_norm": 2.581147668934136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103280 + }, + { + "epoch": 0.5009378338039678, + "grad_norm": 2.3408705374095007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103290 + }, + { + "epoch": 0.500986331996804, + "grad_norm": 3.421210408305342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103300 + }, + { + "epoch": 0.50103483018964, + "grad_norm": 2.3259667614183854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103310 + }, + { + "epoch": 0.5010833283824762, + "grad_norm": 2.0802140454634355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103320 + }, + { + "epoch": 0.5011318265753122, + "grad_norm": 2.197223523126013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103330 + }, + { + "epoch": 0.5011803247681483, + "grad_norm": 2.692167129225709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103340 + }, + { + "epoch": 0.5012288229609844, + "grad_norm": 2.377986305646118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103350 + }, + { + "epoch": 0.5012773211538205, + "grad_norm": 2.0996829164232622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103360 + }, + { + "epoch": 0.5013258193466565, + "grad_norm": 2.7903601562684344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103370 + }, + { + "epoch": 0.5013743175394927, + "grad_norm": 3.4930815218103817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103380 + }, + { + "epoch": 0.5014228157323287, + "grad_norm": 2.1561328367170063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103390 + }, + { + "epoch": 0.5014713139251649, + "grad_norm": 2.472847882017959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103400 + }, + { + "epoch": 0.501519812118001, + "grad_norm": 1.9317667465656996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103410 + }, + { + "epoch": 0.501568310310837, + "grad_norm": 2.0663553357280762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103420 + }, + { + "epoch": 0.5016168085036732, + "grad_norm": 2.0712276693757303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103430 + }, + { + "epoch": 0.5016653066965092, + "grad_norm": 2.2306031155494566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103440 + }, + { + "epoch": 0.5017138048893454, + "grad_norm": 2.813858941408398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103450 + }, + { + "epoch": 0.5017623030821814, + "grad_norm": 2.3550876449007774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103460 + }, + { + "epoch": 0.5018108012750175, + "grad_norm": 2.1027997831879475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103470 + }, + { + "epoch": 0.5018592994678536, + "grad_norm": 3.311158138785686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103480 + }, + { + "epoch": 0.5019077976606897, + "grad_norm": 2.0376486986606324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103490 + }, + { + "epoch": 0.5019562958535257, + "grad_norm": 2.3817756300559267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103500 + }, + { + "epoch": 0.5020047940463619, + "grad_norm": 2.8262846285542764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103510 + }, + { + "epoch": 0.5020532922391979, + "grad_norm": 1.9399038819756242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103520 + }, + { + "epoch": 0.5021017904320341, + "grad_norm": 1.7773868421500083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103530 + }, + { + "epoch": 0.5021502886248701, + "grad_norm": 2.2954891676363331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103540 + }, + { + "epoch": 0.5021987868177062, + "grad_norm": 2.0683863510839728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103550 + }, + { + "epoch": 0.5022472850105423, + "grad_norm": 1.9012352936442767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103560 + }, + { + "epoch": 0.5022957832033784, + "grad_norm": 2.2932832166588923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103570 + }, + { + "epoch": 0.5023442813962145, + "grad_norm": 2.1979043651754182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103580 + }, + { + "epoch": 0.5023927795890506, + "grad_norm": 2.0724611715650099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103590 + }, + { + "epoch": 0.5024412777818866, + "grad_norm": 2.909931140493427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103600 + }, + { + "epoch": 0.5024897759747228, + "grad_norm": 2.019615124027041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103610 + }, + { + "epoch": 0.5025382741675588, + "grad_norm": 1.9062775891143247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103620 + }, + { + "epoch": 0.502586772360395, + "grad_norm": 1.8774805710108922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103630 + }, + { + "epoch": 0.502635270553231, + "grad_norm": 2.035791766274997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103640 + }, + { + "epoch": 0.5026837687460671, + "grad_norm": 2.3423194761562627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103650 + }, + { + "epoch": 0.5027322669389032, + "grad_norm": 1.8828006886906223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103660 + }, + { + "epoch": 0.5027807651317393, + "grad_norm": 2.2259747822772624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103670 + }, + { + "epoch": 0.5028292633245753, + "grad_norm": 1.9670133610816265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103680 + }, + { + "epoch": 0.5028777615174115, + "grad_norm": 2.7017776460525056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103690 + }, + { + "epoch": 0.5029262597102475, + "grad_norm": 3.0305761811177945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103700 + }, + { + "epoch": 0.5029747579030837, + "grad_norm": 2.86588800690879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103710 + }, + { + "epoch": 0.5030232560959197, + "grad_norm": 2.040033990624579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103720 + }, + { + "epoch": 0.5030717542887558, + "grad_norm": 1.807506748718879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103730 + }, + { + "epoch": 0.5031202524815919, + "grad_norm": 2.3282599670437776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103740 + }, + { + "epoch": 0.503168750674428, + "grad_norm": 2.055684404922431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103750 + }, + { + "epoch": 0.503217248867264, + "grad_norm": 1.6662266943967552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103760 + }, + { + "epoch": 0.5032657470601002, + "grad_norm": 3.278050257904397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103770 + }, + { + "epoch": 0.5033142452529362, + "grad_norm": 2.0212954154885665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103780 + }, + { + "epoch": 0.5033627434457724, + "grad_norm": 1.872564183713621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103790 + }, + { + "epoch": 0.5034112416386084, + "grad_norm": 4.161159097293421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103800 + }, + { + "epoch": 0.5034597398314445, + "grad_norm": 2.644026722009585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103810 + }, + { + "epoch": 0.5035082380242806, + "grad_norm": 2.1236564862192608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103820 + }, + { + "epoch": 0.5035567362171167, + "grad_norm": 1.700518765801462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103830 + }, + { + "epoch": 0.5036052344099528, + "grad_norm": 2.2076956440741924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103840 + }, + { + "epoch": 0.5036537326027889, + "grad_norm": 1.9730363476355706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103850 + }, + { + "epoch": 0.5037022307956249, + "grad_norm": 3.8664717294523143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103860 + }, + { + "epoch": 0.5037507289884611, + "grad_norm": 1.794573734059668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103870 + }, + { + "epoch": 0.5037992271812971, + "grad_norm": 1.744747066823038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103880 + }, + { + "epoch": 0.5038477253741332, + "grad_norm": 2.0556839785967895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103890 + }, + { + "epoch": 0.5038962235669693, + "grad_norm": 2.1561272944836674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103900 + }, + { + "epoch": 0.5039447217598054, + "grad_norm": 1.8402664636596455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103910 + }, + { + "epoch": 0.5039932199526416, + "grad_norm": 2.595240573555202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103920 + }, + { + "epoch": 0.5040417181454776, + "grad_norm": 1.790576504845376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103930 + }, + { + "epoch": 0.5040902163383137, + "grad_norm": 1.9496661707307794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103940 + }, + { + "epoch": 0.5041387145311498, + "grad_norm": 1.9378394711111468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103950 + }, + { + "epoch": 0.5041872127239859, + "grad_norm": 2.193370391978533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103960 + }, + { + "epoch": 0.504235710916822, + "grad_norm": 1.584453741543257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103970 + }, + { + "epoch": 0.5042842091096581, + "grad_norm": 1.6860063567492034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103980 + }, + { + "epoch": 0.5043327073024941, + "grad_norm": 1.9535615081167634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 103990 + }, + { + "epoch": 0.5043812054953303, + "grad_norm": 1.9287777774934511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104000 + }, + { + "epoch": 0.5044297036881663, + "grad_norm": 2.1743447575772734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104010 + }, + { + "epoch": 0.5044782018810025, + "grad_norm": 1.672702865107567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104020 + }, + { + "epoch": 0.5045267000738385, + "grad_norm": 2.2278658207142144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104030 + }, + { + "epoch": 0.5045751982666746, + "grad_norm": 1.875713593335604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104040 + }, + { + "epoch": 0.5046236964595107, + "grad_norm": 2.2132813626285497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104050 + }, + { + "epoch": 0.5046721946523468, + "grad_norm": 2.0842020376221626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104060 + }, + { + "epoch": 0.5047206928451828, + "grad_norm": 1.6609843100923172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104070 + }, + { + "epoch": 0.504769191038019, + "grad_norm": 2.1022050589181163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104080 + }, + { + "epoch": 0.504817689230855, + "grad_norm": 3.7228002724987164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104090 + }, + { + "epoch": 0.5048661874236912, + "grad_norm": 2.5590378527340363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104100 + }, + { + "epoch": 0.5049146856165272, + "grad_norm": 1.5945455800192576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104110 + }, + { + "epoch": 0.5049631838093633, + "grad_norm": 1.723029612321625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104120 + }, + { + "epoch": 0.5050116820021994, + "grad_norm": 2.1302167851899867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104130 + }, + { + "epoch": 0.5050601801950355, + "grad_norm": 3.1383683563035447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104140 + }, + { + "epoch": 0.5051086783878715, + "grad_norm": 1.9595947264861024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104150 + }, + { + "epoch": 0.5051571765807077, + "grad_norm": 1.6840451166899584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104160 + }, + { + "epoch": 0.5052056747735437, + "grad_norm": 1.6885684317458072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104170 + }, + { + "epoch": 0.5052541729663799, + "grad_norm": 1.6100540278785047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104180 + }, + { + "epoch": 0.5053026711592159, + "grad_norm": 2.1975267827656353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104190 + }, + { + "epoch": 0.505351169352052, + "grad_norm": 1.841095240706636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104200 + }, + { + "epoch": 0.5053996675448881, + "grad_norm": 1.6933951485498255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104210 + }, + { + "epoch": 0.5054481657377242, + "grad_norm": 1.5785052198680205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104220 + }, + { + "epoch": 0.5054966639305603, + "grad_norm": 3.2476967248840083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104230 + }, + { + "epoch": 0.5055451621233964, + "grad_norm": 2.3485770839215547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104240 + }, + { + "epoch": 0.5055936603162324, + "grad_norm": 1.753793128500547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104250 + }, + { + "epoch": 0.5056421585090686, + "grad_norm": 1.8085253827848646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104260 + }, + { + "epoch": 0.5056906567019046, + "grad_norm": 1.5400333097659313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104270 + }, + { + "epoch": 0.5057391548947408, + "grad_norm": 1.6767371846526657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104280 + }, + { + "epoch": 0.5057876530875768, + "grad_norm": 1.89904810099506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104290 + }, + { + "epoch": 0.5058361512804129, + "grad_norm": 1.7093123005906818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104300 + }, + { + "epoch": 0.505884649473249, + "grad_norm": 2.61698772874297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104310 + }, + { + "epoch": 0.5059331476660851, + "grad_norm": 1.6222827525780303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104320 + }, + { + "epoch": 0.5059816458589211, + "grad_norm": 1.826230970891629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104330 + }, + { + "epoch": 0.5060301440517573, + "grad_norm": 3.1374753461932414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104340 + }, + { + "epoch": 0.5060786422445933, + "grad_norm": 1.9657925065530435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104350 + }, + { + "epoch": 0.5061271404374295, + "grad_norm": 1.4639516621173243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104360 + }, + { + "epoch": 0.5061756386302655, + "grad_norm": 1.6952499493072537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104370 + }, + { + "epoch": 0.5062241368231016, + "grad_norm": 1.555291646582191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104380 + }, + { + "epoch": 0.5062726350159377, + "grad_norm": 2.3084488987024088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104390 + }, + { + "epoch": 0.5063211332087738, + "grad_norm": 2.634534155276924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104400 + }, + { + "epoch": 0.5063696314016098, + "grad_norm": 2.4547421162424143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104410 + }, + { + "epoch": 0.506418129594446, + "grad_norm": 1.4310494123037643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104420 + }, + { + "epoch": 0.5064666277872821, + "grad_norm": 1.5215661619549792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104430 + }, + { + "epoch": 0.5065151259801182, + "grad_norm": 1.8206588947577984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104440 + }, + { + "epoch": 0.5065636241729543, + "grad_norm": 1.8022362269221048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104450 + }, + { + "epoch": 0.5066121223657903, + "grad_norm": 1.5917431994694198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104460 + }, + { + "epoch": 0.5066606205586265, + "grad_norm": 1.7321843870377052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104470 + }, + { + "epoch": 0.5067091187514625, + "grad_norm": 1.0860361498998827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104480 + }, + { + "epoch": 0.5067576169442987, + "grad_norm": 1.805282039413214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104490 + }, + { + "epoch": 0.5068061151371347, + "grad_norm": 1.7869547264126595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104500 + }, + { + "epoch": 0.5068546133299708, + "grad_norm": 1.6221275700445403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104510 + }, + { + "epoch": 0.5069031115228069, + "grad_norm": 1.586314084534024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104520 + }, + { + "epoch": 0.506951609715643, + "grad_norm": 1.5507808370784915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104530 + }, + { + "epoch": 0.507000107908479, + "grad_norm": 1.6805975633360504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104540 + }, + { + "epoch": 0.5070486061013152, + "grad_norm": 1.8089909303853347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104550 + }, + { + "epoch": 0.5070971042941512, + "grad_norm": 1.5708970124705957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104560 + }, + { + "epoch": 0.5071456024869874, + "grad_norm": 1.5133900888031349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104570 + }, + { + "epoch": 0.5071941006798234, + "grad_norm": 1.4052545793674653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104580 + }, + { + "epoch": 0.5072425988726595, + "grad_norm": 1.6381810041821154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104590 + }, + { + "epoch": 0.5072910970654956, + "grad_norm": 1.804466762678203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104600 + }, + { + "epoch": 0.5073395952583317, + "grad_norm": 1.453451687893903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104610 + }, + { + "epoch": 0.5073880934511678, + "grad_norm": 1.72769432538189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104620 + }, + { + "epoch": 0.5074365916440039, + "grad_norm": 1.360075430056895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104630 + }, + { + "epoch": 0.5074850898368399, + "grad_norm": 1.7020445852722332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104640 + }, + { + "epoch": 0.5075335880296761, + "grad_norm": 1.7949631114788644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104650 + }, + { + "epoch": 0.5075820862225121, + "grad_norm": 1.4934194325633143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104660 + }, + { + "epoch": 0.5076305844153483, + "grad_norm": 1.369342044199584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104670 + }, + { + "epoch": 0.5076790826081843, + "grad_norm": 1.9483904623029957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104680 + }, + { + "epoch": 0.5077275808010204, + "grad_norm": 1.6006457315143052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104690 + }, + { + "epoch": 0.5077760789938565, + "grad_norm": 1.751372025182718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104700 + }, + { + "epoch": 0.5078245771866926, + "grad_norm": 1.6221588339249138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104710 + }, + { + "epoch": 0.5078730753795286, + "grad_norm": 1.4719739738211501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104720 + }, + { + "epoch": 0.5079215735723648, + "grad_norm": 1.443129491462969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104730 + }, + { + "epoch": 0.5079700717652008, + "grad_norm": 1.7303027277648653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104740 + }, + { + "epoch": 0.508018569958037, + "grad_norm": 1.664216711105837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104750 + }, + { + "epoch": 0.508067068150873, + "grad_norm": 1.38884388434235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104760 + }, + { + "epoch": 0.5081155663437091, + "grad_norm": 1.5958094934376277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104770 + }, + { + "epoch": 0.5081640645365452, + "grad_norm": 1.482016358522742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104780 + }, + { + "epoch": 0.5082125627293813, + "grad_norm": 1.7310080124843807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104790 + }, + { + "epoch": 0.5082610609222173, + "grad_norm": 1.668793458975415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104800 + }, + { + "epoch": 0.5083095591150535, + "grad_norm": 1.874222732567432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104810 + }, + { + "epoch": 0.5083580573078895, + "grad_norm": 1.606040314072743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104820 + }, + { + "epoch": 0.5084065555007257, + "grad_norm": 1.4911658752225776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104830 + }, + { + "epoch": 0.5084550536935617, + "grad_norm": 1.6015862058793573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104840 + }, + { + "epoch": 0.5085035518863978, + "grad_norm": 1.8089073705596093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104850 + }, + { + "epoch": 0.5085520500792339, + "grad_norm": 1.4467720177435694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104860 + }, + { + "epoch": 0.50860054827207, + "grad_norm": 1.4472725240466389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104870 + }, + { + "epoch": 0.508649046464906, + "grad_norm": 1.3861344427823497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104880 + }, + { + "epoch": 0.5086975446577422, + "grad_norm": 1.7673937691142783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104890 + }, + { + "epoch": 0.5087460428505782, + "grad_norm": 1.706390264644142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104900 + }, + { + "epoch": 0.5087945410434144, + "grad_norm": 1.40430628903232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104910 + }, + { + "epoch": 0.5088430392362504, + "grad_norm": 1.3777004426174244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104920 + }, + { + "epoch": 0.5088915374290865, + "grad_norm": 1.600555776803958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104930 + }, + { + "epoch": 0.5089400356219227, + "grad_norm": 1.7355478121316992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104940 + }, + { + "epoch": 0.5089885338147587, + "grad_norm": 1.8577698313038127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104950 + }, + { + "epoch": 0.5090370320075949, + "grad_norm": 1.851475133207714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104960 + }, + { + "epoch": 0.5090855302004309, + "grad_norm": 1.4592163211091247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104970 + }, + { + "epoch": 0.509134028393267, + "grad_norm": 1.324065834751309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104980 + }, + { + "epoch": 0.5091825265861031, + "grad_norm": 1.7668939733539446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 104990 + }, + { + "epoch": 0.5092310247789392, + "grad_norm": 1.6228258914452454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105000 + }, + { + "epoch": 0.5092795229717753, + "grad_norm": 1.8239205701320316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105010 + }, + { + "epoch": 0.5093280211646114, + "grad_norm": 1.4010960569521558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105020 + }, + { + "epoch": 0.5093765193574474, + "grad_norm": 1.423055664417916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105030 + }, + { + "epoch": 0.5094250175502836, + "grad_norm": 1.5827764343612216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105040 + }, + { + "epoch": 0.5094735157431196, + "grad_norm": 1.459764291666943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105050 + }, + { + "epoch": 0.5095220139359558, + "grad_norm": 1.305617445268581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105060 + }, + { + "epoch": 0.5095705121287918, + "grad_norm": 1.4420072602661094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105070 + }, + { + "epoch": 0.5096190103216279, + "grad_norm": 1.3018902222938777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105080 + }, + { + "epoch": 0.509667508514464, + "grad_norm": 1.5205277748009394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105090 + }, + { + "epoch": 0.5097160067073001, + "grad_norm": 1.5868276648234314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105100 + }, + { + "epoch": 0.5097645049001361, + "grad_norm": 1.3594655001725187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105110 + }, + { + "epoch": 0.5098130030929723, + "grad_norm": 1.5373861117495835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105120 + }, + { + "epoch": 0.5098615012858083, + "grad_norm": 1.3312738644799538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105130 + }, + { + "epoch": 0.5099099994786445, + "grad_norm": 1.5422553190092003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105140 + }, + { + "epoch": 0.5099584976714805, + "grad_norm": 1.6069627406523068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105150 + }, + { + "epoch": 0.5100069958643166, + "grad_norm": 1.4299276074325462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105160 + }, + { + "epoch": 0.5100554940571527, + "grad_norm": 1.3501961859674338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105170 + }, + { + "epoch": 0.5101039922499888, + "grad_norm": 1.3281803035170014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105180 + }, + { + "epoch": 0.5101524904428248, + "grad_norm": 1.476985289627919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105190 + }, + { + "epoch": 0.510200988635661, + "grad_norm": 1.4123013158950926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105200 + }, + { + "epoch": 0.510249486828497, + "grad_norm": 1.5455496793492784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105210 + }, + { + "epoch": 0.5102979850213332, + "grad_norm": 1.3426864597931853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105220 + }, + { + "epoch": 0.5103464832141692, + "grad_norm": 1.2286092498925427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105230 + }, + { + "epoch": 0.5103949814070053, + "grad_norm": 1.5425538890667667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105240 + }, + { + "epoch": 0.5104434795998414, + "grad_norm": 1.549744723661206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105250 + }, + { + "epoch": 0.5104919777926775, + "grad_norm": 1.3131345610872813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105260 + }, + { + "epoch": 0.5105404759855136, + "grad_norm": 1.2677155325491185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105270 + }, + { + "epoch": 0.5105889741783497, + "grad_norm": 1.2714873776076274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105280 + }, + { + "epoch": 0.5106374723711857, + "grad_norm": 3.301395281596342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105290 + }, + { + "epoch": 0.5106859705640219, + "grad_norm": 1.4726315100688225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105300 + }, + { + "epoch": 0.5107344687568579, + "grad_norm": 1.252145978014596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105310 + }, + { + "epoch": 0.510782966949694, + "grad_norm": 1.3300623891154828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105320 + }, + { + "epoch": 0.5108314651425301, + "grad_norm": 1.3842033297351009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105330 + }, + { + "epoch": 0.5108799633353662, + "grad_norm": 1.583315309972022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105340 + }, + { + "epoch": 0.5109284615282023, + "grad_norm": 1.4398416681160597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105350 + }, + { + "epoch": 0.5109769597210384, + "grad_norm": 1.8714710847689275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105360 + }, + { + "epoch": 0.5110254579138744, + "grad_norm": 1.23430424991966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105370 + }, + { + "epoch": 0.5110739561067106, + "grad_norm": 1.2861289633292472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105380 + }, + { + "epoch": 0.5111224542995466, + "grad_norm": 1.9615721669197228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105390 + }, + { + "epoch": 0.5111709524923828, + "grad_norm": 1.4893254274284118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105400 + }, + { + "epoch": 0.5112194506852188, + "grad_norm": 1.2553422834571393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105410 + }, + { + "epoch": 0.5112679488780549, + "grad_norm": 1.5912642936655175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105420 + }, + { + "epoch": 0.511316447070891, + "grad_norm": 1.30680106735781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105430 + }, + { + "epoch": 0.5113649452637271, + "grad_norm": 1.4518020918785624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105440 + }, + { + "epoch": 0.5114134434565633, + "grad_norm": 1.4551312688126927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105450 + }, + { + "epoch": 0.5114619416493993, + "grad_norm": 1.2017216022286448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105460 + }, + { + "epoch": 0.5115104398422354, + "grad_norm": 1.4301836870345142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105470 + }, + { + "epoch": 0.5115589380350715, + "grad_norm": 1.4044769613974495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105480 + }, + { + "epoch": 0.5116074362279076, + "grad_norm": 1.476496294117169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105490 + }, + { + "epoch": 0.5116559344207436, + "grad_norm": 1.420243904703966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105500 + }, + { + "epoch": 0.5117044326135798, + "grad_norm": 1.2287266315524903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105510 + }, + { + "epoch": 0.5117529308064158, + "grad_norm": 1.2787278080850228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105520 + }, + { + "epoch": 0.511801428999252, + "grad_norm": 1.263586142385975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105530 + }, + { + "epoch": 0.511849927192088, + "grad_norm": 1.3509463769878494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105540 + }, + { + "epoch": 0.5118984253849241, + "grad_norm": 1.4496839639832615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105550 + }, + { + "epoch": 0.5119469235777602, + "grad_norm": 1.254259416327841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105560 + }, + { + "epoch": 0.5119954217705963, + "grad_norm": 1.2071836863469798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105570 + }, + { + "epoch": 0.5120439199634323, + "grad_norm": 1.842237509208644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105580 + }, + { + "epoch": 0.5120924181562685, + "grad_norm": 1.347271307849951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105590 + }, + { + "epoch": 0.5121409163491045, + "grad_norm": 1.4069550502426864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105600 + }, + { + "epoch": 0.5121894145419407, + "grad_norm": 1.214693980955417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105610 + }, + { + "epoch": 0.5122379127347767, + "grad_norm": 1.2606794541625277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105620 + }, + { + "epoch": 0.5122864109276128, + "grad_norm": 1.280963175531724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105630 + }, + { + "epoch": 0.5123349091204489, + "grad_norm": 1.3800608655856195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105640 + }, + { + "epoch": 0.512383407313285, + "grad_norm": 1.3897853534672322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105650 + }, + { + "epoch": 0.512431905506121, + "grad_norm": 1.1788014120384105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105660 + }, + { + "epoch": 0.5124804036989572, + "grad_norm": 1.2218848155498563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105670 + }, + { + "epoch": 0.5125289018917932, + "grad_norm": 1.1888296569395607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105680 + }, + { + "epoch": 0.5125774000846294, + "grad_norm": 1.3695994027784764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105690 + }, + { + "epoch": 0.5126258982774654, + "grad_norm": 1.4264530534546793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105700 + }, + { + "epoch": 0.5126743964703016, + "grad_norm": 1.270294234245739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105710 + }, + { + "epoch": 0.5127228946631376, + "grad_norm": 1.168444754284792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105720 + }, + { + "epoch": 0.5127713928559737, + "grad_norm": 1.1803183497249847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105730 + }, + { + "epoch": 0.5128198910488098, + "grad_norm": 1.4071579812480195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105740 + }, + { + "epoch": 0.5128683892416459, + "grad_norm": 1.3393322717547562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105750 + }, + { + "epoch": 0.5129168874344819, + "grad_norm": 1.18583905361902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105760 + }, + { + "epoch": 0.5129653856273181, + "grad_norm": 1.1976342761954584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105770 + }, + { + "epoch": 0.5130138838201541, + "grad_norm": 1.1712079128756159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105780 + }, + { + "epoch": 0.5130623820129903, + "grad_norm": 1.3371672480388952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105790 + }, + { + "epoch": 0.5131108802058263, + "grad_norm": 1.3395616349498596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105800 + }, + { + "epoch": 0.5131593783986624, + "grad_norm": 1.1813077804845307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105810 + }, + { + "epoch": 0.5132078765914985, + "grad_norm": 1.364753074994951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105820 + }, + { + "epoch": 0.5132563747843346, + "grad_norm": 1.2484902356391103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105830 + }, + { + "epoch": 0.5133048729771706, + "grad_norm": 1.433178624665743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105840 + }, + { + "epoch": 0.5133533711700068, + "grad_norm": 1.4391845581940288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105850 + }, + { + "epoch": 0.5134018693628428, + "grad_norm": 1.1734155691556225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105860 + }, + { + "epoch": 0.513450367555679, + "grad_norm": 2.1626186708090245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105870 + }, + { + "epoch": 0.513498865748515, + "grad_norm": 1.1151396250852486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105880 + }, + { + "epoch": 0.5135473639413511, + "grad_norm": 1.372048217263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105890 + }, + { + "epoch": 0.5135958621341872, + "grad_norm": 1.2942368243784586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105900 + }, + { + "epoch": 0.5136443603270233, + "grad_norm": 1.1393866827802412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105910 + }, + { + "epoch": 0.5136928585198594, + "grad_norm": 1.2181479291939468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105920 + }, + { + "epoch": 0.5137413567126955, + "grad_norm": 1.1911334496517156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105930 + }, + { + "epoch": 0.5137898549055315, + "grad_norm": 1.264548785684383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105940 + }, + { + "epoch": 0.5138383530983677, + "grad_norm": 1.3969170709060563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105950 + }, + { + "epoch": 0.5138868512912038, + "grad_norm": 1.257157435929912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105960 + }, + { + "epoch": 0.5139353494840398, + "grad_norm": 1.1828098678279275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105970 + }, + { + "epoch": 0.513983847676876, + "grad_norm": 1.1563992785568189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105980 + }, + { + "epoch": 0.514032345869712, + "grad_norm": 1.27281055029016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 105990 + }, + { + "epoch": 0.5140808440625482, + "grad_norm": 1.3482441829637537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106000 + }, + { + "epoch": 0.5141293422553842, + "grad_norm": 1.2925478642955568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106010 + }, + { + "epoch": 0.5141778404482203, + "grad_norm": 1.9374547832740063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106020 + }, + { + "epoch": 0.5142263386410564, + "grad_norm": 2.332264017468333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106030 + }, + { + "epoch": 0.5142748368338925, + "grad_norm": 1.4118947433416906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106040 + }, + { + "epoch": 0.5143233350267286, + "grad_norm": 1.3270016552269226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106050 + }, + { + "epoch": 0.5143718332195647, + "grad_norm": 1.2139879856931657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106060 + }, + { + "epoch": 0.5144203314124007, + "grad_norm": 1.1001164779145256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106070 + }, + { + "epoch": 0.5144688296052369, + "grad_norm": 1.138703993319723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106080 + }, + { + "epoch": 0.5145173277980729, + "grad_norm": 1.2496242618453834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106090 + }, + { + "epoch": 0.514565825990909, + "grad_norm": 1.351692020534756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106100 + }, + { + "epoch": 0.5146143241837451, + "grad_norm": 1.1298594415620755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106110 + }, + { + "epoch": 0.5146628223765812, + "grad_norm": 1.4577979357000004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106120 + }, + { + "epoch": 0.5147113205694173, + "grad_norm": 1.0829138830104057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106130 + }, + { + "epoch": 0.5147598187622534, + "grad_norm": 1.2822340522689046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106140 + }, + { + "epoch": 0.5148083169550894, + "grad_norm": 1.3118103936449188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106150 + }, + { + "epoch": 0.5148568151479256, + "grad_norm": 1.406276908255677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106160 + }, + { + "epoch": 0.5149053133407616, + "grad_norm": 1.1100615893155918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106170 + }, + { + "epoch": 0.5149538115335978, + "grad_norm": 1.1096624064066418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106180 + }, + { + "epoch": 0.5150023097264338, + "grad_norm": 1.332143426679977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106190 + }, + { + "epoch": 0.5150508079192699, + "grad_norm": 1.2341654098690924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106200 + }, + { + "epoch": 0.515099306112106, + "grad_norm": 1.1314839554188438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106210 + }, + { + "epoch": 0.5151478043049421, + "grad_norm": 1.5414615006648091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106220 + }, + { + "epoch": 0.5151963024977781, + "grad_norm": 1.1240126696066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106230 + }, + { + "epoch": 0.5152448006906143, + "grad_norm": 1.35015696400842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106240 + }, + { + "epoch": 0.5152932988834503, + "grad_norm": 1.3643088436765538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106250 + }, + { + "epoch": 0.5153417970762865, + "grad_norm": 1.0583912057882117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106260 + }, + { + "epoch": 0.5153902952691225, + "grad_norm": 1.2434125551408215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106270 + }, + { + "epoch": 0.5154387934619586, + "grad_norm": 1.1434234181706415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106280 + }, + { + "epoch": 0.5154872916547947, + "grad_norm": 1.2654241743348393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106290 + }, + { + "epoch": 0.5155357898476308, + "grad_norm": 1.3204628146468167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106300 + }, + { + "epoch": 0.5155842880404669, + "grad_norm": 1.034028684898658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106310 + }, + { + "epoch": 0.515632786233303, + "grad_norm": 1.0274902706441935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106320 + }, + { + "epoch": 0.515681284426139, + "grad_norm": 1.0055810406583987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106330 + }, + { + "epoch": 0.5157297826189752, + "grad_norm": 1.236972195783892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106340 + }, + { + "epoch": 0.5157782808118112, + "grad_norm": 1.2147097550041508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106350 + }, + { + "epoch": 0.5158267790046474, + "grad_norm": 1.0514890647073116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106360 + }, + { + "epoch": 0.5158752771974834, + "grad_norm": 1.096892745522382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106370 + }, + { + "epoch": 0.5159237753903195, + "grad_norm": 1.049014954901395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106380 + }, + { + "epoch": 0.5159722735831556, + "grad_norm": 1.1435805191695181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106390 + }, + { + "epoch": 0.5160207717759917, + "grad_norm": 1.2268560567463282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106400 + }, + { + "epoch": 0.5160692699688277, + "grad_norm": 1.0928958715794579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106410 + }, + { + "epoch": 0.5161177681616639, + "grad_norm": 9.852890059391939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106420 + }, + { + "epoch": 0.5161662663544999, + "grad_norm": 1.1515056996813655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106430 + }, + { + "epoch": 0.5162147645473361, + "grad_norm": 1.1535236410509242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106440 + }, + { + "epoch": 0.5162632627401721, + "grad_norm": 1.5729250435470021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106450 + }, + { + "epoch": 0.5163117609330082, + "grad_norm": 1.0437585729050625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106460 + }, + { + "epoch": 0.5163602591258444, + "grad_norm": 1.0674970951640717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106470 + }, + { + "epoch": 0.5164087573186804, + "grad_norm": 9.803558498333587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106480 + }, + { + "epoch": 0.5164572555115166, + "grad_norm": 1.1988525727701926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106490 + }, + { + "epoch": 0.5165057537043526, + "grad_norm": 1.9233301884469256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106500 + }, + { + "epoch": 0.5165542518971887, + "grad_norm": 1.145773040889253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106510 + }, + { + "epoch": 0.5166027500900248, + "grad_norm": 1.0075908818407697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106520 + }, + { + "epoch": 0.5166512482828609, + "grad_norm": 1.0238576919618936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106530 + }, + { + "epoch": 0.5166997464756969, + "grad_norm": 1.1736329952327651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106540 + }, + { + "epoch": 0.5167482446685331, + "grad_norm": 1.1403455602021495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106550 + }, + { + "epoch": 0.5167967428613691, + "grad_norm": 1.0396755811825642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106560 + }, + { + "epoch": 0.5168452410542053, + "grad_norm": 1.0952468443292673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106570 + }, + { + "epoch": 0.5168937392470413, + "grad_norm": 9.992409388814849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106580 + }, + { + "epoch": 0.5169422374398774, + "grad_norm": 1.2166202623120626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106590 + }, + { + "epoch": 0.5169907356327135, + "grad_norm": 1.1431081503587848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106600 + }, + { + "epoch": 0.5170392338255496, + "grad_norm": 1.0261071281547629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106610 + }, + { + "epoch": 0.5170877320183856, + "grad_norm": 9.910933584933446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106620 + }, + { + "epoch": 0.5171362302112218, + "grad_norm": 1.1093469964862379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106630 + }, + { + "epoch": 0.5171847284040578, + "grad_norm": 1.1933767041227838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106640 + }, + { + "epoch": 0.517233226596894, + "grad_norm": 1.1637713726031507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106650 + }, + { + "epoch": 0.51728172478973, + "grad_norm": 1.220334553408975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106660 + }, + { + "epoch": 0.5173302229825661, + "grad_norm": 9.880926654659561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106670 + }, + { + "epoch": 0.5173787211754022, + "grad_norm": 1.0339737599451837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106680 + }, + { + "epoch": 0.5174272193682383, + "grad_norm": 1.1428732449303425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106690 + }, + { + "epoch": 0.5174757175610744, + "grad_norm": 1.1388102194587191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106700 + }, + { + "epoch": 0.5175242157539105, + "grad_norm": 1.0540511397039154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106710 + }, + { + "epoch": 0.5175727139467465, + "grad_norm": 9.777622267392871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106720 + }, + { + "epoch": 0.5176212121395827, + "grad_norm": 9.89807418250166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106730 + }, + { + "epoch": 0.5176697103324187, + "grad_norm": 1.1239024644282836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106740 + }, + { + "epoch": 0.5177182085252549, + "grad_norm": 1.1050548920366055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106750 + }, + { + "epoch": 0.5177667067180909, + "grad_norm": 9.610631934720004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106760 + }, + { + "epoch": 0.517815204910927, + "grad_norm": 9.892092833752031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106770 + }, + { + "epoch": 0.5178637031037631, + "grad_norm": 1.0036525566192722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106780 + }, + { + "epoch": 0.5179122012965992, + "grad_norm": 1.1982402270405146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106790 + }, + { + "epoch": 0.5179606994894352, + "grad_norm": 1.1104388875082805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106800 + }, + { + "epoch": 0.5180091976822714, + "grad_norm": 9.317705007561017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106810 + }, + { + "epoch": 0.5180576958751074, + "grad_norm": 1.1670650934547666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106820 + }, + { + "epoch": 0.5181061940679436, + "grad_norm": 1.011620085478171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106830 + }, + { + "epoch": 0.5181546922607796, + "grad_norm": 1.1220674167589095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106840 + }, + { + "epoch": 0.5182031904536157, + "grad_norm": 1.211583082749712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106850 + }, + { + "epoch": 0.5182516886464518, + "grad_norm": 9.701266634465355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106860 + }, + { + "epoch": 0.5183001868392879, + "grad_norm": 9.470029738167796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106870 + }, + { + "epoch": 0.518348685032124, + "grad_norm": 1.0041558340390111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106880 + }, + { + "epoch": 0.5183971832249601, + "grad_norm": 1.1533624189041802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106890 + }, + { + "epoch": 0.5184456814177961, + "grad_norm": 1.0732028954407724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106900 + }, + { + "epoch": 0.5184941796106323, + "grad_norm": 9.740361406329612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106910 + }, + { + "epoch": 0.5185426778034683, + "grad_norm": 1.1926090337510686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106920 + }, + { + "epoch": 0.5185911759963044, + "grad_norm": 9.448847038129315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106930 + }, + { + "epoch": 0.5186396741891405, + "grad_norm": 1.0512968628972885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106940 + }, + { + "epoch": 0.5186881723819766, + "grad_norm": 1.1084864581789589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106950 + }, + { + "epoch": 0.5187366705748127, + "grad_norm": 9.61581108072096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106960 + }, + { + "epoch": 0.5187851687676488, + "grad_norm": 9.69427418340274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106970 + }, + { + "epoch": 0.5188336669604848, + "grad_norm": 1.0588050258775183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106980 + }, + { + "epoch": 0.518882165153321, + "grad_norm": 1.0655315207941385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 106990 + }, + { + "epoch": 0.5189306633461571, + "grad_norm": 1.11032164795688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107000 + }, + { + "epoch": 0.5189791615389932, + "grad_norm": 9.188099880930167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107010 + }, + { + "epoch": 0.5190276597318293, + "grad_norm": 1.448453303964925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107020 + }, + { + "epoch": 0.5190761579246653, + "grad_norm": 9.661017941198224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107030 + }, + { + "epoch": 0.5191246561175015, + "grad_norm": 1.0546909834374674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107040 + }, + { + "epoch": 0.5191731543103375, + "grad_norm": 1.0829357677266671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107050 + }, + { + "epoch": 0.5192216525031736, + "grad_norm": 9.171559867127144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107060 + }, + { + "epoch": 0.5192701506960097, + "grad_norm": 9.091608177413946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107070 + }, + { + "epoch": 0.5193186488888458, + "grad_norm": 1.0486928658792749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107080 + }, + { + "epoch": 0.5193671470816819, + "grad_norm": 1.0224584201523612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107090 + }, + { + "epoch": 0.519415645274518, + "grad_norm": 1.0677126738301013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107100 + }, + { + "epoch": 0.519464143467354, + "grad_norm": 9.45811038377542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107110 + }, + { + "epoch": 0.5195126416601902, + "grad_norm": 9.256014266156853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107120 + }, + { + "epoch": 0.5195611398530262, + "grad_norm": 8.996429556873409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107130 + }, + { + "epoch": 0.5196096380458624, + "grad_norm": 1.0775956127417885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107140 + }, + { + "epoch": 0.5196581362386984, + "grad_norm": 1.0699436359118408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107150 + }, + { + "epoch": 0.5197066344315345, + "grad_norm": 1.0109101111765995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107160 + }, + { + "epoch": 0.5197551326243706, + "grad_norm": 9.246399912399283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107170 + }, + { + "epoch": 0.5198036308172067, + "grad_norm": 9.239479936695716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107180 + }, + { + "epoch": 0.5198521290100427, + "grad_norm": 1.1312184255984903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107190 + }, + { + "epoch": 0.5199006272028789, + "grad_norm": 1.1052205906025847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107200 + }, + { + "epoch": 0.5199491253957149, + "grad_norm": 8.845713495020391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107210 + }, + { + "epoch": 0.5199976235885511, + "grad_norm": 9.34255552920149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107220 + }, + { + "epoch": 0.5200461217813871, + "grad_norm": 1.2834686913265614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107230 + }, + { + "epoch": 0.5200946199742232, + "grad_norm": 1.0289145535580246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107240 + }, + { + "epoch": 0.5201431181670593, + "grad_norm": 1.0498377633894052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107250 + }, + { + "epoch": 0.5201916163598954, + "grad_norm": 9.651512300479226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107260 + }, + { + "epoch": 0.5202401145527314, + "grad_norm": 8.62989963934524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107270 + }, + { + "epoch": 0.5202886127455676, + "grad_norm": 9.122693001017979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107280 + }, + { + "epoch": 0.5203371109384036, + "grad_norm": 1.0128185579105775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107290 + }, + { + "epoch": 0.5203856091312398, + "grad_norm": 1.0341613432274244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107300 + }, + { + "epoch": 0.5204341073240758, + "grad_norm": 1.4238116818887647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107310 + }, + { + "epoch": 0.520482605516912, + "grad_norm": 1.1141320044316672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107320 + }, + { + "epoch": 0.520531103709748, + "grad_norm": 8.80721771068238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107330 + }, + { + "epoch": 0.5205796019025841, + "grad_norm": 1.0533383942856744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107340 + }, + { + "epoch": 0.5206281000954202, + "grad_norm": 1.0514180814880092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107350 + }, + { + "epoch": 0.5206765982882563, + "grad_norm": 8.488082414714881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107360 + }, + { + "epoch": 0.5207250964810923, + "grad_norm": 9.002644674183102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107370 + }, + { + "epoch": 0.5207735946739285, + "grad_norm": 8.581405097629613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107380 + }, + { + "epoch": 0.5208220928667645, + "grad_norm": 1.068593888930991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107390 + }, + { + "epoch": 0.5208705910596007, + "grad_norm": 1.1457223081379198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107400 + }, + { + "epoch": 0.5209190892524367, + "grad_norm": 8.690417274692663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107410 + }, + { + "epoch": 0.5209675874452728, + "grad_norm": 8.620093439049015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107420 + }, + { + "epoch": 0.5210160856381089, + "grad_norm": 8.717926647250351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107430 + }, + { + "epoch": 0.521064583830945, + "grad_norm": 1.0229496183455922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107440 + }, + { + "epoch": 0.521113082023781, + "grad_norm": 1.0515540083133601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107450 + }, + { + "epoch": 0.5211615802166172, + "grad_norm": 8.840004994681294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107460 + }, + { + "epoch": 0.5212100784094532, + "grad_norm": 8.505994486540658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107470 + }, + { + "epoch": 0.5212585766022894, + "grad_norm": 8.661859141056993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107480 + }, + { + "epoch": 0.5213070747951254, + "grad_norm": 1.0160561458860684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107490 + }, + { + "epoch": 0.5213555729879615, + "grad_norm": 9.695889247041123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107500 + }, + { + "epoch": 0.5214040711807977, + "grad_norm": 8.515682026200011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107510 + }, + { + "epoch": 0.5214525693736337, + "grad_norm": 8.221421410326002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107520 + }, + { + "epoch": 0.5215010675664699, + "grad_norm": 8.754726366078103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107530 + }, + { + "epoch": 0.5215495657593059, + "grad_norm": 9.70165956459823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107540 + }, + { + "epoch": 0.521598063952142, + "grad_norm": 1.0562769858779575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107550 + }, + { + "epoch": 0.5216465621449781, + "grad_norm": 9.079658269683932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107560 + }, + { + "epoch": 0.5216950603378142, + "grad_norm": 8.651924332525596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107570 + }, + { + "epoch": 0.5217435585306502, + "grad_norm": 1.3993019365443615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107580 + }, + { + "epoch": 0.5217920567234864, + "grad_norm": 9.970455039365334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107590 + }, + { + "epoch": 0.5218405549163224, + "grad_norm": 1.013230388480224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107600 + }, + { + "epoch": 0.5218890531091586, + "grad_norm": 8.453051947299173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107610 + }, + { + "epoch": 0.5219375513019946, + "grad_norm": 8.311874211130998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107620 + }, + { + "epoch": 0.5219860494948307, + "grad_norm": 8.725368161321967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107630 + }, + { + "epoch": 0.5220345476876668, + "grad_norm": 9.825927804740786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107640 + }, + { + "epoch": 0.5220830458805029, + "grad_norm": 9.579322579611471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107650 + }, + { + "epoch": 0.522131544073339, + "grad_norm": 8.104862558866444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107660 + }, + { + "epoch": 0.5221800422661751, + "grad_norm": 7.824931458344508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107670 + }, + { + "epoch": 0.5222285404590111, + "grad_norm": 8.104544235720823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107680 + }, + { + "epoch": 0.5222770386518473, + "grad_norm": 9.818005253237061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107690 + }, + { + "epoch": 0.5223255368446833, + "grad_norm": 9.800323397257671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107700 + }, + { + "epoch": 0.5223740350375194, + "grad_norm": 8.13749352346349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107710 + }, + { + "epoch": 0.5224225332303555, + "grad_norm": 8.229345382915199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107720 + }, + { + "epoch": 0.5224710314231916, + "grad_norm": 8.454529165646818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107730 + }, + { + "epoch": 0.5225195296160277, + "grad_norm": 1.0520404458702615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107740 + }, + { + "epoch": 0.5225680278088638, + "grad_norm": 9.649488674767781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107750 + }, + { + "epoch": 0.5226165260016998, + "grad_norm": 8.005167728697415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107760 + }, + { + "epoch": 0.522665024194536, + "grad_norm": 8.442419385801259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107770 + }, + { + "epoch": 0.522713522387372, + "grad_norm": 7.890810849175978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107780 + }, + { + "epoch": 0.5227620205802082, + "grad_norm": 9.82750236744323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107790 + }, + { + "epoch": 0.5228105187730442, + "grad_norm": 9.312218196555477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107800 + }, + { + "epoch": 0.5228590169658803, + "grad_norm": 8.009448038137634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107810 + }, + { + "epoch": 0.5229075151587164, + "grad_norm": 7.984898786617123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107820 + }, + { + "epoch": 0.5229560133515525, + "grad_norm": 7.684011649189415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107830 + }, + { + "epoch": 0.5230045115443885, + "grad_norm": 9.356313768194013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107840 + }, + { + "epoch": 0.5230530097372247, + "grad_norm": 9.322529592736828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107850 + }, + { + "epoch": 0.5231015079300607, + "grad_norm": 7.996218442940517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107860 + }, + { + "epoch": 0.5231500061228969, + "grad_norm": 7.980827376741217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107870 + }, + { + "epoch": 0.5231985043157329, + "grad_norm": 7.838409032956406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107880 + }, + { + "epoch": 0.523247002508569, + "grad_norm": 9.545823331791325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107890 + }, + { + "epoch": 0.5232955007014051, + "grad_norm": 9.525435729074161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107900 + }, + { + "epoch": 0.5233439988942412, + "grad_norm": 9.46873015550409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107910 + }, + { + "epoch": 0.5233924970870772, + "grad_norm": 7.958367831406576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107920 + }, + { + "epoch": 0.5234409952799134, + "grad_norm": 8.117458349943263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107930 + }, + { + "epoch": 0.5234894934727494, + "grad_norm": 1.0236983172262626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107940 + }, + { + "epoch": 0.5235379916655856, + "grad_norm": 8.920751781715808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107950 + }, + { + "epoch": 0.5235864898584216, + "grad_norm": 7.966844606244194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107960 + }, + { + "epoch": 0.5236349880512577, + "grad_norm": 8.259203099214574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107970 + }, + { + "epoch": 0.5236834862440938, + "grad_norm": 7.369087029474031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107980 + }, + { + "epoch": 0.5237319844369299, + "grad_norm": 8.890699376706834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 107990 + }, + { + "epoch": 0.523780482629766, + "grad_norm": 9.177531268278472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108000 + }, + { + "epoch": 0.5238289808226021, + "grad_norm": 7.675319579902862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108010 + }, + { + "epoch": 0.5238774790154382, + "grad_norm": 8.138279383729241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108020 + }, + { + "epoch": 0.5239259772082743, + "grad_norm": 7.668808166272356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108030 + }, + { + "epoch": 0.5239744754011104, + "grad_norm": 9.567298064894203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108040 + }, + { + "epoch": 0.5240229735939465, + "grad_norm": 9.143239054765218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108050 + }, + { + "epoch": 0.5240714717867826, + "grad_norm": 7.39660634963002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108060 + }, + { + "epoch": 0.5241199699796186, + "grad_norm": 7.758789877243544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108070 + }, + { + "epoch": 0.5241684681724548, + "grad_norm": 1.0067920896972282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108080 + }, + { + "epoch": 0.5242169663652908, + "grad_norm": 9.063678874099423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108090 + }, + { + "epoch": 0.524265464558127, + "grad_norm": 9.721490812353295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108100 + }, + { + "epoch": 0.524313962750963, + "grad_norm": 7.256595324633963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108110 + }, + { + "epoch": 0.5243624609437991, + "grad_norm": 7.676825930502673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108120 + }, + { + "epoch": 0.5244109591366352, + "grad_norm": 8.405432794234002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108130 + }, + { + "epoch": 0.5244594573294713, + "grad_norm": 8.99859458058927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108140 + }, + { + "epoch": 0.5245079555223073, + "grad_norm": 8.918851790440385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108150 + }, + { + "epoch": 0.5245564537151435, + "grad_norm": 1.93629347222668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108160 + }, + { + "epoch": 0.5246049519079795, + "grad_norm": 7.640623778115696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108170 + }, + { + "epoch": 0.5246534501008157, + "grad_norm": 7.68171162235376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108180 + }, + { + "epoch": 0.5247019482936517, + "grad_norm": 1.0182394305502385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108190 + }, + { + "epoch": 0.5247504464864878, + "grad_norm": 8.762804526440959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108200 + }, + { + "epoch": 0.5247989446793239, + "grad_norm": 7.973056881382945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108210 + }, + { + "epoch": 0.52484744287216, + "grad_norm": 7.86613369996303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108220 + }, + { + "epoch": 0.524895941064996, + "grad_norm": 8.18539263036655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108230 + }, + { + "epoch": 0.5249444392578322, + "grad_norm": 8.60643112332582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108240 + }, + { + "epoch": 0.5249929374506682, + "grad_norm": 8.553817565370991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108250 + }, + { + "epoch": 0.5250414356435044, + "grad_norm": 7.276030800085209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108260 + }, + { + "epoch": 0.5250899338363404, + "grad_norm": 7.280718250513019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108270 + }, + { + "epoch": 0.5251384320291765, + "grad_norm": 7.918261246686598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108280 + }, + { + "epoch": 0.5251869302220126, + "grad_norm": 9.019097291229627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108290 + }, + { + "epoch": 0.5252354284148487, + "grad_norm": 8.53091890462565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108300 + }, + { + "epoch": 0.5252839266076847, + "grad_norm": 7.223670195344312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108310 + }, + { + "epoch": 0.5253324248005209, + "grad_norm": 7.043588112765065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108320 + }, + { + "epoch": 0.5253809229933569, + "grad_norm": 1.709954631223809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108330 + }, + { + "epoch": 0.5254294211861931, + "grad_norm": 8.507119986234102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108340 + }, + { + "epoch": 0.5254779193790291, + "grad_norm": 8.336863999147681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108350 + }, + { + "epoch": 0.5255264175718652, + "grad_norm": 7.250660871704895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108360 + }, + { + "epoch": 0.5255749157647013, + "grad_norm": 7.368831234089157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108370 + }, + { + "epoch": 0.5256234139575374, + "grad_norm": 6.913967354194028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108380 + }, + { + "epoch": 0.5256719121503735, + "grad_norm": 8.461532985393205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108390 + }, + { + "epoch": 0.5257204103432096, + "grad_norm": 8.23215060563598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108400 + }, + { + "epoch": 0.5257689085360456, + "grad_norm": 7.645257227295588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108410 + }, + { + "epoch": 0.5258174067288818, + "grad_norm": 7.213470354372475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108420 + }, + { + "epoch": 0.5258659049217178, + "grad_norm": 6.801439411674437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108430 + }, + { + "epoch": 0.525914403114554, + "grad_norm": 8.280526486714734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108440 + }, + { + "epoch": 0.52596290130739, + "grad_norm": 8.494326664276741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108450 + }, + { + "epoch": 0.5260113995002261, + "grad_norm": 6.955373521577712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108460 + }, + { + "epoch": 0.5260598976930622, + "grad_norm": 7.2663560501951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108470 + }, + { + "epoch": 0.5261083958858983, + "grad_norm": 7.24213506941851e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108480 + }, + { + "epoch": 0.5261568940787343, + "grad_norm": 8.252550287579652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108490 + }, + { + "epoch": 0.5262053922715705, + "grad_norm": 8.384328253896456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108500 + }, + { + "epoch": 0.5262538904644065, + "grad_norm": 7.13311933964178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108510 + }, + { + "epoch": 0.5263023886572427, + "grad_norm": 7.031795234979654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108520 + }, + { + "epoch": 0.5263508868500788, + "grad_norm": 7.021559156328294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108530 + }, + { + "epoch": 0.5263993850429148, + "grad_norm": 8.746782498292305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108540 + }, + { + "epoch": 0.526447883235751, + "grad_norm": 8.257186578930487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108550 + }, + { + "epoch": 0.526496381428587, + "grad_norm": 7.175872696052465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108560 + }, + { + "epoch": 0.5265448796214232, + "grad_norm": 9.386229038454985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108570 + }, + { + "epoch": 0.5265933778142592, + "grad_norm": 6.978915934041652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108580 + }, + { + "epoch": 0.5266418760070953, + "grad_norm": 7.92399887927786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108590 + }, + { + "epoch": 0.5266903741999314, + "grad_norm": 7.845878258194716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108600 + }, + { + "epoch": 0.5267388723927675, + "grad_norm": 6.778564909382112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108610 + }, + { + "epoch": 0.5267873705856035, + "grad_norm": 7.050313399759034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108620 + }, + { + "epoch": 0.5268358687784397, + "grad_norm": 6.540119557030266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108630 + }, + { + "epoch": 0.5268843669712757, + "grad_norm": 8.075448931776918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108640 + }, + { + "epoch": 0.5269328651641119, + "grad_norm": 8.153136832333985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108650 + }, + { + "epoch": 0.5269813633569479, + "grad_norm": 6.668562235745412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108660 + }, + { + "epoch": 0.527029861549784, + "grad_norm": 6.670436647482347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108670 + }, + { + "epoch": 0.5270783597426201, + "grad_norm": 6.385639750305927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108680 + }, + { + "epoch": 0.5271268579354562, + "grad_norm": 8.088554892538014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108690 + }, + { + "epoch": 0.5271753561282922, + "grad_norm": 9.662721112135841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108700 + }, + { + "epoch": 0.5272238543211284, + "grad_norm": 6.415773867729513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108710 + }, + { + "epoch": 0.5272723525139644, + "grad_norm": 7.148661751443797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108720 + }, + { + "epoch": 0.5273208507068006, + "grad_norm": 6.441990052508118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108730 + }, + { + "epoch": 0.5273693488996366, + "grad_norm": 7.992375827825526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108740 + }, + { + "epoch": 0.5274178470924727, + "grad_norm": 7.945332924919057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108750 + }, + { + "epoch": 0.5274663452853088, + "grad_norm": 6.547746522755915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108760 + }, + { + "epoch": 0.5275148434781449, + "grad_norm": 1.437841490314895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108770 + }, + { + "epoch": 0.527563341670981, + "grad_norm": 8.158421138659833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108780 + }, + { + "epoch": 0.5276118398638171, + "grad_norm": 7.809138224956769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108790 + }, + { + "epoch": 0.5276603380566531, + "grad_norm": 7.953143921213268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108800 + }, + { + "epoch": 0.5277088362494893, + "grad_norm": 6.409312902633246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108810 + }, + { + "epoch": 0.5277573344423253, + "grad_norm": 6.725090173631543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108820 + }, + { + "epoch": 0.5278058326351615, + "grad_norm": 6.569496235897532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108830 + }, + { + "epoch": 0.5278543308279975, + "grad_norm": 8.32548181506354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108840 + }, + { + "epoch": 0.5279028290208336, + "grad_norm": 8.146019325749876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108850 + }, + { + "epoch": 0.5279513272136697, + "grad_norm": 6.619334413926481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108860 + }, + { + "epoch": 0.5279998254065058, + "grad_norm": 6.356843584853777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108870 + }, + { + "epoch": 0.5280483235993418, + "grad_norm": 6.50764775400603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108880 + }, + { + "epoch": 0.528096821792178, + "grad_norm": 7.855164341208365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108890 + }, + { + "epoch": 0.528145319985014, + "grad_norm": 7.828701598100452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108900 + }, + { + "epoch": 0.5281938181778502, + "grad_norm": 6.97664219728722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108910 + }, + { + "epoch": 0.5282423163706862, + "grad_norm": 6.49616467285341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108920 + }, + { + "epoch": 0.5282908145635223, + "grad_norm": 7.519768274732996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108930 + }, + { + "epoch": 0.5283393127563584, + "grad_norm": 7.758430342619249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108940 + }, + { + "epoch": 0.5283878109491945, + "grad_norm": 7.54413775894136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108950 + }, + { + "epoch": 0.5284363091420305, + "grad_norm": 6.613230851826302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108960 + }, + { + "epoch": 0.5284848073348667, + "grad_norm": 9.844204384990007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108970 + }, + { + "epoch": 0.5285333055277027, + "grad_norm": 6.751478309752201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108980 + }, + { + "epoch": 0.5285818037205389, + "grad_norm": 7.727338413587859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 108990 + }, + { + "epoch": 0.5286303019133749, + "grad_norm": 7.909474675216188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109000 + }, + { + "epoch": 0.528678800106211, + "grad_norm": 6.553059250791193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109010 + }, + { + "epoch": 0.5287272982990471, + "grad_norm": 6.460820856091232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109020 + }, + { + "epoch": 0.5287757964918832, + "grad_norm": 6.22060838395555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109030 + }, + { + "epoch": 0.5288242946847194, + "grad_norm": 7.844385407906884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109040 + }, + { + "epoch": 0.5288727928775554, + "grad_norm": 7.487061992605959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109050 + }, + { + "epoch": 0.5289212910703915, + "grad_norm": 6.71841746680002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109060 + }, + { + "epoch": 0.5289697892632276, + "grad_norm": 6.353835146910569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109070 + }, + { + "epoch": 0.5290182874560637, + "grad_norm": 6.541466035514532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109080 + }, + { + "epoch": 0.5290667856488998, + "grad_norm": 7.428515402807534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109090 + }, + { + "epoch": 0.5291152838417359, + "grad_norm": 7.465072116019655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109100 + }, + { + "epoch": 0.5291637820345719, + "grad_norm": 6.07221721793394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109110 + }, + { + "epoch": 0.5292122802274081, + "grad_norm": 6.173632272066243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109120 + }, + { + "epoch": 0.5292607784202441, + "grad_norm": 6.309305433660484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109130 + }, + { + "epoch": 0.5293092766130802, + "grad_norm": 7.441976634936509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109140 + }, + { + "epoch": 0.5293577748059163, + "grad_norm": 7.85714107109925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109150 + }, + { + "epoch": 0.5294062729987524, + "grad_norm": 6.566423138565369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109160 + }, + { + "epoch": 0.5294547711915885, + "grad_norm": 6.143160646843171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109170 + }, + { + "epoch": 0.5295032693844246, + "grad_norm": 6.544436104150009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109180 + }, + { + "epoch": 0.5295517675772606, + "grad_norm": 7.447943772831422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109190 + }, + { + "epoch": 0.5296002657700968, + "grad_norm": 7.51988835645534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109200 + }, + { + "epoch": 0.5296487639629328, + "grad_norm": 5.8918445233757666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109210 + }, + { + "epoch": 0.529697262155769, + "grad_norm": 6.041410927082325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109220 + }, + { + "epoch": 0.529745760348605, + "grad_norm": 6.343232428207557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109230 + }, + { + "epoch": 0.5297942585414411, + "grad_norm": 7.370420007646317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109240 + }, + { + "epoch": 0.5298427567342772, + "grad_norm": 7.6547571836727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109250 + }, + { + "epoch": 0.5298912549271133, + "grad_norm": 6.196591328944123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109260 + }, + { + "epoch": 0.5299397531199493, + "grad_norm": 5.726059981725484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109270 + }, + { + "epoch": 0.5299882513127855, + "grad_norm": 6.525888807118463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109280 + }, + { + "epoch": 0.5300367495056215, + "grad_norm": 7.298980619907525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109290 + }, + { + "epoch": 0.5300852476984577, + "grad_norm": 7.158514137017846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109300 + }, + { + "epoch": 0.5301337458912937, + "grad_norm": 6.024296084206071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109310 + }, + { + "epoch": 0.5301822440841298, + "grad_norm": 6.091163839982983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109320 + }, + { + "epoch": 0.5302307422769659, + "grad_norm": 6.1647774884932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109330 + }, + { + "epoch": 0.530279240469802, + "grad_norm": 7.537882851238464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109340 + }, + { + "epoch": 0.530327738662638, + "grad_norm": 0.06406218558549881, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 109350 + }, + { + "epoch": 0.5303762368554742, + "grad_norm": 0.0001228937180712819, + "learning_rate": 0.0002, + "loss": 0.0025, + "step": 109360 + }, + { + "epoch": 0.5304247350483102, + "grad_norm": 0.00016975509061012417, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 109370 + }, + { + "epoch": 0.5304732332411464, + "grad_norm": 0.00038868296542204916, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109380 + }, + { + "epoch": 0.5305217314339824, + "grad_norm": 4.1440136556047946e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 109390 + }, + { + "epoch": 0.5305702296268185, + "grad_norm": 0.1001141294836998, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 109400 + }, + { + "epoch": 0.5306187278196546, + "grad_norm": 0.011820987798273563, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 109410 + }, + { + "epoch": 0.5306672260124907, + "grad_norm": 0.023291651159524918, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 109420 + }, + { + "epoch": 0.5307157242053268, + "grad_norm": 0.01274082064628601, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 109430 + }, + { + "epoch": 0.5307642223981629, + "grad_norm": 5.921209594816901e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 109440 + }, + { + "epoch": 0.5308127205909989, + "grad_norm": 0.000211859485716559, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109450 + }, + { + "epoch": 0.5308612187838351, + "grad_norm": 3.4799719287548214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109460 + }, + { + "epoch": 0.5309097169766711, + "grad_norm": 1.2844542652601376e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109470 + }, + { + "epoch": 0.5309582151695073, + "grad_norm": 2.470863728376571e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109480 + }, + { + "epoch": 0.5310067133623433, + "grad_norm": 9.488607247476466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109490 + }, + { + "epoch": 0.5310552115551794, + "grad_norm": 3.520597601891495e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109500 + }, + { + "epoch": 0.5311037097480155, + "grad_norm": 1.0660085536073893e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109510 + }, + { + "epoch": 0.5311522079408516, + "grad_norm": 2.5939323677448556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109520 + }, + { + "epoch": 0.5312007061336876, + "grad_norm": 7.254027423186926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109530 + }, + { + "epoch": 0.5312492043265238, + "grad_norm": 9.039364158525132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109540 + }, + { + "epoch": 0.5312977025193599, + "grad_norm": 8.917632840166334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109550 + }, + { + "epoch": 0.531346200712196, + "grad_norm": 6.900447715452174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109560 + }, + { + "epoch": 0.5313946989050321, + "grad_norm": 5.755702659371309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109570 + }, + { + "epoch": 0.5314431970978681, + "grad_norm": 5.899482857785188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109580 + }, + { + "epoch": 0.5314916952907043, + "grad_norm": 1.1112224456155673e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109590 + }, + { + "epoch": 0.5315401934835403, + "grad_norm": 5.6837930060282815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109600 + }, + { + "epoch": 0.5315886916763765, + "grad_norm": 5.9710423556680325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109610 + }, + { + "epoch": 0.5316371898692125, + "grad_norm": 4.898135557596106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109620 + }, + { + "epoch": 0.5316856880620486, + "grad_norm": 4.586180693877395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109630 + }, + { + "epoch": 0.5317341862548847, + "grad_norm": 5.2968152886023745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109640 + }, + { + "epoch": 0.5317826844477208, + "grad_norm": 1.3937638868810609e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109650 + }, + { + "epoch": 0.5318311826405568, + "grad_norm": 4.34983849117998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109660 + }, + { + "epoch": 0.531879680833393, + "grad_norm": 5.048045750299934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109670 + }, + { + "epoch": 0.531928179026229, + "grad_norm": 5.544850409933133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109680 + }, + { + "epoch": 0.5319766772190652, + "grad_norm": 4.2105089050892275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109690 + }, + { + "epoch": 0.5320251754119012, + "grad_norm": 3.926735644199653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109700 + }, + { + "epoch": 0.5320736736047373, + "grad_norm": 4.086895842192462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109710 + }, + { + "epoch": 0.5321221717975734, + "grad_norm": 4.001536126452265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109720 + }, + { + "epoch": 0.5321706699904095, + "grad_norm": 3.5133157325617503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109730 + }, + { + "epoch": 0.5322191681832456, + "grad_norm": 6.7944597503810655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109740 + }, + { + "epoch": 0.5322676663760817, + "grad_norm": 7.270977221196517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109750 + }, + { + "epoch": 0.5323161645689177, + "grad_norm": 0.00011915533104911447, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 109760 + }, + { + "epoch": 0.5323646627617539, + "grad_norm": 4.995146355213365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109770 + }, + { + "epoch": 0.5324131609545899, + "grad_norm": 4.883882411377272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109780 + }, + { + "epoch": 0.532461659147426, + "grad_norm": 6.529995516757481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109790 + }, + { + "epoch": 0.5325101573402621, + "grad_norm": 2.235260581073817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109800 + }, + { + "epoch": 0.5325586555330982, + "grad_norm": 6.1257201195985544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109810 + }, + { + "epoch": 0.5326071537259343, + "grad_norm": 6.98985468261526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109820 + }, + { + "epoch": 0.5326556519187704, + "grad_norm": 4.992109552404145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109830 + }, + { + "epoch": 0.5327041501116064, + "grad_norm": 4.685232852352783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109840 + }, + { + "epoch": 0.5327526483044426, + "grad_norm": 5.095372671348741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109850 + }, + { + "epoch": 0.5328011464972786, + "grad_norm": 1.8558674128144048e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109860 + }, + { + "epoch": 0.5328496446901148, + "grad_norm": 4.343368345871568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109870 + }, + { + "epoch": 0.5328981428829508, + "grad_norm": 4.6045561248320155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109880 + }, + { + "epoch": 0.5329466410757869, + "grad_norm": 4.11669952882221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109890 + }, + { + "epoch": 0.532995139268623, + "grad_norm": 4.49253184342524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109900 + }, + { + "epoch": 0.5330436374614591, + "grad_norm": 3.650259714049753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109910 + }, + { + "epoch": 0.5330921356542951, + "grad_norm": 3.831411959254183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109920 + }, + { + "epoch": 0.5331406338471313, + "grad_norm": 3.34746323460422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109930 + }, + { + "epoch": 0.5331891320399673, + "grad_norm": 3.620806410253863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109940 + }, + { + "epoch": 0.5332376302328035, + "grad_norm": 3.392884536879137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109950 + }, + { + "epoch": 0.5332861284256395, + "grad_norm": 3.159744210279314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109960 + }, + { + "epoch": 0.5333346266184756, + "grad_norm": 2.9917578103777487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109970 + }, + { + "epoch": 0.5333831248113117, + "grad_norm": 3.1854647204454523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109980 + }, + { + "epoch": 0.5334316230041478, + "grad_norm": 3.465000645519467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 109990 + }, + { + "epoch": 0.5334801211969838, + "grad_norm": 3.3884991808008635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110000 + }, + { + "epoch": 0.53352861938982, + "grad_norm": 2.6663212793209823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110010 + }, + { + "epoch": 0.533577117582656, + "grad_norm": 3.018520146724768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110020 + }, + { + "epoch": 0.5336256157754922, + "grad_norm": 2.8040726647304837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110030 + }, + { + "epoch": 0.5336741139683282, + "grad_norm": 3.6930091482645366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110040 + }, + { + "epoch": 0.5337226121611643, + "grad_norm": 3.184263732691761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110050 + }, + { + "epoch": 0.5337711103540005, + "grad_norm": 2.5952122086891904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110060 + }, + { + "epoch": 0.5338196085468365, + "grad_norm": 2.6280763449904043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110070 + }, + { + "epoch": 0.5338681067396727, + "grad_norm": 2.553009380790172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110080 + }, + { + "epoch": 0.5339166049325087, + "grad_norm": 2.8140200356574496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110090 + }, + { + "epoch": 0.5339651031253448, + "grad_norm": 1.4024979464011267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110100 + }, + { + "epoch": 0.5340136013181809, + "grad_norm": 2.4759076495683985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110110 + }, + { + "epoch": 0.534062099511017, + "grad_norm": 2.441774086037185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110120 + }, + { + "epoch": 0.534110597703853, + "grad_norm": 2.4619259875180433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110130 + }, + { + "epoch": 0.5341590958966892, + "grad_norm": 2.9044888378848555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110140 + }, + { + "epoch": 0.5342075940895252, + "grad_norm": 2.8049332740920363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110150 + }, + { + "epoch": 0.5342560922823614, + "grad_norm": 2.572736775618978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110160 + }, + { + "epoch": 0.5343045904751974, + "grad_norm": 2.4196679078158922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110170 + }, + { + "epoch": 0.5343530886680335, + "grad_norm": 2.357660150664742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110180 + }, + { + "epoch": 0.5344015868608696, + "grad_norm": 2.696853698580526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110190 + }, + { + "epoch": 0.5344500850537057, + "grad_norm": 2.4516114081052365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110200 + }, + { + "epoch": 0.5344985832465418, + "grad_norm": 2.2829524368717102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110210 + }, + { + "epoch": 0.5345470814393779, + "grad_norm": 2.2812321276433067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110220 + }, + { + "epoch": 0.5345955796322139, + "grad_norm": 2.7114456315757707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110230 + }, + { + "epoch": 0.5346440778250501, + "grad_norm": 3.5667544580064714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110240 + }, + { + "epoch": 0.5346925760178861, + "grad_norm": 3.0552048428944545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110250 + }, + { + "epoch": 0.5347410742107223, + "grad_norm": 2.143288611478056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110260 + }, + { + "epoch": 0.5347895724035583, + "grad_norm": 2.074735220958246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110270 + }, + { + "epoch": 0.5348380705963944, + "grad_norm": 2.0846659936069045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110280 + }, + { + "epoch": 0.5348865687892305, + "grad_norm": 0.0058581288903951645, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 110290 + }, + { + "epoch": 0.5349350669820666, + "grad_norm": 8.279924804810435e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 110300 + }, + { + "epoch": 0.5349835651749026, + "grad_norm": 0.00011293661373201758, + "learning_rate": 0.0002, + "loss": 0.0156, + "step": 110310 + }, + { + "epoch": 0.5350320633677388, + "grad_norm": 0.00026603444712236524, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 110320 + }, + { + "epoch": 0.5350805615605748, + "grad_norm": 4.993961920263246e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 110330 + }, + { + "epoch": 0.535129059753411, + "grad_norm": 0.004525959026068449, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 110340 + }, + { + "epoch": 0.535177557946247, + "grad_norm": 0.00010054559243144467, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110350 + }, + { + "epoch": 0.5352260561390831, + "grad_norm": 8.659265586175025e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110360 + }, + { + "epoch": 0.5352745543319192, + "grad_norm": 6.783657590858638e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110370 + }, + { + "epoch": 0.5353230525247553, + "grad_norm": 5.934258661000058e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110380 + }, + { + "epoch": 0.5353715507175913, + "grad_norm": 5.102306749904528e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110390 + }, + { + "epoch": 0.5354200489104275, + "grad_norm": 4.45414989371784e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110400 + }, + { + "epoch": 0.5354685471032635, + "grad_norm": 0.0003931533719878644, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 110410 + }, + { + "epoch": 0.5355170452960997, + "grad_norm": 0.00012663903180509806, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 110420 + }, + { + "epoch": 0.5355655434889357, + "grad_norm": 7.862340862629935e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110430 + }, + { + "epoch": 0.5356140416817718, + "grad_norm": 6.941294122952968e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110440 + }, + { + "epoch": 0.5356625398746079, + "grad_norm": 6.647632835665718e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110450 + }, + { + "epoch": 0.535711038067444, + "grad_norm": 5.010474706068635e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110460 + }, + { + "epoch": 0.5357595362602801, + "grad_norm": 3.480811574263498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110470 + }, + { + "epoch": 0.5358080344531162, + "grad_norm": 3.165694579365663e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110480 + }, + { + "epoch": 0.5358565326459522, + "grad_norm": 3.76733805751428e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110490 + }, + { + "epoch": 0.5359050308387884, + "grad_norm": 2.6063054974656552e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110500 + }, + { + "epoch": 0.5359535290316244, + "grad_norm": 2.2142337911645882e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110510 + }, + { + "epoch": 0.5360020272244606, + "grad_norm": 2.1854266378795728e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110520 + }, + { + "epoch": 0.5360505254172966, + "grad_norm": 1.9754739696509205e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110530 + }, + { + "epoch": 0.5360990236101327, + "grad_norm": 2.032438169408124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110540 + }, + { + "epoch": 0.5361475218029688, + "grad_norm": 2.1162708435440436e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110550 + }, + { + "epoch": 0.5361960199958049, + "grad_norm": 1.5126115613384172e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110560 + }, + { + "epoch": 0.536244518188641, + "grad_norm": 1.9186742065357976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110570 + }, + { + "epoch": 0.5362930163814771, + "grad_norm": 1.3756216503679752e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110580 + }, + { + "epoch": 0.5363415145743132, + "grad_norm": 1.5893061572569422e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110590 + }, + { + "epoch": 0.5363900127671493, + "grad_norm": 1.3650019354827236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110600 + }, + { + "epoch": 0.5364385109599854, + "grad_norm": 1.2475686162360944e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110610 + }, + { + "epoch": 0.5364870091528214, + "grad_norm": 1.2892341146653052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110620 + }, + { + "epoch": 0.5365355073456576, + "grad_norm": 1.2131131370551884e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110630 + }, + { + "epoch": 0.5365840055384936, + "grad_norm": 1.310742118221242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110640 + }, + { + "epoch": 0.5366325037313298, + "grad_norm": 1.1534749319253024e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110650 + }, + { + "epoch": 0.5366810019241658, + "grad_norm": 1.0825719073181972e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110660 + }, + { + "epoch": 0.5367295001170019, + "grad_norm": 1.1366660146450158e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110670 + }, + { + "epoch": 0.536777998309838, + "grad_norm": 1.0009656762122177e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110680 + }, + { + "epoch": 0.5368264965026741, + "grad_norm": 1.7077294614864513e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110690 + }, + { + "epoch": 0.5368749946955101, + "grad_norm": 1.4729830581927672e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110700 + }, + { + "epoch": 0.5369234928883463, + "grad_norm": 9.275276170228608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110710 + }, + { + "epoch": 0.5369719910811823, + "grad_norm": 8.49773823574651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110720 + }, + { + "epoch": 0.5370204892740185, + "grad_norm": 1.0194474270974752e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110730 + }, + { + "epoch": 0.5370689874668545, + "grad_norm": 8.968486326921266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110740 + }, + { + "epoch": 0.5371174856596906, + "grad_norm": 1.0032711543317419e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110750 + }, + { + "epoch": 0.5371659838525267, + "grad_norm": 1.929537575051654e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110760 + }, + { + "epoch": 0.5372144820453628, + "grad_norm": 8.048254130699206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110770 + }, + { + "epoch": 0.5372629802381989, + "grad_norm": 9.711826351122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110780 + }, + { + "epoch": 0.537311478431035, + "grad_norm": 7.595017905259738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110790 + }, + { + "epoch": 0.537359976623871, + "grad_norm": 6.820844191679498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110800 + }, + { + "epoch": 0.5374084748167072, + "grad_norm": 7.178263786045136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110810 + }, + { + "epoch": 0.5374569730095432, + "grad_norm": 7.070600986480713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110820 + }, + { + "epoch": 0.5375054712023793, + "grad_norm": 7.051338343444513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110830 + }, + { + "epoch": 0.5375539693952154, + "grad_norm": 6.613873665628489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110840 + }, + { + "epoch": 0.5376024675880515, + "grad_norm": 6.4857858887990005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110850 + }, + { + "epoch": 0.5376509657808876, + "grad_norm": 6.285931704042014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110860 + }, + { + "epoch": 0.5376994639737237, + "grad_norm": 6.3146590036922134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110870 + }, + { + "epoch": 0.5377479621665597, + "grad_norm": 6.2345743572223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110880 + }, + { + "epoch": 0.5377964603593959, + "grad_norm": 5.771919404651271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110890 + }, + { + "epoch": 0.5378449585522319, + "grad_norm": 5.451932793221204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110900 + }, + { + "epoch": 0.537893456745068, + "grad_norm": 5.800547114631627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110910 + }, + { + "epoch": 0.5379419549379041, + "grad_norm": 5.520506874745479e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110920 + }, + { + "epoch": 0.5379904531307402, + "grad_norm": 5.763320586993359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110930 + }, + { + "epoch": 0.5380389513235763, + "grad_norm": 5.033027719036909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110940 + }, + { + "epoch": 0.5380874495164124, + "grad_norm": 5.722070454794448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110950 + }, + { + "epoch": 0.5381359477092484, + "grad_norm": 5.171908469492337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110960 + }, + { + "epoch": 0.5381844459020846, + "grad_norm": 5.098395376990084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110970 + }, + { + "epoch": 0.5382329440949206, + "grad_norm": 5.144765964359976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110980 + }, + { + "epoch": 0.5382814422877568, + "grad_norm": 5.125771167513449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 110990 + }, + { + "epoch": 0.5383299404805928, + "grad_norm": 5.235198386799311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111000 + }, + { + "epoch": 0.5383784386734289, + "grad_norm": 5.355764187697787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111010 + }, + { + "epoch": 0.538426936866265, + "grad_norm": 4.574236754706362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111020 + }, + { + "epoch": 0.5384754350591011, + "grad_norm": 4.56491443401319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111030 + }, + { + "epoch": 0.5385239332519371, + "grad_norm": 3.992453912360361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111040 + }, + { + "epoch": 0.5385724314447733, + "grad_norm": 4.103781520825578e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111050 + }, + { + "epoch": 0.5386209296376093, + "grad_norm": 5.5930163398443256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111060 + }, + { + "epoch": 0.5386694278304455, + "grad_norm": 4.389485638967017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111070 + }, + { + "epoch": 0.5387179260232816, + "grad_norm": 4.4663443077297416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111080 + }, + { + "epoch": 0.5387664242161176, + "grad_norm": 3.868169642373687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111090 + }, + { + "epoch": 0.5388149224089538, + "grad_norm": 4.029992396681337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111100 + }, + { + "epoch": 0.5388634206017898, + "grad_norm": 4.5462747948477045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111110 + }, + { + "epoch": 0.538911918794626, + "grad_norm": 4.053035354445456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111120 + }, + { + "epoch": 0.538960416987462, + "grad_norm": 3.984750037488993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111130 + }, + { + "epoch": 0.5390089151802981, + "grad_norm": 3.857009687635582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111140 + }, + { + "epoch": 0.5390574133731342, + "grad_norm": 4.043269200337818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111150 + }, + { + "epoch": 0.5391059115659703, + "grad_norm": 3.7610393519571517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111160 + }, + { + "epoch": 0.5391544097588064, + "grad_norm": 3.90126933780266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111170 + }, + { + "epoch": 0.5392029079516425, + "grad_norm": 3.908503458660562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111180 + }, + { + "epoch": 0.5392514061444785, + "grad_norm": 3.6213211842550663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111190 + }, + { + "epoch": 0.5392999043373147, + "grad_norm": 3.724439693542081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111200 + }, + { + "epoch": 0.5393484025301507, + "grad_norm": 4.078618530911626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111210 + }, + { + "epoch": 0.5393969007229868, + "grad_norm": 3.3389494547009235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111220 + }, + { + "epoch": 0.5394453989158229, + "grad_norm": 3.536999201969593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111230 + }, + { + "epoch": 0.539493897108659, + "grad_norm": 3.3689043448248412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111240 + }, + { + "epoch": 0.5395423953014951, + "grad_norm": 3.392571443328052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111250 + }, + { + "epoch": 0.5395908934943312, + "grad_norm": 3.4414654237480136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111260 + }, + { + "epoch": 0.5396393916871672, + "grad_norm": 3.5909661164623685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111270 + }, + { + "epoch": 0.5396878898800034, + "grad_norm": 3.876985374517972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111280 + }, + { + "epoch": 0.5397363880728394, + "grad_norm": 3.2688794817659073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111290 + }, + { + "epoch": 0.5397848862656756, + "grad_norm": 3.2687805742170895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111300 + }, + { + "epoch": 0.5398333844585116, + "grad_norm": 3.578492396627553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111310 + }, + { + "epoch": 0.5398818826513477, + "grad_norm": 3.182459067829768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111320 + }, + { + "epoch": 0.5399303808441838, + "grad_norm": 3.0638811949756928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111330 + }, + { + "epoch": 0.5399788790370199, + "grad_norm": 3.267375859650201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111340 + }, + { + "epoch": 0.5400273772298559, + "grad_norm": 1.638222784094978e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 111350 + }, + { + "epoch": 0.5400758754226921, + "grad_norm": 9.676162153482437e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111360 + }, + { + "epoch": 0.5401243736155281, + "grad_norm": 0.00012574852735269815, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111370 + }, + { + "epoch": 0.5401728718083643, + "grad_norm": 6.75319679430686e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111380 + }, + { + "epoch": 0.5402213700012003, + "grad_norm": 6.0499402025016025e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111390 + }, + { + "epoch": 0.5402698681940364, + "grad_norm": 3.5793134884443134e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111400 + }, + { + "epoch": 0.5403183663868725, + "grad_norm": 2.4765788111835718e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111410 + }, + { + "epoch": 0.5403668645797086, + "grad_norm": 1.7214253603015095e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111420 + }, + { + "epoch": 0.5404153627725446, + "grad_norm": 1.5608697140123695e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111430 + }, + { + "epoch": 0.5404638609653808, + "grad_norm": 1.7420699805370532e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111440 + }, + { + "epoch": 0.5405123591582168, + "grad_norm": 1.495133892603917e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111450 + }, + { + "epoch": 0.540560857351053, + "grad_norm": 1.0917362487816717e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111460 + }, + { + "epoch": 0.540609355543889, + "grad_norm": 1.1112225365650374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111470 + }, + { + "epoch": 0.5406578537367251, + "grad_norm": 1.5265133697539568e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111480 + }, + { + "epoch": 0.5407063519295612, + "grad_norm": 1.1436246495577507e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111490 + }, + { + "epoch": 0.5407548501223973, + "grad_norm": 1.0060084605356678e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111500 + }, + { + "epoch": 0.5408033483152334, + "grad_norm": 7.812700459908228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111510 + }, + { + "epoch": 0.5408518465080695, + "grad_norm": 8.339908163179643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111520 + }, + { + "epoch": 0.5409003447009055, + "grad_norm": 7.457745141437044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111530 + }, + { + "epoch": 0.5409488428937417, + "grad_norm": 8.504620382154826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111540 + }, + { + "epoch": 0.5409973410865777, + "grad_norm": 8.541230272385292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111550 + }, + { + "epoch": 0.5410458392794139, + "grad_norm": 6.122786089690635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111560 + }, + { + "epoch": 0.5410943374722499, + "grad_norm": 6.881834906380391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111570 + }, + { + "epoch": 0.541142835665086, + "grad_norm": 5.844428415002767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111580 + }, + { + "epoch": 0.5411913338579222, + "grad_norm": 7.352628472290235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111590 + }, + { + "epoch": 0.5412398320507582, + "grad_norm": 6.351413503580261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111600 + }, + { + "epoch": 0.5412883302435944, + "grad_norm": 5.3101089179108385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111610 + }, + { + "epoch": 0.5413368284364304, + "grad_norm": 5.188790510146646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111620 + }, + { + "epoch": 0.5413853266292665, + "grad_norm": 5.120366040500812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111630 + }, + { + "epoch": 0.5414338248221026, + "grad_norm": 5.856708867213456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111640 + }, + { + "epoch": 0.5414823230149387, + "grad_norm": 5.726764356950298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111650 + }, + { + "epoch": 0.5415308212077747, + "grad_norm": 4.533310857368633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111660 + }, + { + "epoch": 0.5415793194006109, + "grad_norm": 4.445983449841151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111670 + }, + { + "epoch": 0.5416278175934469, + "grad_norm": 4.637525307771284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111680 + }, + { + "epoch": 0.5416763157862831, + "grad_norm": 5.0670023483689874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111690 + }, + { + "epoch": 0.5417248139791191, + "grad_norm": 5.036871243646601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111700 + }, + { + "epoch": 0.5417733121719552, + "grad_norm": 4.236012500768993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111710 + }, + { + "epoch": 0.5418218103647913, + "grad_norm": 4.093591542186914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111720 + }, + { + "epoch": 0.5418703085576274, + "grad_norm": 3.796522605625796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111730 + }, + { + "epoch": 0.5419188067504634, + "grad_norm": 4.500256181927398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111740 + }, + { + "epoch": 0.5419673049432996, + "grad_norm": 4.14749956689775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111750 + }, + { + "epoch": 0.5420158031361356, + "grad_norm": 3.440490445427713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111760 + }, + { + "epoch": 0.5420643013289718, + "grad_norm": 3.4230781693622703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111770 + }, + { + "epoch": 0.5421127995218078, + "grad_norm": 3.195133331246325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111780 + }, + { + "epoch": 0.5421612977146439, + "grad_norm": 4.086739863851108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111790 + }, + { + "epoch": 0.54220979590748, + "grad_norm": 3.6419712614588207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111800 + }, + { + "epoch": 0.5422582941003161, + "grad_norm": 3.132991423626663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111810 + }, + { + "epoch": 0.5423067922931522, + "grad_norm": 3.0899977900844533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111820 + }, + { + "epoch": 0.5423552904859883, + "grad_norm": 3.1005363325675717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111830 + }, + { + "epoch": 0.5424037886788243, + "grad_norm": 3.356991555847344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111840 + }, + { + "epoch": 0.5424522868716605, + "grad_norm": 3.23094536724966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111850 + }, + { + "epoch": 0.5425007850644965, + "grad_norm": 3.015822358065634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111860 + }, + { + "epoch": 0.5425492832573326, + "grad_norm": 2.9766281386400806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111870 + }, + { + "epoch": 0.5425977814501687, + "grad_norm": 2.9013638140895637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111880 + }, + { + "epoch": 0.5426462796430048, + "grad_norm": 3.5422465316514717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111890 + }, + { + "epoch": 0.5426947778358409, + "grad_norm": 2.9759005428786622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111900 + }, + { + "epoch": 0.542743276028677, + "grad_norm": 2.6836044071387732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111910 + }, + { + "epoch": 0.542791774221513, + "grad_norm": 2.6218440325465053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111920 + }, + { + "epoch": 0.5428402724143492, + "grad_norm": 2.5658400772954337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111930 + }, + { + "epoch": 0.5428887706071852, + "grad_norm": 2.885412641262519e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111940 + }, + { + "epoch": 0.5429372688000214, + "grad_norm": 2.920589395216666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111950 + }, + { + "epoch": 0.5429857669928574, + "grad_norm": 2.522011300243321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111960 + }, + { + "epoch": 0.5430342651856935, + "grad_norm": 2.3502959720644867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111970 + }, + { + "epoch": 0.5430827633785296, + "grad_norm": 2.3417385364155052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111980 + }, + { + "epoch": 0.5431312615713657, + "grad_norm": 2.6833940864889883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 111990 + }, + { + "epoch": 0.5431797597642017, + "grad_norm": 2.6846505534194876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112000 + }, + { + "epoch": 0.5432282579570379, + "grad_norm": 2.3451218567061005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112010 + }, + { + "epoch": 0.5432767561498739, + "grad_norm": 2.2428762349591125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112020 + }, + { + "epoch": 0.5433252543427101, + "grad_norm": 2.3547049750050064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112030 + }, + { + "epoch": 0.5433737525355461, + "grad_norm": 2.297147375429631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112040 + }, + { + "epoch": 0.5434222507283822, + "grad_norm": 2.356426875849138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112050 + }, + { + "epoch": 0.5434707489212183, + "grad_norm": 2.2920294213690795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112060 + }, + { + "epoch": 0.5435192471140544, + "grad_norm": 2.231792905149632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112070 + }, + { + "epoch": 0.5435677453068904, + "grad_norm": 2.1343055323086446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112080 + }, + { + "epoch": 0.5436162434997266, + "grad_norm": 2.355221795369289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112090 + }, + { + "epoch": 0.5436647416925627, + "grad_norm": 2.1129137621755945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112100 + }, + { + "epoch": 0.5437132398853988, + "grad_norm": 2.088736209771014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112110 + }, + { + "epoch": 0.5437617380782349, + "grad_norm": 1.9846897885145154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112120 + }, + { + "epoch": 0.543810236271071, + "grad_norm": 2.0122563455515774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112130 + }, + { + "epoch": 0.5438587344639071, + "grad_norm": 2.844801201717928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112140 + }, + { + "epoch": 0.5439072326567431, + "grad_norm": 2.5134543193416903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112150 + }, + { + "epoch": 0.5439557308495793, + "grad_norm": 1.8338120071348385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112160 + }, + { + "epoch": 0.5440042290424153, + "grad_norm": 1.9215406155126402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112170 + }, + { + "epoch": 0.5440527272352514, + "grad_norm": 1.8579978586785728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112180 + }, + { + "epoch": 0.5441012254280875, + "grad_norm": 1.8996084918398992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112190 + }, + { + "epoch": 0.5441497236209236, + "grad_norm": 1.890114162961254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112200 + }, + { + "epoch": 0.5441982218137597, + "grad_norm": 1.843768927756173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112210 + }, + { + "epoch": 0.5442467200065958, + "grad_norm": 1.7352112990920432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112220 + }, + { + "epoch": 0.5442952181994318, + "grad_norm": 1.817460201891663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112230 + }, + { + "epoch": 0.544343716392268, + "grad_norm": 1.78435800535226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112240 + }, + { + "epoch": 0.544392214585104, + "grad_norm": 1.5781350839461084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112250 + }, + { + "epoch": 0.5444407127779402, + "grad_norm": 1.6246589211732498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112260 + }, + { + "epoch": 0.5444892109707762, + "grad_norm": 1.638952653593151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112270 + }, + { + "epoch": 0.5445377091636123, + "grad_norm": 1.772755126694392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112280 + }, + { + "epoch": 0.5445862073564484, + "grad_norm": 1.6263451243503368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112290 + }, + { + "epoch": 0.5446347055492845, + "grad_norm": 1.6984736248559784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112300 + }, + { + "epoch": 0.5446832037421205, + "grad_norm": 1.5536943465122022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112310 + }, + { + "epoch": 0.5447317019349567, + "grad_norm": 1.5105973716345034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112320 + }, + { + "epoch": 0.5447802001277927, + "grad_norm": 1.4765780633752001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112330 + }, + { + "epoch": 0.5448286983206289, + "grad_norm": 1.566696028021397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112340 + }, + { + "epoch": 0.5448771965134649, + "grad_norm": 1.7114482488977956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112350 + }, + { + "epoch": 0.544925694706301, + "grad_norm": 1.8983232621394563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112360 + }, + { + "epoch": 0.5449741928991371, + "grad_norm": 1.4372856185218552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112370 + }, + { + "epoch": 0.5450226910919732, + "grad_norm": 1.5640013089068816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112380 + }, + { + "epoch": 0.5450711892848092, + "grad_norm": 1.4565156334356288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112390 + }, + { + "epoch": 0.5451196874776454, + "grad_norm": 1.3909868812334025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112400 + }, + { + "epoch": 0.5451681856704814, + "grad_norm": 1.4823627907389891e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112410 + }, + { + "epoch": 0.5452166838633176, + "grad_norm": 1.5017379837445333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112420 + }, + { + "epoch": 0.5452651820561536, + "grad_norm": 1.3876991715733311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112430 + }, + { + "epoch": 0.5453136802489897, + "grad_norm": 1.3989817944093375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112440 + }, + { + "epoch": 0.5453621784418258, + "grad_norm": 1.4395421885637916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112450 + }, + { + "epoch": 0.5454106766346619, + "grad_norm": 1.4478501952908118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112460 + }, + { + "epoch": 0.545459174827498, + "grad_norm": 1.4029999420017703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112470 + }, + { + "epoch": 0.5455076730203341, + "grad_norm": 1.3493588539859047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112480 + }, + { + "epoch": 0.5455561712131701, + "grad_norm": 2.564827582318685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112490 + }, + { + "epoch": 0.5456046694060063, + "grad_norm": 1.2938969575770898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112500 + }, + { + "epoch": 0.5456531675988423, + "grad_norm": 1.3257255204734975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112510 + }, + { + "epoch": 0.5457016657916784, + "grad_norm": 1.3893572941015009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112520 + }, + { + "epoch": 0.5457501639845145, + "grad_norm": 1.2999757927900646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112530 + }, + { + "epoch": 0.5457986621773506, + "grad_norm": 1.184851271318621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112540 + }, + { + "epoch": 0.5458471603701867, + "grad_norm": 1.3025878615735564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112550 + }, + { + "epoch": 0.5458956585630228, + "grad_norm": 1.1717934285115916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112560 + }, + { + "epoch": 0.5459441567558588, + "grad_norm": 1.2262620430192328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112570 + }, + { + "epoch": 0.545992654948695, + "grad_norm": 1.2865491498814663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112580 + }, + { + "epoch": 0.546041153141531, + "grad_norm": 1.2047939890180714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112590 + }, + { + "epoch": 0.5460896513343672, + "grad_norm": 1.1622639704000903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112600 + }, + { + "epoch": 0.5461381495272032, + "grad_norm": 1.1505220527396887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112610 + }, + { + "epoch": 0.5461866477200393, + "grad_norm": 1.9953661194449523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112620 + }, + { + "epoch": 0.5462351459128755, + "grad_norm": 1.1231545613554772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112630 + }, + { + "epoch": 0.5462836441057115, + "grad_norm": 1.133471982939227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112640 + }, + { + "epoch": 0.5463321422985477, + "grad_norm": 1.2377067832858302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112650 + }, + { + "epoch": 0.5463806404913837, + "grad_norm": 1.1655060916382354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112660 + }, + { + "epoch": 0.5464291386842198, + "grad_norm": 1.0260447425025632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112670 + }, + { + "epoch": 0.5464776368770559, + "grad_norm": 1.0359062798670493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112680 + }, + { + "epoch": 0.546526135069892, + "grad_norm": 1.0884232324315235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112690 + }, + { + "epoch": 0.546574633262728, + "grad_norm": 1.096493065233517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112700 + }, + { + "epoch": 0.5466231314555642, + "grad_norm": 1.0030969406216173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112710 + }, + { + "epoch": 0.5466716296484002, + "grad_norm": 9.972197858587606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112720 + }, + { + "epoch": 0.5467201278412364, + "grad_norm": 1.0116290241057868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112730 + }, + { + "epoch": 0.5467686260340724, + "grad_norm": 1.0336887044104515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112740 + }, + { + "epoch": 0.5468171242269085, + "grad_norm": 9.485709711043455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112750 + }, + { + "epoch": 0.5468656224197446, + "grad_norm": 1.0026139989349758e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112760 + }, + { + "epoch": 0.5469141206125807, + "grad_norm": 1.0071809128930909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112770 + }, + { + "epoch": 0.5469626188054167, + "grad_norm": 9.48925674038037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112780 + }, + { + "epoch": 0.5470111169982529, + "grad_norm": 3.2521566026844084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112790 + }, + { + "epoch": 0.5470596151910889, + "grad_norm": 1.0457191592649906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112800 + }, + { + "epoch": 0.5471081133839251, + "grad_norm": 9.442240411772218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112810 + }, + { + "epoch": 0.5471566115767611, + "grad_norm": 9.526491453470953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112820 + }, + { + "epoch": 0.5472051097695972, + "grad_norm": 9.89187583400053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112830 + }, + { + "epoch": 0.5472536079624333, + "grad_norm": 9.571723467161064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112840 + }, + { + "epoch": 0.5473021061552694, + "grad_norm": 1.025251322062104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112850 + }, + { + "epoch": 0.5473506043481055, + "grad_norm": 9.459377565690374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112860 + }, + { + "epoch": 0.5473991025409416, + "grad_norm": 8.96160315733141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112870 + }, + { + "epoch": 0.5474476007337776, + "grad_norm": 8.691584980624611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112880 + }, + { + "epoch": 0.5474960989266138, + "grad_norm": 8.394117116949928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112890 + }, + { + "epoch": 0.5475445971194498, + "grad_norm": 8.783466114437033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112900 + }, + { + "epoch": 0.547593095312286, + "grad_norm": 8.123428756334761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112910 + }, + { + "epoch": 0.547641593505122, + "grad_norm": 8.910502060643921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112920 + }, + { + "epoch": 0.5476900916979581, + "grad_norm": 8.365921075892402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112930 + }, + { + "epoch": 0.5477385898907942, + "grad_norm": 8.404346658608119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112940 + }, + { + "epoch": 0.5477870880836303, + "grad_norm": 1.1154700132465223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112950 + }, + { + "epoch": 0.5478355862764663, + "grad_norm": 8.746594062358781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112960 + }, + { + "epoch": 0.5478840844693025, + "grad_norm": 8.995864959615574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112970 + }, + { + "epoch": 0.5479325826621385, + "grad_norm": 1.1247680049564224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112980 + }, + { + "epoch": 0.5479810808549747, + "grad_norm": 8.755044405006629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 112990 + }, + { + "epoch": 0.5480295790478107, + "grad_norm": 8.470975103591627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113000 + }, + { + "epoch": 0.5480780772406468, + "grad_norm": 7.503743404413399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113010 + }, + { + "epoch": 0.5481265754334829, + "grad_norm": 7.835564019842423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113020 + }, + { + "epoch": 0.548175073626319, + "grad_norm": 8.145328251885076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113030 + }, + { + "epoch": 0.548223571819155, + "grad_norm": 7.958914238770376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113040 + }, + { + "epoch": 0.5482720700119912, + "grad_norm": 7.579621978948126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113050 + }, + { + "epoch": 0.5483205682048272, + "grad_norm": 8.259650030595367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113060 + }, + { + "epoch": 0.5483690663976634, + "grad_norm": 8.077622055679967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113070 + }, + { + "epoch": 0.5484175645904994, + "grad_norm": 7.43011582926556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113080 + }, + { + "epoch": 0.5484660627833355, + "grad_norm": 7.285636343112856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113090 + }, + { + "epoch": 0.5485145609761716, + "grad_norm": 7.414226956825587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113100 + }, + { + "epoch": 0.5485630591690077, + "grad_norm": 6.999016477493569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113110 + }, + { + "epoch": 0.5486115573618437, + "grad_norm": 7.309194529625529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113120 + }, + { + "epoch": 0.5486600555546799, + "grad_norm": 6.916157531122735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113130 + }, + { + "epoch": 0.548708553747516, + "grad_norm": 7.247727467074583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113140 + }, + { + "epoch": 0.5487570519403521, + "grad_norm": 7.090922053976101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113150 + }, + { + "epoch": 0.5488055501331882, + "grad_norm": 6.981817932683043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113160 + }, + { + "epoch": 0.5488540483260242, + "grad_norm": 6.96995584803517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113170 + }, + { + "epoch": 0.5489025465188604, + "grad_norm": 6.713024731652695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113180 + }, + { + "epoch": 0.5489510447116964, + "grad_norm": 6.923490900589968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113190 + }, + { + "epoch": 0.5489995429045326, + "grad_norm": 7.157244681366137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113200 + }, + { + "epoch": 0.5490480410973686, + "grad_norm": 7.37115783522313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113210 + }, + { + "epoch": 0.5490965392902047, + "grad_norm": 6.408203034879989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113220 + }, + { + "epoch": 0.5491450374830408, + "grad_norm": 6.473571261267352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113230 + }, + { + "epoch": 0.5491935356758769, + "grad_norm": 7.153938099691004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113240 + }, + { + "epoch": 0.549242033868713, + "grad_norm": 6.881498393340735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113250 + }, + { + "epoch": 0.5492905320615491, + "grad_norm": 6.266360514928238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113260 + }, + { + "epoch": 0.5493390302543851, + "grad_norm": 6.244324595172657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113270 + }, + { + "epoch": 0.5493875284472213, + "grad_norm": 6.107475201133639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113280 + }, + { + "epoch": 0.5494360266400573, + "grad_norm": 7.671883963666914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113290 + }, + { + "epoch": 0.5494845248328935, + "grad_norm": 6.434613624151098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113300 + }, + { + "epoch": 0.5495330230257295, + "grad_norm": 6.293993806139042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113310 + }, + { + "epoch": 0.5495815212185656, + "grad_norm": 6.155275400487881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113320 + }, + { + "epoch": 0.5496300194114017, + "grad_norm": 6.219584633981867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113330 + }, + { + "epoch": 0.5496785176042378, + "grad_norm": 6.25816312549432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113340 + }, + { + "epoch": 0.5497270157970738, + "grad_norm": 7.60847626679606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113350 + }, + { + "epoch": 0.54977551398991, + "grad_norm": 5.907223794565652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113360 + }, + { + "epoch": 0.549824012182746, + "grad_norm": 5.986995574858156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113370 + }, + { + "epoch": 0.5498725103755822, + "grad_norm": 5.590404157373996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113380 + }, + { + "epoch": 0.5499210085684182, + "grad_norm": 6.295708772086073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113390 + }, + { + "epoch": 0.5499695067612543, + "grad_norm": 5.364193498280656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113400 + }, + { + "epoch": 0.5500180049540904, + "grad_norm": 5.489832233251946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113410 + }, + { + "epoch": 0.5500665031469265, + "grad_norm": 5.881364018023305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113420 + }, + { + "epoch": 0.5501150013397625, + "grad_norm": 5.781382697023218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113430 + }, + { + "epoch": 0.5501634995325987, + "grad_norm": 6.462047963395889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113440 + }, + { + "epoch": 0.5502119977254347, + "grad_norm": 6.017499458721431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113450 + }, + { + "epoch": 0.5502604959182709, + "grad_norm": 5.024261326980195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113460 + }, + { + "epoch": 0.5503089941111069, + "grad_norm": 5.468886001835926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113470 + }, + { + "epoch": 0.550357492303943, + "grad_norm": 5.71982695873885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113480 + }, + { + "epoch": 0.5504059904967791, + "grad_norm": 5.943555265730538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113490 + }, + { + "epoch": 0.5504544886896152, + "grad_norm": 5.75346518871811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113500 + }, + { + "epoch": 0.5505029868824513, + "grad_norm": 5.087043177809392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113510 + }, + { + "epoch": 0.5505514850752874, + "grad_norm": 5.38224639967666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113520 + }, + { + "epoch": 0.5505999832681234, + "grad_norm": 5.023700850870227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113530 + }, + { + "epoch": 0.5506484814609596, + "grad_norm": 5.206408673075202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113540 + }, + { + "epoch": 0.5506969796537956, + "grad_norm": 5.109749281473341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113550 + }, + { + "epoch": 0.5507454778466317, + "grad_norm": 5.326622840584605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113560 + }, + { + "epoch": 0.5507939760394678, + "grad_norm": 5.313763722369913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113570 + }, + { + "epoch": 0.5508424742323039, + "grad_norm": 5.608756623587396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113580 + }, + { + "epoch": 0.55089097242514, + "grad_norm": 1.9688395695993677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113590 + }, + { + "epoch": 0.5509394706179761, + "grad_norm": 6.691854537166364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113600 + }, + { + "epoch": 0.5509879688108121, + "grad_norm": 4.6047685486882983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113610 + }, + { + "epoch": 0.5510364670036483, + "grad_norm": 5.320072204995085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113620 + }, + { + "epoch": 0.5510849651964843, + "grad_norm": 4.757899034757429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113630 + }, + { + "epoch": 0.5511334633893205, + "grad_norm": 5.436696142169239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113640 + }, + { + "epoch": 0.5511819615821566, + "grad_norm": 5.134012326379889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113650 + }, + { + "epoch": 0.5512304597749926, + "grad_norm": 4.932175556859875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113660 + }, + { + "epoch": 0.5512789579678288, + "grad_norm": 4.726116742403974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113670 + }, + { + "epoch": 0.5513274561606648, + "grad_norm": 4.871561714026029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113680 + }, + { + "epoch": 0.551375954353501, + "grad_norm": 4.920415221931762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113690 + }, + { + "epoch": 0.551424452546337, + "grad_norm": 5.178789592719113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113700 + }, + { + "epoch": 0.5514729507391731, + "grad_norm": 4.908508799417177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113710 + }, + { + "epoch": 0.5515214489320092, + "grad_norm": 4.6146652721290593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113720 + }, + { + "epoch": 0.5515699471248453, + "grad_norm": 4.799189810000826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113730 + }, + { + "epoch": 0.5516184453176813, + "grad_norm": 4.984007091479725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113740 + }, + { + "epoch": 0.5516669435105175, + "grad_norm": 1.5561367035843432e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113750 + }, + { + "epoch": 0.5517154417033535, + "grad_norm": 4.453237067991722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113760 + }, + { + "epoch": 0.5517639398961897, + "grad_norm": 4.386033367609343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113770 + }, + { + "epoch": 0.5518124380890257, + "grad_norm": 4.4553948441716784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113780 + }, + { + "epoch": 0.5518609362818618, + "grad_norm": 5.278864136926131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113790 + }, + { + "epoch": 0.5519094344746979, + "grad_norm": 4.799185830961505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113800 + }, + { + "epoch": 0.551957932667534, + "grad_norm": 4.509636539751227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113810 + }, + { + "epoch": 0.55200643086037, + "grad_norm": 4.2957151435984997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113820 + }, + { + "epoch": 0.5520549290532062, + "grad_norm": 4.89940475745243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113830 + }, + { + "epoch": 0.5521034272460422, + "grad_norm": 4.603189154295251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113840 + }, + { + "epoch": 0.5521519254388784, + "grad_norm": 4.6003478360034933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113850 + }, + { + "epoch": 0.5522004236317144, + "grad_norm": 4.3862840470865194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113860 + }, + { + "epoch": 0.5522489218245505, + "grad_norm": 4.241024953444139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113870 + }, + { + "epoch": 0.5522974200173866, + "grad_norm": 4.257232433246827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113880 + }, + { + "epoch": 0.5523459182102227, + "grad_norm": 7.996525255293818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113890 + }, + { + "epoch": 0.5523944164030588, + "grad_norm": 4.458392481865303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113900 + }, + { + "epoch": 0.5524429145958949, + "grad_norm": 4.056321074585867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113910 + }, + { + "epoch": 0.5524914127887309, + "grad_norm": 4.895710503660666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113920 + }, + { + "epoch": 0.5525399109815671, + "grad_norm": 4.151614803049597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113930 + }, + { + "epoch": 0.5525884091744031, + "grad_norm": 4.085000000486616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113940 + }, + { + "epoch": 0.5526369073672392, + "grad_norm": 4.033494178656838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113950 + }, + { + "epoch": 0.5526854055600753, + "grad_norm": 5.110734946356388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113960 + }, + { + "epoch": 0.5527339037529114, + "grad_norm": 3.819779124114575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113970 + }, + { + "epoch": 0.5527824019457475, + "grad_norm": 3.833563653188321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113980 + }, + { + "epoch": 0.5528309001385836, + "grad_norm": 4.404675735258934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 113990 + }, + { + "epoch": 0.5528793983314196, + "grad_norm": 3.8815269931546936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114000 + }, + { + "epoch": 0.5529278965242558, + "grad_norm": 3.821681389126752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114010 + }, + { + "epoch": 0.5529763947170918, + "grad_norm": 4.102150796825299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114020 + }, + { + "epoch": 0.553024892909928, + "grad_norm": 3.948186133584386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114030 + }, + { + "epoch": 0.553073391102764, + "grad_norm": 3.996106840986613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114040 + }, + { + "epoch": 0.5531218892956001, + "grad_norm": 3.7116873841114284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114050 + }, + { + "epoch": 0.5531703874884362, + "grad_norm": 3.910900829851016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114060 + }, + { + "epoch": 0.5532188856812723, + "grad_norm": 3.5968659517493506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114070 + }, + { + "epoch": 0.5532673838741083, + "grad_norm": 3.6473460340857855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114080 + }, + { + "epoch": 0.5533158820669445, + "grad_norm": 4.184003898899391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114090 + }, + { + "epoch": 0.5533643802597805, + "grad_norm": 3.9629770753890625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114100 + }, + { + "epoch": 0.5534128784526167, + "grad_norm": 3.649874429356714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114110 + }, + { + "epoch": 0.5534613766454527, + "grad_norm": 3.778597204018297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114120 + }, + { + "epoch": 0.5535098748382888, + "grad_norm": 3.490854965093604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114130 + }, + { + "epoch": 0.5535583730311249, + "grad_norm": 3.607701444252598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114140 + }, + { + "epoch": 0.553606871223961, + "grad_norm": 3.586424384138809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114150 + }, + { + "epoch": 0.5536553694167972, + "grad_norm": 4.5529804992838763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114160 + }, + { + "epoch": 0.5537038676096332, + "grad_norm": 3.891994140303723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114170 + }, + { + "epoch": 0.5537523658024693, + "grad_norm": 3.5175835932932387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114180 + }, + { + "epoch": 0.5538008639953054, + "grad_norm": 3.42320191748513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114190 + }, + { + "epoch": 0.5538493621881415, + "grad_norm": 3.7640614891643054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114200 + }, + { + "epoch": 0.5538978603809775, + "grad_norm": 3.468345823876007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114210 + }, + { + "epoch": 0.5539463585738137, + "grad_norm": 3.500470597828098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114220 + }, + { + "epoch": 0.5539948567666497, + "grad_norm": 3.906515360085905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114230 + }, + { + "epoch": 0.5540433549594859, + "grad_norm": 3.7014345366515045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114240 + }, + { + "epoch": 0.5540918531523219, + "grad_norm": 3.239580337321968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114250 + }, + { + "epoch": 0.554140351345158, + "grad_norm": 3.4597502462929697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114260 + }, + { + "epoch": 0.5541888495379941, + "grad_norm": 3.4179140584456036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114270 + }, + { + "epoch": 0.5542373477308302, + "grad_norm": 3.7220235071799834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114280 + }, + { + "epoch": 0.5542858459236663, + "grad_norm": 3.5579751056502573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114290 + }, + { + "epoch": 0.5543343441165024, + "grad_norm": 3.489025743874663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114300 + }, + { + "epoch": 0.5543828423093384, + "grad_norm": 3.414813818380935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114310 + }, + { + "epoch": 0.5544313405021746, + "grad_norm": 3.18433762913628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114320 + }, + { + "epoch": 0.5544798386950106, + "grad_norm": 3.447530900757556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114330 + }, + { + "epoch": 0.5545283368878468, + "grad_norm": 3.184697163760575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114340 + }, + { + "epoch": 0.5545768350806828, + "grad_norm": 3.17766875923553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114350 + }, + { + "epoch": 0.5546253332735189, + "grad_norm": 3.140276589874702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114360 + }, + { + "epoch": 0.554673831466355, + "grad_norm": 3.2806525496198446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114370 + }, + { + "epoch": 0.5547223296591911, + "grad_norm": 3.397016143935616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114380 + }, + { + "epoch": 0.5547708278520271, + "grad_norm": 3.2280146911034535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114390 + }, + { + "epoch": 0.5548193260448633, + "grad_norm": 3.018796519427269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114400 + }, + { + "epoch": 0.5548678242376993, + "grad_norm": 3.173496452291147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114410 + }, + { + "epoch": 0.5549163224305355, + "grad_norm": 3.05005158907079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114420 + }, + { + "epoch": 0.5549648206233715, + "grad_norm": 3.1415783041666145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114430 + }, + { + "epoch": 0.5550133188162076, + "grad_norm": 4.043136527798197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114440 + }, + { + "epoch": 0.5550618170090437, + "grad_norm": 3.242753052745684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114450 + }, + { + "epoch": 0.5551103152018798, + "grad_norm": 3.211895602817094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114460 + }, + { + "epoch": 0.5551588133947158, + "grad_norm": 3.10333518882544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114470 + }, + { + "epoch": 0.555207311587552, + "grad_norm": 3.209736973985855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114480 + }, + { + "epoch": 0.555255809780388, + "grad_norm": 3.253652778312244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114490 + }, + { + "epoch": 0.5553043079732242, + "grad_norm": 2.8922659112140536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114500 + }, + { + "epoch": 0.5553528061660602, + "grad_norm": 3.0296274644570076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114510 + }, + { + "epoch": 0.5554013043588963, + "grad_norm": 3.118894653653115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114520 + }, + { + "epoch": 0.5554498025517324, + "grad_norm": 3.09932090658549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114530 + }, + { + "epoch": 0.5554983007445685, + "grad_norm": 2.9240507615213573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114540 + }, + { + "epoch": 0.5555467989374046, + "grad_norm": 2.75152274298307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114550 + }, + { + "epoch": 0.5555952971302407, + "grad_norm": 3.292558687917335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114560 + }, + { + "epoch": 0.5556437953230767, + "grad_norm": 2.9440266757774225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114570 + }, + { + "epoch": 0.5556922935159129, + "grad_norm": 3.071346270644426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114580 + }, + { + "epoch": 0.5557407917087489, + "grad_norm": 2.733519295361475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114590 + }, + { + "epoch": 0.555789289901585, + "grad_norm": 2.701010544114979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114600 + }, + { + "epoch": 0.5558377880944211, + "grad_norm": 3.0155032959555683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114610 + }, + { + "epoch": 0.5558862862872572, + "grad_norm": 3.2166371966013685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114620 + }, + { + "epoch": 0.5559347844800933, + "grad_norm": 2.958803690944478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114630 + }, + { + "epoch": 0.5559832826729294, + "grad_norm": 3.134290125217376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114640 + }, + { + "epoch": 0.5560317808657654, + "grad_norm": 9.315570537182793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114650 + }, + { + "epoch": 0.5560802790586016, + "grad_norm": 2.9585021366074216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114660 + }, + { + "epoch": 0.5561287772514377, + "grad_norm": 3.105957375737489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114670 + }, + { + "epoch": 0.5561772754442738, + "grad_norm": 2.9752592922704935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114680 + }, + { + "epoch": 0.5562257736371099, + "grad_norm": 2.6574286948743975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114690 + }, + { + "epoch": 0.5562742718299459, + "grad_norm": 2.6122327767552633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114700 + }, + { + "epoch": 0.5563227700227821, + "grad_norm": 2.710669093630713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114710 + }, + { + "epoch": 0.5563712682156181, + "grad_norm": 2.65720501602118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114720 + }, + { + "epoch": 0.5564197664084543, + "grad_norm": 2.8878207558591384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114730 + }, + { + "epoch": 0.5564682646012903, + "grad_norm": 2.606335556265549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114740 + }, + { + "epoch": 0.5565167627941264, + "grad_norm": 2.604377300485794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114750 + }, + { + "epoch": 0.5565652609869625, + "grad_norm": 5.490093144544517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114760 + }, + { + "epoch": 0.5566137591797986, + "grad_norm": 2.7092718823951145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114770 + }, + { + "epoch": 0.5566622573726346, + "grad_norm": 2.7202182195651403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114780 + }, + { + "epoch": 0.5567107555654708, + "grad_norm": 2.5668799708000734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114790 + }, + { + "epoch": 0.5567592537583068, + "grad_norm": 2.7562614945964015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114800 + }, + { + "epoch": 0.556807751951143, + "grad_norm": 2.7005182801076444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114810 + }, + { + "epoch": 0.556856250143979, + "grad_norm": 2.7902339638785634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114820 + }, + { + "epoch": 0.5569047483368151, + "grad_norm": 2.5266135139645485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114830 + }, + { + "epoch": 0.5569532465296512, + "grad_norm": 2.387878055287729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114840 + }, + { + "epoch": 0.5570017447224873, + "grad_norm": 2.479567342561495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114850 + }, + { + "epoch": 0.5570502429153233, + "grad_norm": 2.600723121304327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114860 + }, + { + "epoch": 0.5570987411081595, + "grad_norm": 2.469323874265683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114870 + }, + { + "epoch": 0.5571472393009955, + "grad_norm": 2.391034854554164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114880 + }, + { + "epoch": 0.5571957374938317, + "grad_norm": 2.4830723077684524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114890 + }, + { + "epoch": 0.5572442356866677, + "grad_norm": 2.3219217837322503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114900 + }, + { + "epoch": 0.5572927338795038, + "grad_norm": 2.4982395530059875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114910 + }, + { + "epoch": 0.5573412320723399, + "grad_norm": 2.523527484754595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114920 + }, + { + "epoch": 0.557389730265176, + "grad_norm": 2.468926538767846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114930 + }, + { + "epoch": 0.557438228458012, + "grad_norm": 2.456517051996343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114940 + }, + { + "epoch": 0.5574867266508482, + "grad_norm": 2.3696840401044028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114950 + }, + { + "epoch": 0.5575352248436842, + "grad_norm": 2.887458379063901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114960 + }, + { + "epoch": 0.5575837230365204, + "grad_norm": 2.6222261340080877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114970 + }, + { + "epoch": 0.5576322212293564, + "grad_norm": 2.549887767600012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114980 + }, + { + "epoch": 0.5576807194221926, + "grad_norm": 2.1269399042012083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 114990 + }, + { + "epoch": 0.5577292176150286, + "grad_norm": 2.2155717260829988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115000 + }, + { + "epoch": 0.5577777158078647, + "grad_norm": 2.5210607645931304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115010 + }, + { + "epoch": 0.5578262140007008, + "grad_norm": 2.3553731409720058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115020 + }, + { + "epoch": 0.5578747121935369, + "grad_norm": 2.3490377998314216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115030 + }, + { + "epoch": 0.5579232103863729, + "grad_norm": 2.2051047210425168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115040 + }, + { + "epoch": 0.5579717085792091, + "grad_norm": 2.0687150481535355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115050 + }, + { + "epoch": 0.5580202067720451, + "grad_norm": 2.595064643173828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115060 + }, + { + "epoch": 0.5580687049648813, + "grad_norm": 2.479935403698619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115070 + }, + { + "epoch": 0.5581172031577173, + "grad_norm": 2.3802299153885542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115080 + }, + { + "epoch": 0.5581657013505534, + "grad_norm": 2.15601488662287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115090 + }, + { + "epoch": 0.5582141995433895, + "grad_norm": 2.1091197766054393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115100 + }, + { + "epoch": 0.5582626977362256, + "grad_norm": 2.345061602682108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115110 + }, + { + "epoch": 0.5583111959290616, + "grad_norm": 2.2029674084933504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115120 + }, + { + "epoch": 0.5583596941218978, + "grad_norm": 2.3397700488203554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115130 + }, + { + "epoch": 0.5584081923147338, + "grad_norm": 2.0198206129862228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115140 + }, + { + "epoch": 0.55845669050757, + "grad_norm": 2.020313303319199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115150 + }, + { + "epoch": 0.558505188700406, + "grad_norm": 2.245026564651198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115160 + }, + { + "epoch": 0.5585536868932421, + "grad_norm": 2.259001803395222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115170 + }, + { + "epoch": 0.5586021850860783, + "grad_norm": 2.3141805627346912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115180 + }, + { + "epoch": 0.5586506832789143, + "grad_norm": 1.9493408842663484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115190 + }, + { + "epoch": 0.5586991814717505, + "grad_norm": 1.8704847093431454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115200 + }, + { + "epoch": 0.5587476796645865, + "grad_norm": 2.2948675848510902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115210 + }, + { + "epoch": 0.5587961778574226, + "grad_norm": 5.696942935173865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115220 + }, + { + "epoch": 0.5588446760502587, + "grad_norm": 2.1758253865300503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115230 + }, + { + "epoch": 0.5588931742430948, + "grad_norm": 1.8835328319255495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115240 + }, + { + "epoch": 0.5589416724359308, + "grad_norm": 1.9974467591055145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115250 + }, + { + "epoch": 0.558990170628767, + "grad_norm": 2.2820505307663552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115260 + }, + { + "epoch": 0.559038668821603, + "grad_norm": 2.1969317742787098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115270 + }, + { + "epoch": 0.5590871670144392, + "grad_norm": 2.1381509895945783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115280 + }, + { + "epoch": 0.5591356652072752, + "grad_norm": 1.9522546779171535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115290 + }, + { + "epoch": 0.5591841634001113, + "grad_norm": 1.9849318277920247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115300 + }, + { + "epoch": 0.5592326615929474, + "grad_norm": 2.2858090176214318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115310 + }, + { + "epoch": 0.5592811597857835, + "grad_norm": 2.0761937946645048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115320 + }, + { + "epoch": 0.5593296579786196, + "grad_norm": 2.0396034017267084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115330 + }, + { + "epoch": 0.5593781561714557, + "grad_norm": 1.7742851809998683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115340 + }, + { + "epoch": 0.5594266543642917, + "grad_norm": 1.926181596445531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115350 + }, + { + "epoch": 0.5594751525571279, + "grad_norm": 1.9982809362772969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115360 + }, + { + "epoch": 0.5595236507499639, + "grad_norm": 2.0682423951257078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115370 + }, + { + "epoch": 0.5595721489428, + "grad_norm": 2.1751273493464396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115380 + }, + { + "epoch": 0.5596206471356361, + "grad_norm": 1.7793837514545885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115390 + }, + { + "epoch": 0.5596691453284722, + "grad_norm": 1.7732523360791674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115400 + }, + { + "epoch": 0.5597176435213083, + "grad_norm": 2.0156620905709133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115410 + }, + { + "epoch": 0.5597661417141444, + "grad_norm": 2.0347529527953157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115420 + }, + { + "epoch": 0.5598146399069804, + "grad_norm": 2.0439458126020327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115430 + }, + { + "epoch": 0.5598631380998166, + "grad_norm": 1.7673947638741083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115440 + }, + { + "epoch": 0.5599116362926526, + "grad_norm": 1.788914545386433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115450 + }, + { + "epoch": 0.5599601344854888, + "grad_norm": 1.8539391533067828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115460 + }, + { + "epoch": 0.5600086326783248, + "grad_norm": 2.099779976560967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115470 + }, + { + "epoch": 0.5600571308711609, + "grad_norm": 2.0020391389152792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115480 + }, + { + "epoch": 0.560105629063997, + "grad_norm": 1.7677901098522852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115490 + }, + { + "epoch": 0.5601541272568331, + "grad_norm": 1.8160220349727751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115500 + }, + { + "epoch": 0.5602026254496691, + "grad_norm": 2.060997843500445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115510 + }, + { + "epoch": 0.5602511236425053, + "grad_norm": 1.9473853285489895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115520 + }, + { + "epoch": 0.5602996218353413, + "grad_norm": 1.8798424150645587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115530 + }, + { + "epoch": 0.5603481200281775, + "grad_norm": 1.664538302748042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115540 + }, + { + "epoch": 0.5603966182210135, + "grad_norm": 1.5471808012534893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115550 + }, + { + "epoch": 0.5604451164138496, + "grad_norm": 2.2069116312195547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115560 + }, + { + "epoch": 0.5604936146066857, + "grad_norm": 1.9618101987362024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115570 + }, + { + "epoch": 0.5605421127995218, + "grad_norm": 1.900803709986576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115580 + }, + { + "epoch": 0.5605906109923579, + "grad_norm": 1.6725070395295916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115590 + }, + { + "epoch": 0.560639109185194, + "grad_norm": 1.5444267376096832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115600 + }, + { + "epoch": 0.56068760737803, + "grad_norm": 1.848683979233101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115610 + }, + { + "epoch": 0.5607361055708662, + "grad_norm": 1.9433748832398123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115620 + }, + { + "epoch": 0.5607846037637022, + "grad_norm": 1.781508132125964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115630 + }, + { + "epoch": 0.5608331019565383, + "grad_norm": 1.5611227865974797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115640 + }, + { + "epoch": 0.5608816001493744, + "grad_norm": 1.61100032869399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115650 + }, + { + "epoch": 0.5609300983422105, + "grad_norm": 1.772723834392309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115660 + }, + { + "epoch": 0.5609785965350466, + "grad_norm": 2.2502570118376752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115670 + }, + { + "epoch": 0.5610270947278827, + "grad_norm": 1.7228640558641928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115680 + }, + { + "epoch": 0.5610755929207188, + "grad_norm": 1.687678405914994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115690 + }, + { + "epoch": 0.5611240911135549, + "grad_norm": 1.5525090191204072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115700 + }, + { + "epoch": 0.561172589306391, + "grad_norm": 1.902143225152031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115710 + }, + { + "epoch": 0.5612210874992271, + "grad_norm": 1.7835601795468392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115720 + }, + { + "epoch": 0.5612695856920632, + "grad_norm": 1.8289206593635754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115730 + }, + { + "epoch": 0.5613180838848992, + "grad_norm": 2.3932668113957334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115740 + }, + { + "epoch": 0.5613665820777354, + "grad_norm": 1.9454556365872122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115750 + }, + { + "epoch": 0.5614150802705714, + "grad_norm": 1.872555941417886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115760 + }, + { + "epoch": 0.5614635784634076, + "grad_norm": 1.8044488570012618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115770 + }, + { + "epoch": 0.5615120766562436, + "grad_norm": 1.7430252796657442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115780 + }, + { + "epoch": 0.5615605748490797, + "grad_norm": 1.4893562649831438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115790 + }, + { + "epoch": 0.5616090730419158, + "grad_norm": 1.443399781919652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115800 + }, + { + "epoch": 0.5616575712347519, + "grad_norm": 1.7516249783966487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115810 + }, + { + "epoch": 0.5617060694275879, + "grad_norm": 1.6914260925204871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115820 + }, + { + "epoch": 0.5617545676204241, + "grad_norm": 1.7468140356413642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115830 + }, + { + "epoch": 0.5618030658132601, + "grad_norm": 1.4106652201917314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115840 + }, + { + "epoch": 0.5618515640060963, + "grad_norm": 1.4400471570752416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115850 + }, + { + "epoch": 0.5619000621989323, + "grad_norm": 1.6786390233392012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115860 + }, + { + "epoch": 0.5619485603917684, + "grad_norm": 1.7470016189236048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115870 + }, + { + "epoch": 0.5619970585846045, + "grad_norm": 1.6343690845133096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115880 + }, + { + "epoch": 0.5620455567774406, + "grad_norm": 3.4846559060497384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115890 + }, + { + "epoch": 0.5620940549702766, + "grad_norm": 1.620758780518372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115900 + }, + { + "epoch": 0.5621425531631128, + "grad_norm": 1.7973709987018083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115910 + }, + { + "epoch": 0.5621910513559488, + "grad_norm": 1.6948739300914895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115920 + }, + { + "epoch": 0.562239549548785, + "grad_norm": 1.6452952422696399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115930 + }, + { + "epoch": 0.562288047741621, + "grad_norm": 1.5672129904942267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115940 + }, + { + "epoch": 0.5623365459344571, + "grad_norm": 1.4860228247925988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115950 + }, + { + "epoch": 0.5623850441272932, + "grad_norm": 1.6349646614344238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115960 + }, + { + "epoch": 0.5624335423201293, + "grad_norm": 1.6667105739998078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115970 + }, + { + "epoch": 0.5624820405129654, + "grad_norm": 1.6204943165121222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115980 + }, + { + "epoch": 0.5625305387058015, + "grad_norm": 1.5272040343461413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 115990 + }, + { + "epoch": 0.5625790368986375, + "grad_norm": 1.5412412324167235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116000 + }, + { + "epoch": 0.5626275350914737, + "grad_norm": 1.539565488428707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116010 + }, + { + "epoch": 0.5626760332843097, + "grad_norm": 1.6394533020047675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116020 + }, + { + "epoch": 0.5627245314771459, + "grad_norm": 1.8553635072748875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116030 + }, + { + "epoch": 0.5627730296699819, + "grad_norm": 1.515156924369876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116040 + }, + { + "epoch": 0.562821527862818, + "grad_norm": 5.726608947043133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116050 + }, + { + "epoch": 0.5628700260556541, + "grad_norm": 1.5406516240545898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116060 + }, + { + "epoch": 0.5629185242484902, + "grad_norm": 1.5187470125965774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116070 + }, + { + "epoch": 0.5629670224413262, + "grad_norm": 1.5951765419686126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116080 + }, + { + "epoch": 0.5630155206341624, + "grad_norm": 3.7583856737910537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116090 + }, + { + "epoch": 0.5630640188269984, + "grad_norm": 1.3298603107614326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116100 + }, + { + "epoch": 0.5631125170198346, + "grad_norm": 1.577421357978892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116110 + }, + { + "epoch": 0.5631610152126706, + "grad_norm": 1.6244338496562705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116120 + }, + { + "epoch": 0.5632095134055067, + "grad_norm": 1.6450623263608577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116130 + }, + { + "epoch": 0.5632580115983428, + "grad_norm": 1.534489513232984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116140 + }, + { + "epoch": 0.5633065097911789, + "grad_norm": 1.32197456537142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116150 + }, + { + "epoch": 0.5633550079840149, + "grad_norm": 1.595495717765516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116160 + }, + { + "epoch": 0.5634035061768511, + "grad_norm": 1.5339803383085382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116170 + }, + { + "epoch": 0.5634520043696871, + "grad_norm": 1.5303423595014465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116180 + }, + { + "epoch": 0.5635005025625233, + "grad_norm": 1.3369478324420925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116190 + }, + { + "epoch": 0.5635490007553594, + "grad_norm": 1.3207218785282748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116200 + }, + { + "epoch": 0.5635974989481954, + "grad_norm": 1.6055098228662246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116210 + }, + { + "epoch": 0.5636459971410316, + "grad_norm": 1.5620403814864403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116220 + }, + { + "epoch": 0.5636944953338676, + "grad_norm": 1.6055463447628426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116230 + }, + { + "epoch": 0.5637429935267038, + "grad_norm": 1.3025328371440992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116240 + }, + { + "epoch": 0.5637914917195398, + "grad_norm": 1.49888947476029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116250 + }, + { + "epoch": 0.5638399899123759, + "grad_norm": 1.5809372655439802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116260 + }, + { + "epoch": 0.563888488105212, + "grad_norm": 1.4748279397736042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116270 + }, + { + "epoch": 0.5639369862980481, + "grad_norm": 1.4870074949158152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116280 + }, + { + "epoch": 0.5639854844908841, + "grad_norm": 1.3791067488000408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116290 + }, + { + "epoch": 0.5640339826837203, + "grad_norm": 1.2899799628485198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116300 + }, + { + "epoch": 0.5640824808765563, + "grad_norm": 1.4661054592579603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116310 + }, + { + "epoch": 0.5641309790693925, + "grad_norm": 1.4666535719243257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116320 + }, + { + "epoch": 0.5641794772622285, + "grad_norm": 1.5205654335659347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116330 + }, + { + "epoch": 0.5642279754550646, + "grad_norm": 1.3644029195347684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116340 + }, + { + "epoch": 0.5642764736479007, + "grad_norm": 1.5296831179512083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116350 + }, + { + "epoch": 0.5643249718407368, + "grad_norm": 1.448344022492165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116360 + }, + { + "epoch": 0.5643734700335729, + "grad_norm": 1.5144723874982446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116370 + }, + { + "epoch": 0.564421968226409, + "grad_norm": 1.4478308685283992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116380 + }, + { + "epoch": 0.564470466419245, + "grad_norm": 1.2970711793514056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116390 + }, + { + "epoch": 0.5645189646120812, + "grad_norm": 1.3690916489395022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116400 + }, + { + "epoch": 0.5645674628049172, + "grad_norm": 0.001275762915611267, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116410 + }, + { + "epoch": 0.5646159609977534, + "grad_norm": 1.3786738861654158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116420 + }, + { + "epoch": 0.5646644591905894, + "grad_norm": 1.3825912503762083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116430 + }, + { + "epoch": 0.5647129573834255, + "grad_norm": 2.752396710548055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116440 + }, + { + "epoch": 0.5647614555762616, + "grad_norm": 1.4679454807264847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116450 + }, + { + "epoch": 0.5648099537690977, + "grad_norm": 1.3870590009901207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116460 + }, + { + "epoch": 0.5648584519619337, + "grad_norm": 1.3847171942416026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116470 + }, + { + "epoch": 0.5649069501547699, + "grad_norm": 0.002089801710098982, + "learning_rate": 0.0002, + "loss": 0.0036, + "step": 116480 + }, + { + "epoch": 0.5649554483476059, + "grad_norm": 0.0005416262429207563, + "learning_rate": 0.0002, + "loss": 0.0093, + "step": 116490 + }, + { + "epoch": 0.5650039465404421, + "grad_norm": 0.0006125083309598267, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 116500 + }, + { + "epoch": 0.5650524447332781, + "grad_norm": 0.00023626521578989923, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 116510 + }, + { + "epoch": 0.5651009429261142, + "grad_norm": 0.0007188890594989061, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 116520 + }, + { + "epoch": 0.5651494411189503, + "grad_norm": 0.21914972364902496, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 116530 + }, + { + "epoch": 0.5651979393117864, + "grad_norm": 4.966763299307786e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116540 + }, + { + "epoch": 0.5652464375046224, + "grad_norm": 2.9555698347394355e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116550 + }, + { + "epoch": 0.5652949356974586, + "grad_norm": 5.787422924186103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116560 + }, + { + "epoch": 0.5653434338902946, + "grad_norm": 1.4483081940852571e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116570 + }, + { + "epoch": 0.5653919320831308, + "grad_norm": 1.2626290299522225e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116580 + }, + { + "epoch": 0.5654404302759668, + "grad_norm": 2.1067640773253515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116590 + }, + { + "epoch": 0.5654889284688029, + "grad_norm": 1.954490289790556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116600 + }, + { + "epoch": 0.565537426661639, + "grad_norm": 2.5921503038262017e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 116610 + }, + { + "epoch": 0.5655859248544751, + "grad_norm": 4.959717989549972e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116620 + }, + { + "epoch": 0.5656344230473112, + "grad_norm": 5.772781514679082e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116630 + }, + { + "epoch": 0.5656829212401473, + "grad_norm": 6.74636903568171e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116640 + }, + { + "epoch": 0.5657314194329833, + "grad_norm": 5.564597086049616e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116650 + }, + { + "epoch": 0.5657799176258195, + "grad_norm": 3.829619527095929e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116660 + }, + { + "epoch": 0.5658284158186555, + "grad_norm": 2.4485543690389022e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116670 + }, + { + "epoch": 0.5658769140114916, + "grad_norm": 2.1689404093194753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116680 + }, + { + "epoch": 0.5659254122043277, + "grad_norm": 2.6845293177757412e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116690 + }, + { + "epoch": 0.5659739103971638, + "grad_norm": 2.291376586072147e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116700 + }, + { + "epoch": 0.56602240859, + "grad_norm": 1.4953079698898364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116710 + }, + { + "epoch": 0.566070906782836, + "grad_norm": 1.4499551980406977e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116720 + }, + { + "epoch": 0.5661194049756721, + "grad_norm": 1.2260834409971721e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116730 + }, + { + "epoch": 0.5661679031685082, + "grad_norm": 1.8349568563280627e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116740 + }, + { + "epoch": 0.5662164013613443, + "grad_norm": 1.6551579392398708e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116750 + }, + { + "epoch": 0.5662648995541804, + "grad_norm": 1.2613949365913868e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116760 + }, + { + "epoch": 0.5663133977470165, + "grad_norm": 1.0419249520055018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116770 + }, + { + "epoch": 0.5663618959398525, + "grad_norm": 9.943135410139803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116780 + }, + { + "epoch": 0.5664103941326887, + "grad_norm": 1.3258716535347048e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116790 + }, + { + "epoch": 0.5664588923255247, + "grad_norm": 1.2299834452278446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116800 + }, + { + "epoch": 0.5665073905183609, + "grad_norm": 8.203344805224333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116810 + }, + { + "epoch": 0.5665558887111969, + "grad_norm": 7.2562452260171995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116820 + }, + { + "epoch": 0.566604386904033, + "grad_norm": 7.6123378676129505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116830 + }, + { + "epoch": 0.5666528850968691, + "grad_norm": 1.0092524462379515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116840 + }, + { + "epoch": 0.5667013832897052, + "grad_norm": 9.537578080198728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116850 + }, + { + "epoch": 0.5667498814825412, + "grad_norm": 7.0477444751304574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116860 + }, + { + "epoch": 0.5667983796753774, + "grad_norm": 6.0978391047683544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116870 + }, + { + "epoch": 0.5668468778682134, + "grad_norm": 6.475549525930546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116880 + }, + { + "epoch": 0.5668953760610496, + "grad_norm": 9.117968147620559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116890 + }, + { + "epoch": 0.5669438742538856, + "grad_norm": 8.593269740231335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116900 + }, + { + "epoch": 0.5669923724467217, + "grad_norm": 5.783167580375448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116910 + }, + { + "epoch": 0.5670408706395578, + "grad_norm": 5.563159902521875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116920 + }, + { + "epoch": 0.5670893688323939, + "grad_norm": 5.285658062348375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116930 + }, + { + "epoch": 0.56713786702523, + "grad_norm": 7.4739341471286025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116940 + }, + { + "epoch": 0.5671863652180661, + "grad_norm": 7.126407126634149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116950 + }, + { + "epoch": 0.5672348634109021, + "grad_norm": 4.978187462256756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116960 + }, + { + "epoch": 0.5672833616037383, + "grad_norm": 4.189450464764377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116970 + }, + { + "epoch": 0.5673318597965743, + "grad_norm": 4.444771548151039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116980 + }, + { + "epoch": 0.5673803579894104, + "grad_norm": 6.465203114203177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 116990 + }, + { + "epoch": 0.5674288561822465, + "grad_norm": 6.05723880653386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117000 + }, + { + "epoch": 0.5674773543750826, + "grad_norm": 4.271288617019309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117010 + }, + { + "epoch": 0.5675258525679187, + "grad_norm": 4.057005753566045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117020 + }, + { + "epoch": 0.5675743507607548, + "grad_norm": 3.822797680186341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117030 + }, + { + "epoch": 0.5676228489535908, + "grad_norm": 5.5964969760680106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117040 + }, + { + "epoch": 0.567671347146427, + "grad_norm": 7.557831395388348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117050 + }, + { + "epoch": 0.567719845339263, + "grad_norm": 3.7420247736008605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117060 + }, + { + "epoch": 0.5677683435320992, + "grad_norm": 4.087014985998394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117070 + }, + { + "epoch": 0.5678168417249352, + "grad_norm": 3.512893272272777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117080 + }, + { + "epoch": 0.5678653399177713, + "grad_norm": 4.667082066589501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117090 + }, + { + "epoch": 0.5679138381106074, + "grad_norm": 6.528517133119749e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 117100 + }, + { + "epoch": 0.5679623363034435, + "grad_norm": 6.986044900259003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117110 + }, + { + "epoch": 0.5680108344962795, + "grad_norm": 0.0001393822458339855, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117120 + }, + { + "epoch": 0.5680593326891157, + "grad_norm": 8.926497685024515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117130 + }, + { + "epoch": 0.5681078308819517, + "grad_norm": 8.363172673853114e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117140 + }, + { + "epoch": 0.5681563290747879, + "grad_norm": 4.952583913109265e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117150 + }, + { + "epoch": 0.5682048272676239, + "grad_norm": 2.0110986952204257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117160 + }, + { + "epoch": 0.56825332546046, + "grad_norm": 1.7224876501131803e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117170 + }, + { + "epoch": 0.5683018236532961, + "grad_norm": 1.463192529627122e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117180 + }, + { + "epoch": 0.5683503218461322, + "grad_norm": 2.007411967497319e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117190 + }, + { + "epoch": 0.5683988200389682, + "grad_norm": 1.813292328733951e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117200 + }, + { + "epoch": 0.5684473182318044, + "grad_norm": 1.011261520034168e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117210 + }, + { + "epoch": 0.5684958164246405, + "grad_norm": 1.0421530532767065e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117220 + }, + { + "epoch": 0.5685443146174766, + "grad_norm": 8.747732863412239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117230 + }, + { + "epoch": 0.5685928128103127, + "grad_norm": 1.3581776329374406e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117240 + }, + { + "epoch": 0.5686413110031487, + "grad_norm": 1.381055153615307e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117250 + }, + { + "epoch": 0.5686898091959849, + "grad_norm": 7.376514076895546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117260 + }, + { + "epoch": 0.5687383073888209, + "grad_norm": 9.441276233701501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117270 + }, + { + "epoch": 0.5687868055816571, + "grad_norm": 6.185328857100103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117280 + }, + { + "epoch": 0.5688353037744931, + "grad_norm": 1.0523054697841872e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117290 + }, + { + "epoch": 0.5688838019673292, + "grad_norm": 9.969884558813646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117300 + }, + { + "epoch": 0.5689323001601653, + "grad_norm": 6.2387698562815785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117310 + }, + { + "epoch": 0.5689807983530014, + "grad_norm": 5.185266218177276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117320 + }, + { + "epoch": 0.5690292965458374, + "grad_norm": 4.724096470454242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117330 + }, + { + "epoch": 0.5690777947386736, + "grad_norm": 0.00014919618843123317, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 117340 + }, + { + "epoch": 0.5691262929315096, + "grad_norm": 0.0003700850938912481, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117350 + }, + { + "epoch": 0.5691747911243458, + "grad_norm": 0.00010787721839733422, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117360 + }, + { + "epoch": 0.5692232893171818, + "grad_norm": 3.187150650774129e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117370 + }, + { + "epoch": 0.569271787510018, + "grad_norm": 1.789474299584981e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117380 + }, + { + "epoch": 0.569320285702854, + "grad_norm": 1.2064535440003965e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117390 + }, + { + "epoch": 0.5693687838956901, + "grad_norm": 1.0905692761298269e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117400 + }, + { + "epoch": 0.5694172820885262, + "grad_norm": 1.0109313734574243e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117410 + }, + { + "epoch": 0.5694657802813623, + "grad_norm": 9.675085493654478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117420 + }, + { + "epoch": 0.5695142784741983, + "grad_norm": 8.328388503286988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117430 + }, + { + "epoch": 0.5695627766670345, + "grad_norm": 8.92848038347438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117440 + }, + { + "epoch": 0.5696112748598705, + "grad_norm": 8.749490007176064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117450 + }, + { + "epoch": 0.5696597730527067, + "grad_norm": 7.486853519367287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117460 + }, + { + "epoch": 0.5697082712455427, + "grad_norm": 6.488875442300923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117470 + }, + { + "epoch": 0.5697567694383788, + "grad_norm": 6.475177542597521e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117480 + }, + { + "epoch": 0.5698052676312149, + "grad_norm": 6.9367565629363526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117490 + }, + { + "epoch": 0.569853765824051, + "grad_norm": 6.714550181641243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117500 + }, + { + "epoch": 0.569902264016887, + "grad_norm": 5.706432148144813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117510 + }, + { + "epoch": 0.5699507622097232, + "grad_norm": 5.581755431194324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117520 + }, + { + "epoch": 0.5699992604025592, + "grad_norm": 5.277830950944917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117530 + }, + { + "epoch": 0.5700477585953954, + "grad_norm": 5.0072344492946286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117540 + }, + { + "epoch": 0.5700962567882314, + "grad_norm": 8.34336424304638e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 117550 + }, + { + "epoch": 0.5701447549810675, + "grad_norm": 0.00322324107401073, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117560 + }, + { + "epoch": 0.5701932531739036, + "grad_norm": 4.6464407205348834e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117570 + }, + { + "epoch": 0.5702417513667397, + "grad_norm": 2.2906200683792122e-05, + "learning_rate": 0.0002, + "loss": 0.0016, + "step": 117580 + }, + { + "epoch": 0.5702902495595757, + "grad_norm": 0.0009279194055125117, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 117590 + }, + { + "epoch": 0.5703387477524119, + "grad_norm": 0.0005566533654928207, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117600 + }, + { + "epoch": 0.5703872459452479, + "grad_norm": 0.0026575522497296333, + "learning_rate": 0.0002, + "loss": 0.0711, + "step": 117610 + }, + { + "epoch": 0.5704357441380841, + "grad_norm": 0.00011430789163568988, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117620 + }, + { + "epoch": 0.5704842423309201, + "grad_norm": 4.587982766679488e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117630 + }, + { + "epoch": 0.5705327405237562, + "grad_norm": 5.6853150454116985e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 117640 + }, + { + "epoch": 0.5705812387165923, + "grad_norm": 4.808493031305261e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117650 + }, + { + "epoch": 0.5706297369094284, + "grad_norm": 3.920274684787728e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117660 + }, + { + "epoch": 0.5706782351022645, + "grad_norm": 3.222877057851292e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117670 + }, + { + "epoch": 0.5707267332951006, + "grad_norm": 0.0002994221285916865, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117680 + }, + { + "epoch": 0.5707752314879366, + "grad_norm": 2.570626929809805e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117690 + }, + { + "epoch": 0.5708237296807728, + "grad_norm": 2.2971402358962223e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117700 + }, + { + "epoch": 0.5708722278736088, + "grad_norm": 2.0354003936517984e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117710 + }, + { + "epoch": 0.570920726066445, + "grad_norm": 1.7660539015196264e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117720 + }, + { + "epoch": 0.5709692242592811, + "grad_norm": 0.048456307500600815, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117730 + }, + { + "epoch": 0.5710177224521171, + "grad_norm": 0.00013809320807922632, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 117740 + }, + { + "epoch": 0.5710662206449533, + "grad_norm": 0.001608795253559947, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 117750 + }, + { + "epoch": 0.5711147188377893, + "grad_norm": 0.00035941103124059737, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117760 + }, + { + "epoch": 0.5711632170306254, + "grad_norm": 6.718368967995048e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117770 + }, + { + "epoch": 0.5712117152234615, + "grad_norm": 4.6418412239290774e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117780 + }, + { + "epoch": 0.5712602134162976, + "grad_norm": 5.4774867749074474e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117790 + }, + { + "epoch": 0.5713087116091337, + "grad_norm": 5.111736027174629e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117800 + }, + { + "epoch": 0.5713572098019698, + "grad_norm": 3.377635221113451e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117810 + }, + { + "epoch": 0.5714057079948058, + "grad_norm": 5.316715396475047e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117820 + }, + { + "epoch": 0.571454206187642, + "grad_norm": 4.116209674975835e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117830 + }, + { + "epoch": 0.571502704380478, + "grad_norm": 5.305504600983113e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117840 + }, + { + "epoch": 0.5715512025733142, + "grad_norm": 4.615009675035253e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117850 + }, + { + "epoch": 0.5715997007661502, + "grad_norm": 1.702795270830393e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117860 + }, + { + "epoch": 0.5716481989589863, + "grad_norm": 2.7963365937466733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117870 + }, + { + "epoch": 0.5716966971518224, + "grad_norm": 1.4544539226335473e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117880 + }, + { + "epoch": 0.5717451953446585, + "grad_norm": 2.6297513613826595e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117890 + }, + { + "epoch": 0.5717936935374945, + "grad_norm": 2.9891876693000086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117900 + }, + { + "epoch": 0.5718421917303307, + "grad_norm": 1.2908344615425449e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117910 + }, + { + "epoch": 0.5718906899231667, + "grad_norm": 1.3744023817707784e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117920 + }, + { + "epoch": 0.5719391881160029, + "grad_norm": 1.7889968148665503e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117930 + }, + { + "epoch": 0.5719876863088389, + "grad_norm": 1.9210801838198677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117940 + }, + { + "epoch": 0.572036184501675, + "grad_norm": 1.5934447219478898e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117950 + }, + { + "epoch": 0.5720846826945111, + "grad_norm": 1.1265005014138296e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117960 + }, + { + "epoch": 0.5721331808873472, + "grad_norm": 1.080802030628547e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117970 + }, + { + "epoch": 0.5721816790801832, + "grad_norm": 1.0772916539281141e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117980 + }, + { + "epoch": 0.5722301772730194, + "grad_norm": 1.467865331505891e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 117990 + }, + { + "epoch": 0.5722786754658554, + "grad_norm": 1.2369811884127557e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118000 + }, + { + "epoch": 0.5723271736586916, + "grad_norm": 8.657813850732055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118010 + }, + { + "epoch": 0.5723756718515276, + "grad_norm": 9.45813735597767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118020 + }, + { + "epoch": 0.5724241700443637, + "grad_norm": 8.770389285928104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118030 + }, + { + "epoch": 0.5724726682371998, + "grad_norm": 1.1800753782154061e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118040 + }, + { + "epoch": 0.5725211664300359, + "grad_norm": 1.2225600585225038e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118050 + }, + { + "epoch": 0.572569664622872, + "grad_norm": 8.252079169324134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118060 + }, + { + "epoch": 0.5726181628157081, + "grad_norm": 7.339269814110594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118070 + }, + { + "epoch": 0.5726666610085441, + "grad_norm": 7.812826879671775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118080 + }, + { + "epoch": 0.5727151592013803, + "grad_norm": 1.2700935258180834e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118090 + }, + { + "epoch": 0.5727636573942163, + "grad_norm": 9.949683771992568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118100 + }, + { + "epoch": 0.5728121555870525, + "grad_norm": 6.7775376919598784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118110 + }, + { + "epoch": 0.5728606537798885, + "grad_norm": 6.9184402491373476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118120 + }, + { + "epoch": 0.5729091519727246, + "grad_norm": 6.0271772781561594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118130 + }, + { + "epoch": 0.5729576501655607, + "grad_norm": 9.30950864130864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118140 + }, + { + "epoch": 0.5730061483583968, + "grad_norm": 8.405220796703361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118150 + }, + { + "epoch": 0.5730546465512328, + "grad_norm": 1.0728253073466476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118160 + }, + { + "epoch": 0.573103144744069, + "grad_norm": 5.796361620014068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118170 + }, + { + "epoch": 0.573151642936905, + "grad_norm": 5.907023478357587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118180 + }, + { + "epoch": 0.5732001411297412, + "grad_norm": 8.075653568084817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118190 + }, + { + "epoch": 0.5732486393225772, + "grad_norm": 7.433329301420599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118200 + }, + { + "epoch": 0.5732971375154133, + "grad_norm": 5.42246971235727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118210 + }, + { + "epoch": 0.5733456357082494, + "grad_norm": 5.581444384006318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118220 + }, + { + "epoch": 0.5733941339010855, + "grad_norm": 4.813174200535286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118230 + }, + { + "epoch": 0.5734426320939215, + "grad_norm": 6.966441105760168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118240 + }, + { + "epoch": 0.5734911302867577, + "grad_norm": 6.730207587679615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118250 + }, + { + "epoch": 0.5735396284795938, + "grad_norm": 5.008432708564214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118260 + }, + { + "epoch": 0.5735881266724299, + "grad_norm": 4.837767846765928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118270 + }, + { + "epoch": 0.573636624865266, + "grad_norm": 4.596642156684538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118280 + }, + { + "epoch": 0.573685123058102, + "grad_norm": 6.1920036387164146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118290 + }, + { + "epoch": 0.5737336212509382, + "grad_norm": 5.889060776098631e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118300 + }, + { + "epoch": 0.5737821194437742, + "grad_norm": 4.2807178033399396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118310 + }, + { + "epoch": 0.5738306176366104, + "grad_norm": 4.1454345591773745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118320 + }, + { + "epoch": 0.5738791158294464, + "grad_norm": 4.397795237309765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118330 + }, + { + "epoch": 0.5739276140222825, + "grad_norm": 5.679091827914817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118340 + }, + { + "epoch": 0.5739761122151186, + "grad_norm": 5.701859208784299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118350 + }, + { + "epoch": 0.5740246104079547, + "grad_norm": 4.496926521824207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118360 + }, + { + "epoch": 0.5740731086007907, + "grad_norm": 4.1969110498030204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118370 + }, + { + "epoch": 0.5741216067936269, + "grad_norm": 3.6979563446948305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118380 + }, + { + "epoch": 0.5741701049864629, + "grad_norm": 5.150847755430732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118390 + }, + { + "epoch": 0.5742186031792991, + "grad_norm": 5.510088158189319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118400 + }, + { + "epoch": 0.5742671013721351, + "grad_norm": 3.7576212434942136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118410 + }, + { + "epoch": 0.5743155995649712, + "grad_norm": 3.7837621675862465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118420 + }, + { + "epoch": 0.5743640977578073, + "grad_norm": 3.482776719465619e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118430 + }, + { + "epoch": 0.5744125959506434, + "grad_norm": 4.302994057070464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118440 + }, + { + "epoch": 0.5744610941434795, + "grad_norm": 4.40367148257792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118450 + }, + { + "epoch": 0.5745095923363156, + "grad_norm": 1.5026361324999016e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118460 + }, + { + "epoch": 0.5745580905291516, + "grad_norm": 3.2759389796410687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118470 + }, + { + "epoch": 0.5746065887219878, + "grad_norm": 3.278566737208166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118480 + }, + { + "epoch": 0.5746550869148238, + "grad_norm": 4.694866674981313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118490 + }, + { + "epoch": 0.57470358510766, + "grad_norm": 4.255446128809126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118500 + }, + { + "epoch": 0.574752083300496, + "grad_norm": 3.4171059724030783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118510 + }, + { + "epoch": 0.5748005814933321, + "grad_norm": 3.2429863949801074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118520 + }, + { + "epoch": 0.5748490796861682, + "grad_norm": 3.0148048608680256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118530 + }, + { + "epoch": 0.5748975778790043, + "grad_norm": 4.219136826577596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118540 + }, + { + "epoch": 0.5749460760718403, + "grad_norm": 4.218070444039768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118550 + }, + { + "epoch": 0.5749945742646765, + "grad_norm": 2.945921323771472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118560 + }, + { + "epoch": 0.5750430724575125, + "grad_norm": 3.038954673684202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118570 + }, + { + "epoch": 0.5750915706503487, + "grad_norm": 2.977473286591703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118580 + }, + { + "epoch": 0.5751400688431847, + "grad_norm": 3.8072325878601987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118590 + }, + { + "epoch": 0.5751885670360208, + "grad_norm": 3.813193643509294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118600 + }, + { + "epoch": 0.5752370652288569, + "grad_norm": 2.61707305071468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118610 + }, + { + "epoch": 0.575285563421693, + "grad_norm": 2.683418642845936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118620 + }, + { + "epoch": 0.575334061614529, + "grad_norm": 2.6541245006228564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118630 + }, + { + "epoch": 0.5753825598073652, + "grad_norm": 3.386169964869623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118640 + }, + { + "epoch": 0.5754310580002012, + "grad_norm": 3.5790144465863705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118650 + }, + { + "epoch": 0.5754795561930374, + "grad_norm": 2.53664393312647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118660 + }, + { + "epoch": 0.5755280543858734, + "grad_norm": 2.5198710318363737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118670 + }, + { + "epoch": 0.5755765525787095, + "grad_norm": 2.465450506861089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118680 + }, + { + "epoch": 0.5756250507715456, + "grad_norm": 5.7540546549716964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118690 + }, + { + "epoch": 0.5756735489643817, + "grad_norm": 3.365443035363569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118700 + }, + { + "epoch": 0.5757220471572178, + "grad_norm": 2.3716866053291596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118710 + }, + { + "epoch": 0.5757705453500539, + "grad_norm": 2.5211600132024614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118720 + }, + { + "epoch": 0.5758190435428899, + "grad_norm": 2.4768983166723046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118730 + }, + { + "epoch": 0.5758675417357261, + "grad_norm": 3.31186947732931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118740 + }, + { + "epoch": 0.5759160399285621, + "grad_norm": 5.742041594203329e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118750 + }, + { + "epoch": 0.5759645381213983, + "grad_norm": 2.6759005322674057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118760 + }, + { + "epoch": 0.5760130363142344, + "grad_norm": 2.2747944967704825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118770 + }, + { + "epoch": 0.5760615345070704, + "grad_norm": 2.2358456135407323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118780 + }, + { + "epoch": 0.5761100326999066, + "grad_norm": 5.302611953084124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118790 + }, + { + "epoch": 0.5761585308927426, + "grad_norm": 2.951823717012303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118800 + }, + { + "epoch": 0.5762070290855787, + "grad_norm": 2.1039431885583326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118810 + }, + { + "epoch": 0.5762555272784148, + "grad_norm": 1.036120011121966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118820 + }, + { + "epoch": 0.5763040254712509, + "grad_norm": 2.159777295673848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118830 + }, + { + "epoch": 0.576352523664087, + "grad_norm": 2.7195394523005234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118840 + }, + { + "epoch": 0.5764010218569231, + "grad_norm": 2.7763935577240773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118850 + }, + { + "epoch": 0.5764495200497591, + "grad_norm": 8.509006875101477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118860 + }, + { + "epoch": 0.5764980182425953, + "grad_norm": 2.0654088075389154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118870 + }, + { + "epoch": 0.5765465164354313, + "grad_norm": 2.151561602659058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118880 + }, + { + "epoch": 0.5765950146282675, + "grad_norm": 2.6818258902494563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118890 + }, + { + "epoch": 0.5766435128211035, + "grad_norm": 2.494402224328951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118900 + }, + { + "epoch": 0.5766920110139396, + "grad_norm": 3.466128646323341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118910 + }, + { + "epoch": 0.5767405092067757, + "grad_norm": 1.9362692000868265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118920 + }, + { + "epoch": 0.5767890073996118, + "grad_norm": 1.8764986862152e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118930 + }, + { + "epoch": 0.5768375055924478, + "grad_norm": 2.3088412035576766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118940 + }, + { + "epoch": 0.576886003785284, + "grad_norm": 5.790252089354908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118950 + }, + { + "epoch": 0.57693450197812, + "grad_norm": 1.8083336499330471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118960 + }, + { + "epoch": 0.5769830001709562, + "grad_norm": 1.8352549204792012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118970 + }, + { + "epoch": 0.5770314983637922, + "grad_norm": 1.8262561525261845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118980 + }, + { + "epoch": 0.5770799965566283, + "grad_norm": 2.3455727387045044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 118990 + }, + { + "epoch": 0.5771284947494644, + "grad_norm": 2.27480859393836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119000 + }, + { + "epoch": 0.5771769929423005, + "grad_norm": 1.7372800584780634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119010 + }, + { + "epoch": 0.5772254911351365, + "grad_norm": 1.7989347043112502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119020 + }, + { + "epoch": 0.5772739893279727, + "grad_norm": 1.754227696437738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119030 + }, + { + "epoch": 0.5773224875208087, + "grad_norm": 2.109700744767906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119040 + }, + { + "epoch": 0.5773709857136449, + "grad_norm": 2.1737478164141066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119050 + }, + { + "epoch": 0.5774194839064809, + "grad_norm": 1.587482302056742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119060 + }, + { + "epoch": 0.577467982099317, + "grad_norm": 1.577145326336904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119070 + }, + { + "epoch": 0.5775164802921531, + "grad_norm": 2.157646804334945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119080 + }, + { + "epoch": 0.5775649784849892, + "grad_norm": 2.2561764581041643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119090 + }, + { + "epoch": 0.5776134766778253, + "grad_norm": 2.6050561245938297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119100 + }, + { + "epoch": 0.5776619748706614, + "grad_norm": 1.7349304926028708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119110 + }, + { + "epoch": 0.5777104730634974, + "grad_norm": 1.6958653077381314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119120 + }, + { + "epoch": 0.5777589712563336, + "grad_norm": 1.5109482092157123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119130 + }, + { + "epoch": 0.5778074694491696, + "grad_norm": 1.959908104254282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119140 + }, + { + "epoch": 0.5778559676420058, + "grad_norm": 2.185802713938756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119150 + }, + { + "epoch": 0.5779044658348418, + "grad_norm": 1.5797545529494528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119160 + }, + { + "epoch": 0.5779529640276779, + "grad_norm": 1.5430498478963273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119170 + }, + { + "epoch": 0.578001462220514, + "grad_norm": 1.4994024013503804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119180 + }, + { + "epoch": 0.5780499604133501, + "grad_norm": 2.402453901595436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119190 + }, + { + "epoch": 0.5780984586061861, + "grad_norm": 1.854851575444627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119200 + }, + { + "epoch": 0.5781469567990223, + "grad_norm": 1.45226226777595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119210 + }, + { + "epoch": 0.5781954549918583, + "grad_norm": 1.593988145032199e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119220 + }, + { + "epoch": 0.5782439531846945, + "grad_norm": 1.6260629536191118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119230 + }, + { + "epoch": 0.5782924513775305, + "grad_norm": 1.9345318378327647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119240 + }, + { + "epoch": 0.5783409495703666, + "grad_norm": 1.791349518498464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119250 + }, + { + "epoch": 0.5783894477632027, + "grad_norm": 1.7089176935769501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119260 + }, + { + "epoch": 0.5784379459560388, + "grad_norm": 1.4394420304597588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119270 + }, + { + "epoch": 0.578486444148875, + "grad_norm": 1.2888058336102404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119280 + }, + { + "epoch": 0.578534942341711, + "grad_norm": 1.7523733504276606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119290 + }, + { + "epoch": 0.5785834405345471, + "grad_norm": 1.892535010483698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119300 + }, + { + "epoch": 0.5786319387273832, + "grad_norm": 1.3617321883430122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119310 + }, + { + "epoch": 0.5786804369202193, + "grad_norm": 1.3415952935247333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119320 + }, + { + "epoch": 0.5787289351130553, + "grad_norm": 1.317315877713554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119330 + }, + { + "epoch": 0.5787774333058915, + "grad_norm": 1.650653189244622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119340 + }, + { + "epoch": 0.5788259314987275, + "grad_norm": 1.7035796417985694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119350 + }, + { + "epoch": 0.5788744296915637, + "grad_norm": 1.345543637398805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119360 + }, + { + "epoch": 0.5789229278843997, + "grad_norm": 1.310903940066055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119370 + }, + { + "epoch": 0.5789714260772358, + "grad_norm": 1.2121562349420856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119380 + }, + { + "epoch": 0.5790199242700719, + "grad_norm": 1.748641807353124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119390 + }, + { + "epoch": 0.579068422462908, + "grad_norm": 1.5684274785598973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119400 + }, + { + "epoch": 0.579116920655744, + "grad_norm": 1.3194421626394615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119410 + }, + { + "epoch": 0.5791654188485802, + "grad_norm": 1.1535038311194512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119420 + }, + { + "epoch": 0.5792139170414162, + "grad_norm": 1.2670178648477304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119430 + }, + { + "epoch": 0.5792624152342524, + "grad_norm": 1.5086635585248587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119440 + }, + { + "epoch": 0.5793109134270884, + "grad_norm": 1.7819703543864307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119450 + }, + { + "epoch": 0.5793594116199245, + "grad_norm": 1.192433728647302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119460 + }, + { + "epoch": 0.5794079098127606, + "grad_norm": 1.25095721159596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119470 + }, + { + "epoch": 0.5794564080055967, + "grad_norm": 1.2297861076149275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119480 + }, + { + "epoch": 0.5795049061984328, + "grad_norm": 1.482275024500268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119490 + }, + { + "epoch": 0.5795534043912689, + "grad_norm": 1.4430902410822455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119500 + }, + { + "epoch": 0.5796019025841049, + "grad_norm": 1.1114635753983748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119510 + }, + { + "epoch": 0.5796504007769411, + "grad_norm": 1.0438250228617107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119520 + }, + { + "epoch": 0.5796988989697771, + "grad_norm": 1.0705668955779402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119530 + }, + { + "epoch": 0.5797473971626133, + "grad_norm": 1.5607433851982933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119540 + }, + { + "epoch": 0.5797958953554493, + "grad_norm": 2.036952082562493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119550 + }, + { + "epoch": 0.5798443935482854, + "grad_norm": 1.1129518497909885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119560 + }, + { + "epoch": 0.5798928917411215, + "grad_norm": 1.138667016675754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119570 + }, + { + "epoch": 0.5799413899339576, + "grad_norm": 1.0658757219061954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119580 + }, + { + "epoch": 0.5799898881267936, + "grad_norm": 1.40863244268985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119590 + }, + { + "epoch": 0.5800383863196298, + "grad_norm": 1.3711386372960988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119600 + }, + { + "epoch": 0.5800868845124658, + "grad_norm": 9.59385943133384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119610 + }, + { + "epoch": 0.580135382705302, + "grad_norm": 1.0230427278656862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119620 + }, + { + "epoch": 0.580183880898138, + "grad_norm": 1.0973616326737101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119630 + }, + { + "epoch": 0.5802323790909741, + "grad_norm": 1.4093477602727944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119640 + }, + { + "epoch": 0.5802808772838102, + "grad_norm": 1.2919086884721764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119650 + }, + { + "epoch": 0.5803293754766463, + "grad_norm": 9.880618563329335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119660 + }, + { + "epoch": 0.5803778736694823, + "grad_norm": 9.400581006957509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119670 + }, + { + "epoch": 0.5804263718623185, + "grad_norm": 1.3551633628594573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119680 + }, + { + "epoch": 0.5804748700551545, + "grad_norm": 1.2316027095948812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119690 + }, + { + "epoch": 0.5805233682479907, + "grad_norm": 1.3409540997599834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119700 + }, + { + "epoch": 0.5805718664408267, + "grad_norm": 9.415966246706375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119710 + }, + { + "epoch": 0.5806203646336628, + "grad_norm": 9.54021857069165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119720 + }, + { + "epoch": 0.5806688628264989, + "grad_norm": 9.319949185737642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119730 + }, + { + "epoch": 0.580717361019335, + "grad_norm": 8.134165000228677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119740 + }, + { + "epoch": 0.580765859212171, + "grad_norm": 2.0407658212207025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119750 + }, + { + "epoch": 0.5808143574050072, + "grad_norm": 9.454956853005569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119760 + }, + { + "epoch": 0.5808628555978432, + "grad_norm": 8.79081824223249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119770 + }, + { + "epoch": 0.5809113537906794, + "grad_norm": 9.75359398580622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119780 + }, + { + "epoch": 0.5809598519835155, + "grad_norm": 1.3320208154254942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119790 + }, + { + "epoch": 0.5810083501763516, + "grad_norm": 1.1612592061283067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119800 + }, + { + "epoch": 0.5810568483691877, + "grad_norm": 9.245050023309886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119810 + }, + { + "epoch": 0.5811053465620237, + "grad_norm": 8.654471344016201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119820 + }, + { + "epoch": 0.5811538447548599, + "grad_norm": 9.271080330108816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119830 + }, + { + "epoch": 0.5812023429476959, + "grad_norm": 1.0754059758255607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119840 + }, + { + "epoch": 0.581250841140532, + "grad_norm": 1.2341557749095955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119850 + }, + { + "epoch": 0.5812993393333681, + "grad_norm": 8.007660312614462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119860 + }, + { + "epoch": 0.5813478375262042, + "grad_norm": 8.904156629796489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119870 + }, + { + "epoch": 0.5813963357190403, + "grad_norm": 7.961651249388524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119880 + }, + { + "epoch": 0.5814448339118764, + "grad_norm": 1.012194957183965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119890 + }, + { + "epoch": 0.5814933321047124, + "grad_norm": 1.0121673312823987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119900 + }, + { + "epoch": 0.5815418302975486, + "grad_norm": 8.554171131436306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119910 + }, + { + "epoch": 0.5815903284903846, + "grad_norm": 8.29273972158262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119920 + }, + { + "epoch": 0.5816388266832208, + "grad_norm": 7.889546509431966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119930 + }, + { + "epoch": 0.5816873248760568, + "grad_norm": 9.8691737093759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119940 + }, + { + "epoch": 0.5817358230688929, + "grad_norm": 1.0157365295526688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119950 + }, + { + "epoch": 0.581784321261729, + "grad_norm": 8.001391620382492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119960 + }, + { + "epoch": 0.5818328194545651, + "grad_norm": 8.219158189604059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119970 + }, + { + "epoch": 0.5818813176474011, + "grad_norm": 8.083532065938925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119980 + }, + { + "epoch": 0.5819298158402373, + "grad_norm": 9.031010108628834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 119990 + }, + { + "epoch": 0.5819783140330733, + "grad_norm": 9.514187127024343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120000 + }, + { + "epoch": 0.5820268122259095, + "grad_norm": 7.786094329276239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120010 + }, + { + "epoch": 0.5820753104187455, + "grad_norm": 1.1542186939550447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120020 + }, + { + "epoch": 0.5821238086115816, + "grad_norm": 7.482368005184981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120030 + }, + { + "epoch": 0.5821723068044177, + "grad_norm": 8.961044386524009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120040 + }, + { + "epoch": 0.5822208049972538, + "grad_norm": 9.027812097883725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120050 + }, + { + "epoch": 0.5822693031900898, + "grad_norm": 7.471173262274533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120060 + }, + { + "epoch": 0.582317801382926, + "grad_norm": 7.243217510222166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120070 + }, + { + "epoch": 0.582366299575762, + "grad_norm": 7.504683026127168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120080 + }, + { + "epoch": 0.5824147977685982, + "grad_norm": 8.243108027272683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120090 + }, + { + "epoch": 0.5824632959614342, + "grad_norm": 8.735309506846534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120100 + }, + { + "epoch": 0.5825117941542703, + "grad_norm": 7.254006959556136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120110 + }, + { + "epoch": 0.5825602923471064, + "grad_norm": 7.087181188580871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120120 + }, + { + "epoch": 0.5826087905399425, + "grad_norm": 7.170047524596157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120130 + }, + { + "epoch": 0.5826572887327786, + "grad_norm": 8.488127605232876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120140 + }, + { + "epoch": 0.5827057869256147, + "grad_norm": 1.0577014109003358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120150 + }, + { + "epoch": 0.5827542851184507, + "grad_norm": 7.702140010223957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120160 + }, + { + "epoch": 0.5828027833112869, + "grad_norm": 7.168327442741429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120170 + }, + { + "epoch": 0.5828512815041229, + "grad_norm": 7.135170108085731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120180 + }, + { + "epoch": 0.582899779696959, + "grad_norm": 4.847919626627117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120190 + }, + { + "epoch": 0.5829482778897951, + "grad_norm": 8.774030106906139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120200 + }, + { + "epoch": 0.5829967760826312, + "grad_norm": 6.404416694749671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120210 + }, + { + "epoch": 0.5830452742754673, + "grad_norm": 6.511171477541211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120220 + }, + { + "epoch": 0.5830937724683034, + "grad_norm": 7.631571179445018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120230 + }, + { + "epoch": 0.5831422706611394, + "grad_norm": 8.35067282878299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120240 + }, + { + "epoch": 0.5831907688539756, + "grad_norm": 7.696973511883698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120250 + }, + { + "epoch": 0.5832392670468116, + "grad_norm": 9.594234597898321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120260 + }, + { + "epoch": 0.5832877652396478, + "grad_norm": 6.849858209534432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120270 + }, + { + "epoch": 0.5833362634324838, + "grad_norm": 1.639810648157436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120280 + }, + { + "epoch": 0.5833847616253199, + "grad_norm": 7.735845315437473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120290 + }, + { + "epoch": 0.5834332598181561, + "grad_norm": 7.279614351318742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120300 + }, + { + "epoch": 0.5834817580109921, + "grad_norm": 6.203678708516236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120310 + }, + { + "epoch": 0.5835302562038283, + "grad_norm": 5.948394914412347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120320 + }, + { + "epoch": 0.5835787543966643, + "grad_norm": 6.12686051226774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120330 + }, + { + "epoch": 0.5836272525895004, + "grad_norm": 7.104604264895897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120340 + }, + { + "epoch": 0.5836757507823365, + "grad_norm": 1.063254899236199e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120350 + }, + { + "epoch": 0.5837242489751726, + "grad_norm": 9.346869660475932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120360 + }, + { + "epoch": 0.5837727471680086, + "grad_norm": 6.249822490644874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120370 + }, + { + "epoch": 0.5838212453608448, + "grad_norm": 5.598783445748268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120380 + }, + { + "epoch": 0.5838697435536808, + "grad_norm": 7.119400038391177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120390 + }, + { + "epoch": 0.583918241746517, + "grad_norm": 7.146640541577653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120400 + }, + { + "epoch": 0.583966739939353, + "grad_norm": 7.787930371705443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120410 + }, + { + "epoch": 0.5840152381321891, + "grad_norm": 6.776540999453573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120420 + }, + { + "epoch": 0.5840637363250252, + "grad_norm": 6.34691673440102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120430 + }, + { + "epoch": 0.5841122345178613, + "grad_norm": 7.105986128408404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120440 + }, + { + "epoch": 0.5841607327106974, + "grad_norm": 6.755064987373771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120450 + }, + { + "epoch": 0.5842092309035335, + "grad_norm": 5.563052809520741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120460 + }, + { + "epoch": 0.5842577290963695, + "grad_norm": 6.175518478812592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120470 + }, + { + "epoch": 0.5843062272892057, + "grad_norm": 5.197856012273405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120480 + }, + { + "epoch": 0.5843547254820417, + "grad_norm": 6.924769877514336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120490 + }, + { + "epoch": 0.5844032236748778, + "grad_norm": 7.000739969953429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120500 + }, + { + "epoch": 0.5844517218677139, + "grad_norm": 5.673313125953428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120510 + }, + { + "epoch": 0.58450022006055, + "grad_norm": 5.676552063960116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120520 + }, + { + "epoch": 0.5845487182533861, + "grad_norm": 5.569601171373506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120530 + }, + { + "epoch": 0.5845972164462222, + "grad_norm": 6.303934014795232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120540 + }, + { + "epoch": 0.5846457146390582, + "grad_norm": 6.718701683894324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120550 + }, + { + "epoch": 0.5846942128318944, + "grad_norm": 5.52932078790036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120560 + }, + { + "epoch": 0.5847427110247304, + "grad_norm": 7.396907335532887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120570 + }, + { + "epoch": 0.5847912092175666, + "grad_norm": 5.234341529103403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120580 + }, + { + "epoch": 0.5848397074104026, + "grad_norm": 6.563177521456964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120590 + }, + { + "epoch": 0.5848882056032387, + "grad_norm": 8.114446359286376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120600 + }, + { + "epoch": 0.5849367037960748, + "grad_norm": 5.273926149129693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120610 + }, + { + "epoch": 0.5849852019889109, + "grad_norm": 5.395861535362201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120620 + }, + { + "epoch": 0.5850337001817469, + "grad_norm": 5.170337544768699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120630 + }, + { + "epoch": 0.5850821983745831, + "grad_norm": 5.757135568273952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120640 + }, + { + "epoch": 0.5851306965674191, + "grad_norm": 8.517794185536331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120650 + }, + { + "epoch": 0.5851791947602553, + "grad_norm": 5.187667397876794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120660 + }, + { + "epoch": 0.5852276929530913, + "grad_norm": 5.096467816656514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120670 + }, + { + "epoch": 0.5852761911459274, + "grad_norm": 1.9382530354050687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120680 + }, + { + "epoch": 0.5853246893387635, + "grad_norm": 5.671891472047719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120690 + }, + { + "epoch": 0.5853731875315996, + "grad_norm": 5.615835334538133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120700 + }, + { + "epoch": 0.5854216857244356, + "grad_norm": 5.077698688182863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120710 + }, + { + "epoch": 0.5854701839172718, + "grad_norm": 5.207589310884941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120720 + }, + { + "epoch": 0.5855186821101078, + "grad_norm": 4.759668570386566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120730 + }, + { + "epoch": 0.585567180302944, + "grad_norm": 5.329003442966496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120740 + }, + { + "epoch": 0.58561567849578, + "grad_norm": 5.503567876985471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120750 + }, + { + "epoch": 0.5856641766886161, + "grad_norm": 5.341544806469756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120760 + }, + { + "epoch": 0.5857126748814522, + "grad_norm": 4.520044285527547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120770 + }, + { + "epoch": 0.5857611730742883, + "grad_norm": 4.790545631294663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120780 + }, + { + "epoch": 0.5858096712671244, + "grad_norm": 5.485113092618121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120790 + }, + { + "epoch": 0.5858581694599605, + "grad_norm": 6.321692467281537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120800 + }, + { + "epoch": 0.5859066676527966, + "grad_norm": 4.6266674758044246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120810 + }, + { + "epoch": 0.5859551658456327, + "grad_norm": 4.3896915258301306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120820 + }, + { + "epoch": 0.5860036640384688, + "grad_norm": 3.965163273278449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120830 + }, + { + "epoch": 0.5860521622313049, + "grad_norm": 5.473276587508735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120840 + }, + { + "epoch": 0.586100660424141, + "grad_norm": 5.4517323633263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120850 + }, + { + "epoch": 0.586149158616977, + "grad_norm": 4.2909914554911666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120860 + }, + { + "epoch": 0.5861976568098132, + "grad_norm": 4.3901019353143056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120870 + }, + { + "epoch": 0.5862461550026492, + "grad_norm": 5.209293476582388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120880 + }, + { + "epoch": 0.5862946531954853, + "grad_norm": 4.993577817913319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120890 + }, + { + "epoch": 0.5863431513883214, + "grad_norm": 5.926629569330544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120900 + }, + { + "epoch": 0.5863916495811575, + "grad_norm": 2.904537268477725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120910 + }, + { + "epoch": 0.5864401477739936, + "grad_norm": 4.87488989620033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120920 + }, + { + "epoch": 0.5864886459668297, + "grad_norm": 3.9820264419176965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120930 + }, + { + "epoch": 0.5865371441596657, + "grad_norm": 5.229633757153351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120940 + }, + { + "epoch": 0.5865856423525019, + "grad_norm": 5.146502530806174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120950 + }, + { + "epoch": 0.5866341405453379, + "grad_norm": 4.0344295371141925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120960 + }, + { + "epoch": 0.5866826387381741, + "grad_norm": 3.9116386574278295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120970 + }, + { + "epoch": 0.5867311369310101, + "grad_norm": 4.594065501350997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120980 + }, + { + "epoch": 0.5867796351238462, + "grad_norm": 5.653329253618722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 120990 + }, + { + "epoch": 0.5868281333166823, + "grad_norm": 5.13308066274476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121000 + }, + { + "epoch": 0.5868766315095184, + "grad_norm": 3.808329438470537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121010 + }, + { + "epoch": 0.5869251297023544, + "grad_norm": 3.846804190743569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121020 + }, + { + "epoch": 0.5869736278951906, + "grad_norm": 3.842610851734207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121030 + }, + { + "epoch": 0.5870221260880266, + "grad_norm": 5.943602445768192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121040 + }, + { + "epoch": 0.5870706242808628, + "grad_norm": 4.843620899919188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121050 + }, + { + "epoch": 0.5871191224736988, + "grad_norm": 4.042605041831848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121060 + }, + { + "epoch": 0.5871676206665349, + "grad_norm": 3.790675009440747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121070 + }, + { + "epoch": 0.587216118859371, + "grad_norm": 3.7429137478284247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121080 + }, + { + "epoch": 0.5872646170522071, + "grad_norm": 4.4443370939006854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121090 + }, + { + "epoch": 0.5873131152450431, + "grad_norm": 4.5602320142279495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121100 + }, + { + "epoch": 0.5873616134378793, + "grad_norm": 3.6022933613821806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121110 + }, + { + "epoch": 0.5874101116307153, + "grad_norm": 5.68677648971061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121120 + }, + { + "epoch": 0.5874586098235515, + "grad_norm": 3.4197023524029646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121130 + }, + { + "epoch": 0.5875071080163875, + "grad_norm": 4.519388028256799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121140 + }, + { + "epoch": 0.5875556062092236, + "grad_norm": 4.3030249230469053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121150 + }, + { + "epoch": 0.5876041044020597, + "grad_norm": 3.746831112039217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121160 + }, + { + "epoch": 0.5876526025948958, + "grad_norm": 3.677204745144991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121170 + }, + { + "epoch": 0.5877011007877319, + "grad_norm": 3.521871860812098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121180 + }, + { + "epoch": 0.587749598980568, + "grad_norm": 4.5253077018969634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121190 + }, + { + "epoch": 0.587798097173404, + "grad_norm": 4.249265259659296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121200 + }, + { + "epoch": 0.5878465953662402, + "grad_norm": 3.5818229093820264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121210 + }, + { + "epoch": 0.5878950935590762, + "grad_norm": 3.6629066357818374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121220 + }, + { + "epoch": 0.5879435917519124, + "grad_norm": 3.664513315015938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121230 + }, + { + "epoch": 0.5879920899447484, + "grad_norm": 3.95562921085002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121240 + }, + { + "epoch": 0.5880405881375845, + "grad_norm": 4.243208593379677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121250 + }, + { + "epoch": 0.5880890863304206, + "grad_norm": 3.2708976505091414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121260 + }, + { + "epoch": 0.5881375845232567, + "grad_norm": 3.356954039190896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121270 + }, + { + "epoch": 0.5881860827160927, + "grad_norm": 3.5911671147914603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121280 + }, + { + "epoch": 0.5882345809089289, + "grad_norm": 4.658377292798832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121290 + }, + { + "epoch": 0.5882830791017649, + "grad_norm": 4.2189753912680317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121300 + }, + { + "epoch": 0.5883315772946011, + "grad_norm": 3.4861795938923024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121310 + }, + { + "epoch": 0.5883800754874372, + "grad_norm": 4.0669661416359304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121320 + }, + { + "epoch": 0.5884285736802732, + "grad_norm": 3.19445746299607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121330 + }, + { + "epoch": 0.5884770718731094, + "grad_norm": 5.336369781616668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121340 + }, + { + "epoch": 0.5885255700659454, + "grad_norm": 3.7432960198202636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121350 + }, + { + "epoch": 0.5885740682587816, + "grad_norm": 2.881533589516039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121360 + }, + { + "epoch": 0.5886225664516176, + "grad_norm": 2.89517913643067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121370 + }, + { + "epoch": 0.5886710646444537, + "grad_norm": 3.061381335101032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121380 + }, + { + "epoch": 0.5887195628372898, + "grad_norm": 3.759102185085794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121390 + }, + { + "epoch": 0.5887680610301259, + "grad_norm": 3.655947580227803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121400 + }, + { + "epoch": 0.5888165592229619, + "grad_norm": 3.247565700803534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121410 + }, + { + "epoch": 0.5888650574157981, + "grad_norm": 3.1883334372651007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121420 + }, + { + "epoch": 0.5889135556086341, + "grad_norm": 3.321407291423384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121430 + }, + { + "epoch": 0.5889620538014703, + "grad_norm": 3.7781370565426187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121440 + }, + { + "epoch": 0.5890105519943063, + "grad_norm": 3.333124141136068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121450 + }, + { + "epoch": 0.5890590501871424, + "grad_norm": 2.8848884880972037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121460 + }, + { + "epoch": 0.5891075483799785, + "grad_norm": 3.287798620021931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121470 + }, + { + "epoch": 0.5891560465728146, + "grad_norm": 2.965190901704773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121480 + }, + { + "epoch": 0.5892045447656507, + "grad_norm": 3.822344183390669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121490 + }, + { + "epoch": 0.5892530429584868, + "grad_norm": 3.624369071530964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121500 + }, + { + "epoch": 0.5893015411513228, + "grad_norm": 2.7642644795378146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121510 + }, + { + "epoch": 0.589350039344159, + "grad_norm": 3.1295238045458973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121520 + }, + { + "epoch": 0.589398537536995, + "grad_norm": 2.997447268171527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121530 + }, + { + "epoch": 0.5894470357298311, + "grad_norm": 3.2417878514934273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121540 + }, + { + "epoch": 0.5894955339226672, + "grad_norm": 3.6073532783120754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121550 + }, + { + "epoch": 0.5895440321155033, + "grad_norm": 5.456305984807841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121560 + }, + { + "epoch": 0.5895925303083394, + "grad_norm": 2.8100069471292954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121570 + }, + { + "epoch": 0.5896410285011755, + "grad_norm": 2.9168612059038423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121580 + }, + { + "epoch": 0.5896895266940115, + "grad_norm": 3.3818332667578943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121590 + }, + { + "epoch": 0.5897380248868477, + "grad_norm": 3.1796452049093205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121600 + }, + { + "epoch": 0.5897865230796837, + "grad_norm": 2.6775251171784475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121610 + }, + { + "epoch": 0.5898350212725199, + "grad_norm": 2.8655500727836625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121620 + }, + { + "epoch": 0.5898835194653559, + "grad_norm": 2.6758385729408474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121630 + }, + { + "epoch": 0.589932017658192, + "grad_norm": 3.3719993552949745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121640 + }, + { + "epoch": 0.5899805158510281, + "grad_norm": 3.158046979478968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121650 + }, + { + "epoch": 0.5900290140438642, + "grad_norm": 1.1337074283801485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121660 + }, + { + "epoch": 0.5900775122367002, + "grad_norm": 2.670055607723043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121670 + }, + { + "epoch": 0.5901260104295364, + "grad_norm": 2.6262813435096177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121680 + }, + { + "epoch": 0.5901745086223724, + "grad_norm": 3.126677938780631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121690 + }, + { + "epoch": 0.5902230068152086, + "grad_norm": 3.0393567840292235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121700 + }, + { + "epoch": 0.5902715050080446, + "grad_norm": 2.638140585986548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121710 + }, + { + "epoch": 0.5903200032008807, + "grad_norm": 2.549247426486545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121720 + }, + { + "epoch": 0.5903685013937168, + "grad_norm": 2.6488089588383446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121730 + }, + { + "epoch": 0.5904169995865529, + "grad_norm": 2.877644078580488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121740 + }, + { + "epoch": 0.590465497779389, + "grad_norm": 3.302750712919078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121750 + }, + { + "epoch": 0.5905139959722251, + "grad_norm": 2.4838792000991816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121760 + }, + { + "epoch": 0.5905624941650611, + "grad_norm": 2.56144573995698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121770 + }, + { + "epoch": 0.5906109923578973, + "grad_norm": 2.665331066964427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121780 + }, + { + "epoch": 0.5906594905507333, + "grad_norm": 3.772960610604059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121790 + }, + { + "epoch": 0.5907079887435694, + "grad_norm": 4.396304689180397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121800 + }, + { + "epoch": 0.5907564869364055, + "grad_norm": 3.0036622433726734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121810 + }, + { + "epoch": 0.5908049851292416, + "grad_norm": 2.633136375607137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121820 + }, + { + "epoch": 0.5908534833220778, + "grad_norm": 2.6012463649749407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121830 + }, + { + "epoch": 0.5909019815149138, + "grad_norm": 2.983559852509643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121840 + }, + { + "epoch": 0.5909504797077499, + "grad_norm": 2.799791900542914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121850 + }, + { + "epoch": 0.590998977900586, + "grad_norm": 2.5595099373276753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121860 + }, + { + "epoch": 0.5910474760934221, + "grad_norm": 2.3251767800047674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121870 + }, + { + "epoch": 0.5910959742862582, + "grad_norm": 2.4018027033889666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121880 + }, + { + "epoch": 0.5911444724790943, + "grad_norm": 2.8071502811144455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121890 + }, + { + "epoch": 0.5911929706719303, + "grad_norm": 2.9446465532600996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121900 + }, + { + "epoch": 0.5912414688647665, + "grad_norm": 2.3617425881639065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121910 + }, + { + "epoch": 0.5912899670576025, + "grad_norm": 2.532955534206849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121920 + }, + { + "epoch": 0.5913384652504386, + "grad_norm": 2.436584907172801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121930 + }, + { + "epoch": 0.5913869634432747, + "grad_norm": 1.3229640671852394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121940 + }, + { + "epoch": 0.5914354616361108, + "grad_norm": 2.759173867161735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121950 + }, + { + "epoch": 0.5914839598289469, + "grad_norm": 2.2148640255181817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121960 + }, + { + "epoch": 0.591532458021783, + "grad_norm": 2.2063224491830624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121970 + }, + { + "epoch": 0.591580956214619, + "grad_norm": 2.1973785635509557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121980 + }, + { + "epoch": 0.5916294544074552, + "grad_norm": 2.6141287889913656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 121990 + }, + { + "epoch": 0.5916779526002912, + "grad_norm": 3.6282068549553514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122000 + }, + { + "epoch": 0.5917264507931274, + "grad_norm": 2.173000126504121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122010 + }, + { + "epoch": 0.5917749489859634, + "grad_norm": 2.256666817856967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122020 + }, + { + "epoch": 0.5918234471787995, + "grad_norm": 2.4107279728013964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122030 + }, + { + "epoch": 0.5918719453716356, + "grad_norm": 2.5635728206907515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122040 + }, + { + "epoch": 0.5919204435644717, + "grad_norm": 2.7515145006873354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122050 + }, + { + "epoch": 0.5919689417573077, + "grad_norm": 2.891019335038436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122060 + }, + { + "epoch": 0.5920174399501439, + "grad_norm": 4.136363429552148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122070 + }, + { + "epoch": 0.5920659381429799, + "grad_norm": 2.46572938067402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122080 + }, + { + "epoch": 0.5921144363358161, + "grad_norm": 2.612165133086819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122090 + }, + { + "epoch": 0.5921629345286521, + "grad_norm": 2.4496915784766315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122100 + }, + { + "epoch": 0.5922114327214882, + "grad_norm": 2.5612592935431167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122110 + }, + { + "epoch": 0.5922599309143243, + "grad_norm": 2.1294464147558756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122120 + }, + { + "epoch": 0.5923084291071604, + "grad_norm": 2.2637807717273972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122130 + }, + { + "epoch": 0.5923569272999964, + "grad_norm": 2.3800664905593294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122140 + }, + { + "epoch": 0.5924054254928326, + "grad_norm": 2.3814023109025584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122150 + }, + { + "epoch": 0.5924539236856686, + "grad_norm": 2.1093637769808993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122160 + }, + { + "epoch": 0.5925024218785048, + "grad_norm": 2.1604114408546593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122170 + }, + { + "epoch": 0.5925509200713408, + "grad_norm": 2.0744157325225387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122180 + }, + { + "epoch": 0.592599418264177, + "grad_norm": 2.3150191452714353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122190 + }, + { + "epoch": 0.592647916457013, + "grad_norm": 2.3280115613033558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122200 + }, + { + "epoch": 0.5926964146498491, + "grad_norm": 2.101436393786571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122210 + }, + { + "epoch": 0.5927449128426852, + "grad_norm": 2.3206112587104144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122220 + }, + { + "epoch": 0.5927934110355213, + "grad_norm": 2.1110906800458906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122230 + }, + { + "epoch": 0.5928419092283573, + "grad_norm": 2.312583831098891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122240 + }, + { + "epoch": 0.5928904074211935, + "grad_norm": 2.3684125949330337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122250 + }, + { + "epoch": 0.5929389056140295, + "grad_norm": 2.0699702929505293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122260 + }, + { + "epoch": 0.5929874038068657, + "grad_norm": 2.1802318883601401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122270 + }, + { + "epoch": 0.5930359019997017, + "grad_norm": 2.0477905593452306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122280 + }, + { + "epoch": 0.5930844001925378, + "grad_norm": 2.465674242557725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122290 + }, + { + "epoch": 0.5931328983853739, + "grad_norm": 2.392503688497527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122300 + }, + { + "epoch": 0.59318139657821, + "grad_norm": 2.0668427680448076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122310 + }, + { + "epoch": 0.593229894771046, + "grad_norm": 2.0560680979997414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122320 + }, + { + "epoch": 0.5932783929638822, + "grad_norm": 1.938791172051424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122330 + }, + { + "epoch": 0.5933268911567183, + "grad_norm": 4.2343992845417233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122340 + }, + { + "epoch": 0.5933753893495544, + "grad_norm": 2.323821064464937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122350 + }, + { + "epoch": 0.5934238875423905, + "grad_norm": 1.8691891057187604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122360 + }, + { + "epoch": 0.5934723857352265, + "grad_norm": 1.8119828837370733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122370 + }, + { + "epoch": 0.5935208839280627, + "grad_norm": 1.9455514177479927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122380 + }, + { + "epoch": 0.5935693821208987, + "grad_norm": 2.1257771720684104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122390 + }, + { + "epoch": 0.5936178803137349, + "grad_norm": 2.1100741776081122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122400 + }, + { + "epoch": 0.5936663785065709, + "grad_norm": 1.85427111887293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122410 + }, + { + "epoch": 0.593714876699407, + "grad_norm": 1.9557546693249606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122420 + }, + { + "epoch": 0.5937633748922431, + "grad_norm": 1.9703975340235047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122430 + }, + { + "epoch": 0.5938118730850792, + "grad_norm": 2.522061492982175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122440 + }, + { + "epoch": 0.5938603712779152, + "grad_norm": 2.0898639263577934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122450 + }, + { + "epoch": 0.5939088694707514, + "grad_norm": 1.916298941750938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122460 + }, + { + "epoch": 0.5939573676635874, + "grad_norm": 2.0374505993459024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122470 + }, + { + "epoch": 0.5940058658564236, + "grad_norm": 1.8630271370057017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122480 + }, + { + "epoch": 0.5940543640492596, + "grad_norm": 2.1465589838953747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122490 + }, + { + "epoch": 0.5941028622420957, + "grad_norm": 2.2299337842923705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122500 + }, + { + "epoch": 0.5941513604349318, + "grad_norm": 1.8434187154525716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122510 + }, + { + "epoch": 0.5941998586277679, + "grad_norm": 2.0024376112814934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122520 + }, + { + "epoch": 0.594248356820604, + "grad_norm": 2.0025305502713309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122530 + }, + { + "epoch": 0.5942968550134401, + "grad_norm": 2.10357157470753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122540 + }, + { + "epoch": 0.5943453532062761, + "grad_norm": 2.1298919250511972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122550 + }, + { + "epoch": 0.5943938513991123, + "grad_norm": 1.8103369825439586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122560 + }, + { + "epoch": 0.5944423495919483, + "grad_norm": 1.8513578936563135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122570 + }, + { + "epoch": 0.5944908477847844, + "grad_norm": 1.789547496855448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122580 + }, + { + "epoch": 0.5945393459776205, + "grad_norm": 1.8986689553912584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122590 + }, + { + "epoch": 0.5945878441704566, + "grad_norm": 2.0885494222966372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122600 + }, + { + "epoch": 0.5946363423632927, + "grad_norm": 1.7520919470825902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122610 + }, + { + "epoch": 0.5946848405561288, + "grad_norm": 1.7525319151445728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122620 + }, + { + "epoch": 0.5947333387489648, + "grad_norm": 1.9464972922378365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122630 + }, + { + "epoch": 0.594781836941801, + "grad_norm": 1.970773695347816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122640 + }, + { + "epoch": 0.594830335134637, + "grad_norm": 2.203718736382143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122650 + }, + { + "epoch": 0.5948788333274732, + "grad_norm": 1.7835746746186487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122660 + }, + { + "epoch": 0.5949273315203092, + "grad_norm": 1.749680791363062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122670 + }, + { + "epoch": 0.5949758297131453, + "grad_norm": 1.84879979769903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122680 + }, + { + "epoch": 0.5950243279059814, + "grad_norm": 1.8710728966198076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122690 + }, + { + "epoch": 0.5950728260988175, + "grad_norm": 2.182153764351824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122700 + }, + { + "epoch": 0.5951213242916535, + "grad_norm": 1.9919528426726174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122710 + }, + { + "epoch": 0.5951698224844897, + "grad_norm": 1.7103224081438384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122720 + }, + { + "epoch": 0.5952183206773257, + "grad_norm": 1.7390748041634652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122730 + }, + { + "epoch": 0.5952668188701619, + "grad_norm": 2.2809852850969037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122740 + }, + { + "epoch": 0.5953153170629979, + "grad_norm": 1.8625389941462345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122750 + }, + { + "epoch": 0.595363815255834, + "grad_norm": 1.684431794046759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122760 + }, + { + "epoch": 0.5954123134486701, + "grad_norm": 1.7008709107813047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122770 + }, + { + "epoch": 0.5954608116415062, + "grad_norm": 1.6960619575456803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122780 + }, + { + "epoch": 0.5955093098343422, + "grad_norm": 2.146939834801742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122790 + }, + { + "epoch": 0.5955578080271784, + "grad_norm": 1.847375159513831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122800 + }, + { + "epoch": 0.5956063062200144, + "grad_norm": 1.649659111535584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122810 + }, + { + "epoch": 0.5956548044128506, + "grad_norm": 1.9605106160724972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122820 + }, + { + "epoch": 0.5957033026056866, + "grad_norm": 1.722081890420668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122830 + }, + { + "epoch": 0.5957518007985227, + "grad_norm": 1.8929281964119582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122840 + }, + { + "epoch": 0.5958002989913589, + "grad_norm": 1.9288970065645117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122850 + }, + { + "epoch": 0.5958487971841949, + "grad_norm": 1.7393247730979056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122860 + }, + { + "epoch": 0.5958972953770311, + "grad_norm": 2.0687559754151152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122870 + }, + { + "epoch": 0.5959457935698671, + "grad_norm": 1.7033485733009002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122880 + }, + { + "epoch": 0.5959942917627032, + "grad_norm": 1.6978519568056072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122890 + }, + { + "epoch": 0.5960427899555393, + "grad_norm": 1.9106624904452474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122900 + }, + { + "epoch": 0.5960912881483754, + "grad_norm": 1.7361297466322867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122910 + }, + { + "epoch": 0.5961397863412115, + "grad_norm": 1.60705738494471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122920 + }, + { + "epoch": 0.5961882845340476, + "grad_norm": 1.6655333467952005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122930 + }, + { + "epoch": 0.5962367827268836, + "grad_norm": 1.89488559954043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122940 + }, + { + "epoch": 0.5962852809197198, + "grad_norm": 2.016359275103241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122950 + }, + { + "epoch": 0.5963337791125558, + "grad_norm": 1.746120261714168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122960 + }, + { + "epoch": 0.596382277305392, + "grad_norm": 1.628028627465028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122970 + }, + { + "epoch": 0.596430775498228, + "grad_norm": 1.8093375331318384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122980 + }, + { + "epoch": 0.5964792736910641, + "grad_norm": 1.948795471662379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 122990 + }, + { + "epoch": 0.5965277718839002, + "grad_norm": 1.752606948457469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123000 + }, + { + "epoch": 0.5965762700767363, + "grad_norm": 2.1612082434785407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123010 + }, + { + "epoch": 0.5966247682695723, + "grad_norm": 1.6415633297128807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123020 + }, + { + "epoch": 0.5966732664624085, + "grad_norm": 1.6072786479526258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123030 + }, + { + "epoch": 0.5967217646552445, + "grad_norm": 1.953131203435987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123040 + }, + { + "epoch": 0.5967702628480807, + "grad_norm": 2.416651341263787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123050 + }, + { + "epoch": 0.5968187610409167, + "grad_norm": 1.582396436106137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123060 + }, + { + "epoch": 0.5968672592337528, + "grad_norm": 1.5748820203498326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123070 + }, + { + "epoch": 0.5969157574265889, + "grad_norm": 1.625116681225336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123080 + }, + { + "epoch": 0.596964255619425, + "grad_norm": 1.7847894184797042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123090 + }, + { + "epoch": 0.597012753812261, + "grad_norm": 1.7031590004989994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123100 + }, + { + "epoch": 0.5970612520050972, + "grad_norm": 1.7601639967779192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123110 + }, + { + "epoch": 0.5971097501979332, + "grad_norm": 1.5856221580179408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123120 + }, + { + "epoch": 0.5971582483907694, + "grad_norm": 1.593801925992011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123130 + }, + { + "epoch": 0.5972067465836054, + "grad_norm": 1.7670716090378846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123140 + }, + { + "epoch": 0.5972552447764415, + "grad_norm": 1.729286225327087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123150 + }, + { + "epoch": 0.5973037429692776, + "grad_norm": 1.5531540498159302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123160 + }, + { + "epoch": 0.5973522411621137, + "grad_norm": 1.5745578707537788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123170 + }, + { + "epoch": 0.5974007393549498, + "grad_norm": 1.618948033410561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123180 + }, + { + "epoch": 0.5974492375477859, + "grad_norm": 1.8068577389840357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123190 + }, + { + "epoch": 0.5974977357406219, + "grad_norm": 2.4602630332992703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123200 + }, + { + "epoch": 0.5975462339334581, + "grad_norm": 1.5221928606479196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123210 + }, + { + "epoch": 0.5975947321262941, + "grad_norm": 1.5133514352783095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123220 + }, + { + "epoch": 0.5976432303191302, + "grad_norm": 1.858876004234844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123230 + }, + { + "epoch": 0.5976917285119663, + "grad_norm": 1.7408650876404863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123240 + }, + { + "epoch": 0.5977402267048024, + "grad_norm": 1.6246592338120536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123250 + }, + { + "epoch": 0.5977887248976385, + "grad_norm": 1.4763887179469748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123260 + }, + { + "epoch": 0.5978372230904746, + "grad_norm": 1.534767193334119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123270 + }, + { + "epoch": 0.5978857212833106, + "grad_norm": 1.4626375843818096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123280 + }, + { + "epoch": 0.5979342194761468, + "grad_norm": 1.7669016472154908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123290 + }, + { + "epoch": 0.5979827176689828, + "grad_norm": 1.71624023437289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123300 + }, + { + "epoch": 0.598031215861819, + "grad_norm": 1.7125283591212792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123310 + }, + { + "epoch": 0.598079714054655, + "grad_norm": 1.5640561912277917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123320 + }, + { + "epoch": 0.5981282122474911, + "grad_norm": 1.516669527745762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123330 + }, + { + "epoch": 0.5981767104403272, + "grad_norm": 1.5569365530154755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123340 + }, + { + "epoch": 0.5982252086331633, + "grad_norm": 1.611109752275297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123350 + }, + { + "epoch": 0.5982737068259993, + "grad_norm": 1.537017340069724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123360 + }, + { + "epoch": 0.5983222050188355, + "grad_norm": 1.465971450897996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123370 + }, + { + "epoch": 0.5983707032116716, + "grad_norm": 1.535745894898355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123380 + }, + { + "epoch": 0.5984192014045077, + "grad_norm": 1.744030129202656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123390 + }, + { + "epoch": 0.5984676995973438, + "grad_norm": 1.5844415202082018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123400 + }, + { + "epoch": 0.5985161977901798, + "grad_norm": 1.5619106363828905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123410 + }, + { + "epoch": 0.598564695983016, + "grad_norm": 1.472005521918618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123420 + }, + { + "epoch": 0.598613194175852, + "grad_norm": 1.509136779986875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123430 + }, + { + "epoch": 0.5986616923686882, + "grad_norm": 1.722136033777133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123440 + }, + { + "epoch": 0.5987101905615242, + "grad_norm": 1.557586699618696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123450 + }, + { + "epoch": 0.5987586887543603, + "grad_norm": 1.453191913469709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123460 + }, + { + "epoch": 0.5988071869471964, + "grad_norm": 1.4053681240966398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123470 + }, + { + "epoch": 0.5988556851400325, + "grad_norm": 1.527092621245174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123480 + }, + { + "epoch": 0.5989041833328685, + "grad_norm": 1.5644775430700975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123490 + }, + { + "epoch": 0.5989526815257047, + "grad_norm": 1.603669659289153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123500 + }, + { + "epoch": 0.5990011797185407, + "grad_norm": 1.4142342763534543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123510 + }, + { + "epoch": 0.5990496779113769, + "grad_norm": 1.4589986108148878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123520 + }, + { + "epoch": 0.5990981761042129, + "grad_norm": 1.4471656584191805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123530 + }, + { + "epoch": 0.599146674297049, + "grad_norm": 1.5500337724461133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123540 + }, + { + "epoch": 0.5991951724898851, + "grad_norm": 1.6572901984091004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123550 + }, + { + "epoch": 0.5992436706827212, + "grad_norm": 1.4180459118051658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123560 + }, + { + "epoch": 0.5992921688755573, + "grad_norm": 1.419265913682466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123570 + }, + { + "epoch": 0.5993406670683934, + "grad_norm": 1.4201908982158784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123580 + }, + { + "epoch": 0.5993891652612294, + "grad_norm": 1.548382755345301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123590 + }, + { + "epoch": 0.5994376634540656, + "grad_norm": 1.4974152406921348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123600 + }, + { + "epoch": 0.5994861616469016, + "grad_norm": 1.4347985199947288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123610 + }, + { + "epoch": 0.5995346598397377, + "grad_norm": 1.4218248622910323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123620 + }, + { + "epoch": 0.5995831580325738, + "grad_norm": 2.3511596225489484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123630 + }, + { + "epoch": 0.5996316562254099, + "grad_norm": 1.5818562815184123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123640 + }, + { + "epoch": 0.599680154418246, + "grad_norm": 1.5015625365322194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123650 + }, + { + "epoch": 0.5997286526110821, + "grad_norm": 1.4699317318900285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123660 + }, + { + "epoch": 0.5997771508039181, + "grad_norm": 1.3979713742173772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123670 + }, + { + "epoch": 0.5998256489967543, + "grad_norm": 1.3786362274004205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123680 + }, + { + "epoch": 0.5998741471895903, + "grad_norm": 1.554006416881748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123690 + }, + { + "epoch": 0.5999226453824265, + "grad_norm": 1.5535795228061033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123700 + }, + { + "epoch": 0.5999711435752625, + "grad_norm": 1.41589410418419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123710 + }, + { + "epoch": 0.6000196417680986, + "grad_norm": 1.3735674997406022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123720 + }, + { + "epoch": 0.6000681399609347, + "grad_norm": 1.3968455903068389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123730 + }, + { + "epoch": 0.6001166381537708, + "grad_norm": 1.523583392781802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123740 + }, + { + "epoch": 0.6001651363466068, + "grad_norm": 1.4024557515313063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123750 + }, + { + "epoch": 0.600213634539443, + "grad_norm": 1.403699627644528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123760 + }, + { + "epoch": 0.600262132732279, + "grad_norm": 1.4759528710328595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123770 + }, + { + "epoch": 0.6003106309251152, + "grad_norm": 1.3502885565230827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123780 + }, + { + "epoch": 0.6003591291179512, + "grad_norm": 1.4412158577670198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123790 + }, + { + "epoch": 0.6004076273107873, + "grad_norm": 1.4995229946634936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123800 + }, + { + "epoch": 0.6004561255036234, + "grad_norm": 1.3556010003412666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123810 + }, + { + "epoch": 0.6005046236964595, + "grad_norm": 1.352799756659806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123820 + }, + { + "epoch": 0.6005531218892955, + "grad_norm": 1.3766694451078365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123830 + }, + { + "epoch": 0.6006016200821317, + "grad_norm": 1.5342386916472606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123840 + }, + { + "epoch": 0.6006501182749677, + "grad_norm": 1.481148075299643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123850 + }, + { + "epoch": 0.6006986164678039, + "grad_norm": 1.33030653159949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123860 + }, + { + "epoch": 0.6007471146606399, + "grad_norm": 1.3821707511851855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123870 + }, + { + "epoch": 0.600795612853476, + "grad_norm": 1.3571560941727512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123880 + }, + { + "epoch": 0.6008441110463122, + "grad_norm": 1.407309895284925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123890 + }, + { + "epoch": 0.6008926092391482, + "grad_norm": 1.4118701585630333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123900 + }, + { + "epoch": 0.6009411074319844, + "grad_norm": 1.4276088222686667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123910 + }, + { + "epoch": 0.6009896056248204, + "grad_norm": 1.3415171906672185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123920 + }, + { + "epoch": 0.6010381038176565, + "grad_norm": 1.3159920797534141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123930 + }, + { + "epoch": 0.6010866020104926, + "grad_norm": 1.457621863210079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123940 + }, + { + "epoch": 0.6011351002033287, + "grad_norm": 1.4296682593339938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123950 + }, + { + "epoch": 0.6011835983961648, + "grad_norm": 1.415525758829972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123960 + }, + { + "epoch": 0.6012320965890009, + "grad_norm": 1.330204781879729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123970 + }, + { + "epoch": 0.6012805947818369, + "grad_norm": 1.2697528006810899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123980 + }, + { + "epoch": 0.6013290929746731, + "grad_norm": 1.3816264754495933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 123990 + }, + { + "epoch": 0.6013775911675091, + "grad_norm": 1.4931366365544818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124000 + }, + { + "epoch": 0.6014260893603453, + "grad_norm": 1.5644634743239294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124010 + }, + { + "epoch": 0.6014745875531813, + "grad_norm": 1.3191188941164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124020 + }, + { + "epoch": 0.6015230857460174, + "grad_norm": 1.2842494356846146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124030 + }, + { + "epoch": 0.6015715839388535, + "grad_norm": 1.4119187596861593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124040 + }, + { + "epoch": 0.6016200821316896, + "grad_norm": 1.3450404878767586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124050 + }, + { + "epoch": 0.6016685803245256, + "grad_norm": 1.2844978414250363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124060 + }, + { + "epoch": 0.6017170785173618, + "grad_norm": 1.2582046338138753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124070 + }, + { + "epoch": 0.6017655767101978, + "grad_norm": 1.2951097971836134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124080 + }, + { + "epoch": 0.601814074903034, + "grad_norm": 1.3127768738740997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124090 + }, + { + "epoch": 0.60186257309587, + "grad_norm": 2.3154304926720215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124100 + }, + { + "epoch": 0.6019110712887061, + "grad_norm": 1.2831947060476523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124110 + }, + { + "epoch": 0.6019595694815422, + "grad_norm": 1.235718656289464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124120 + }, + { + "epoch": 0.6020080676743783, + "grad_norm": 1.228184203228011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124130 + }, + { + "epoch": 0.6020565658672143, + "grad_norm": 1.3297922407673468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124140 + }, + { + "epoch": 0.6021050640600505, + "grad_norm": 1.3395916198533087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124150 + }, + { + "epoch": 0.6021535622528865, + "grad_norm": 1.380988692289975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124160 + }, + { + "epoch": 0.6022020604457227, + "grad_norm": 1.2804173366021132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124170 + }, + { + "epoch": 0.6022505586385587, + "grad_norm": 1.2807318228169606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124180 + }, + { + "epoch": 0.6022990568313948, + "grad_norm": 1.3119064590227936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124190 + }, + { + "epoch": 0.6023475550242309, + "grad_norm": 1.3383488806084642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124200 + }, + { + "epoch": 0.602396053217067, + "grad_norm": 1.2083062017609336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124210 + }, + { + "epoch": 0.602444551409903, + "grad_norm": 1.3235658968824282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124220 + }, + { + "epoch": 0.6024930496027392, + "grad_norm": 1.2720015263312234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124230 + }, + { + "epoch": 0.6025415477955752, + "grad_norm": 1.346321596429334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124240 + }, + { + "epoch": 0.6025900459884114, + "grad_norm": 1.292987406031898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124250 + }, + { + "epoch": 0.6026385441812474, + "grad_norm": 1.374449851709869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124260 + }, + { + "epoch": 0.6026870423740835, + "grad_norm": 1.2572655805342947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124270 + }, + { + "epoch": 0.6027355405669196, + "grad_norm": 1.1907448538295284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124280 + }, + { + "epoch": 0.6027840387597557, + "grad_norm": 1.3526710063160863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124290 + }, + { + "epoch": 0.6028325369525918, + "grad_norm": 1.4316790952761949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124300 + }, + { + "epoch": 0.6028810351454279, + "grad_norm": 1.2182532316273864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124310 + }, + { + "epoch": 0.6029295333382639, + "grad_norm": 1.231960737868576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124320 + }, + { + "epoch": 0.6029780315311001, + "grad_norm": 1.2504621338393918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124330 + }, + { + "epoch": 0.6030265297239361, + "grad_norm": 1.4165985362524225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124340 + }, + { + "epoch": 0.6030750279167723, + "grad_norm": 1.2422171380421787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124350 + }, + { + "epoch": 0.6031235261096083, + "grad_norm": 1.1956069556617877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124360 + }, + { + "epoch": 0.6031720243024444, + "grad_norm": 1.3426961231743917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124370 + }, + { + "epoch": 0.6032205224952805, + "grad_norm": 1.199225039272278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124380 + }, + { + "epoch": 0.6032690206881166, + "grad_norm": 1.333868198116761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124390 + }, + { + "epoch": 0.6033175188809528, + "grad_norm": 1.437846179896951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124400 + }, + { + "epoch": 0.6033660170737888, + "grad_norm": 1.224487959916587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124410 + }, + { + "epoch": 0.6034145152666249, + "grad_norm": 1.1796068832836681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124420 + }, + { + "epoch": 0.603463013459461, + "grad_norm": 1.203482042910764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124430 + }, + { + "epoch": 0.6035115116522971, + "grad_norm": 1.5461668567695597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124440 + }, + { + "epoch": 0.6035600098451331, + "grad_norm": 1.2515302216797863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124450 + }, + { + "epoch": 0.6036085080379693, + "grad_norm": 1.2412908745318418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124460 + }, + { + "epoch": 0.6036570062308053, + "grad_norm": 1.1855471626631697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124470 + }, + { + "epoch": 0.6037055044236415, + "grad_norm": 1.2327774356890586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124480 + }, + { + "epoch": 0.6037540026164775, + "grad_norm": 1.214529561366362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124490 + }, + { + "epoch": 0.6038025008093136, + "grad_norm": 1.2805553240013978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124500 + }, + { + "epoch": 0.6038509990021497, + "grad_norm": 1.1781911268826661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124510 + }, + { + "epoch": 0.6038994971949858, + "grad_norm": 1.137809917395316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124520 + }, + { + "epoch": 0.6039479953878218, + "grad_norm": 1.22145451086908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124530 + }, + { + "epoch": 0.603996493580658, + "grad_norm": 1.1915397379880233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124540 + }, + { + "epoch": 0.604044991773494, + "grad_norm": 1.2854111730575823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124550 + }, + { + "epoch": 0.6040934899663302, + "grad_norm": 1.1644693387324878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124560 + }, + { + "epoch": 0.6041419881591662, + "grad_norm": 1.1503320962447106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124570 + }, + { + "epoch": 0.6041904863520023, + "grad_norm": 1.1698793400682916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124580 + }, + { + "epoch": 0.6042389845448384, + "grad_norm": 1.2542768956791406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124590 + }, + { + "epoch": 0.6042874827376745, + "grad_norm": 1.3471247939378372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124600 + }, + { + "epoch": 0.6043359809305106, + "grad_norm": 1.1822451284615454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124610 + }, + { + "epoch": 0.6043844791233467, + "grad_norm": 1.276146122108912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124620 + }, + { + "epoch": 0.6044329773161827, + "grad_norm": 1.1144773992555201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124630 + }, + { + "epoch": 0.6044814755090189, + "grad_norm": 1.1642769948139176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124640 + }, + { + "epoch": 0.6045299737018549, + "grad_norm": 1.202457440285798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124650 + }, + { + "epoch": 0.604578471894691, + "grad_norm": 1.223717731591023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124660 + }, + { + "epoch": 0.6046269700875271, + "grad_norm": 1.1100631525096105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124670 + }, + { + "epoch": 0.6046754682803632, + "grad_norm": 1.1632452867615939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124680 + }, + { + "epoch": 0.6047239664731993, + "grad_norm": 1.1715236070131141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124690 + }, + { + "epoch": 0.6047724646660354, + "grad_norm": 1.5191687907645246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124700 + }, + { + "epoch": 0.6048209628588714, + "grad_norm": 1.1323756154979492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124710 + }, + { + "epoch": 0.6048694610517076, + "grad_norm": 1.341370818863652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124720 + }, + { + "epoch": 0.6049179592445436, + "grad_norm": 1.1104352637403281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124730 + }, + { + "epoch": 0.6049664574373798, + "grad_norm": 1.1752349848848098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124740 + }, + { + "epoch": 0.6050149556302158, + "grad_norm": 1.1784371167777863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124750 + }, + { + "epoch": 0.6050634538230519, + "grad_norm": 1.1253327869553686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124760 + }, + { + "epoch": 0.605111952015888, + "grad_norm": 1.1855846082653443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124770 + }, + { + "epoch": 0.6051604502087241, + "grad_norm": 1.1185522197365572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124780 + }, + { + "epoch": 0.6052089484015601, + "grad_norm": 1.1475042782649325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124790 + }, + { + "epoch": 0.6052574465943963, + "grad_norm": 1.202719346338199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124800 + }, + { + "epoch": 0.6053059447872323, + "grad_norm": 1.0948095763296806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124810 + }, + { + "epoch": 0.6053544429800685, + "grad_norm": 1.1075069750177136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124820 + }, + { + "epoch": 0.6054029411729045, + "grad_norm": 1.1589109760734573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124830 + }, + { + "epoch": 0.6054514393657406, + "grad_norm": 1.1542162070554696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124840 + }, + { + "epoch": 0.6054999375585767, + "grad_norm": 1.1712685932252498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124850 + }, + { + "epoch": 0.6055484357514128, + "grad_norm": 1.1074421735202122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124860 + }, + { + "epoch": 0.6055969339442488, + "grad_norm": 1.0943060146928474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124870 + }, + { + "epoch": 0.605645432137085, + "grad_norm": 1.0975730901918723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124880 + }, + { + "epoch": 0.605693930329921, + "grad_norm": 1.1402266864024568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124890 + }, + { + "epoch": 0.6057424285227572, + "grad_norm": 1.1837082780630226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124900 + }, + { + "epoch": 0.6057909267155933, + "grad_norm": 1.2451748432340537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124910 + }, + { + "epoch": 0.6058394249084293, + "grad_norm": 1.0562273899950014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124920 + }, + { + "epoch": 0.6058879231012655, + "grad_norm": 1.0778310155501458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124930 + }, + { + "epoch": 0.6059364212941015, + "grad_norm": 1.1215737316661034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124940 + }, + { + "epoch": 0.6059849194869377, + "grad_norm": 1.1521664333713488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124950 + }, + { + "epoch": 0.6060334176797737, + "grad_norm": 1.0801267080751131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124960 + }, + { + "epoch": 0.6060819158726098, + "grad_norm": 1.075452118470821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124970 + }, + { + "epoch": 0.6061304140654459, + "grad_norm": 1.0618303747378377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124980 + }, + { + "epoch": 0.606178912258282, + "grad_norm": 1.2673329763401853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 124990 + }, + { + "epoch": 0.606227410451118, + "grad_norm": 1.1809824229658261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125000 + }, + { + "epoch": 0.6062759086439542, + "grad_norm": 1.0297629415845222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125010 + }, + { + "epoch": 0.6063244068367902, + "grad_norm": 1.0557608476347013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125020 + }, + { + "epoch": 0.6063729050296264, + "grad_norm": 1.09110168011739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125030 + }, + { + "epoch": 0.6064214032224624, + "grad_norm": 1.1779162178982006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125040 + }, + { + "epoch": 0.6064699014152986, + "grad_norm": 1.1337382233023163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125050 + }, + { + "epoch": 0.6065183996081346, + "grad_norm": 1.0277823747628645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125060 + }, + { + "epoch": 0.6065668978009707, + "grad_norm": 1.0452233567548319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125070 + }, + { + "epoch": 0.6066153959938068, + "grad_norm": 1.0465846145280011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125080 + }, + { + "epoch": 0.6066638941866429, + "grad_norm": 1.0847931974922176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125090 + }, + { + "epoch": 0.6067123923794789, + "grad_norm": 1.2397904924910108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125100 + }, + { + "epoch": 0.6067608905723151, + "grad_norm": 1.0555645246768108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125110 + }, + { + "epoch": 0.6068093887651511, + "grad_norm": 1.0519119086893625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125120 + }, + { + "epoch": 0.6068578869579873, + "grad_norm": 1.0399667615956787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125130 + }, + { + "epoch": 0.6069063851508233, + "grad_norm": 1.3700213230549707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125140 + }, + { + "epoch": 0.6069548833436594, + "grad_norm": 1.0540453843077557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125150 + }, + { + "epoch": 0.6070033815364955, + "grad_norm": 1.0046476006664307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125160 + }, + { + "epoch": 0.6070518797293316, + "grad_norm": 1.0364350799818567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125170 + }, + { + "epoch": 0.6071003779221676, + "grad_norm": 9.832707803525409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125180 + }, + { + "epoch": 0.6071488761150038, + "grad_norm": 1.0835697139555123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125190 + }, + { + "epoch": 0.6071973743078398, + "grad_norm": 1.1313436232285312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125200 + }, + { + "epoch": 0.607245872500676, + "grad_norm": 1.3173739432659204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125210 + }, + { + "epoch": 0.607294370693512, + "grad_norm": 1.0118014159843369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125220 + }, + { + "epoch": 0.6073428688863481, + "grad_norm": 1.004865737286309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125230 + }, + { + "epoch": 0.6073913670791842, + "grad_norm": 1.1881262906854317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125240 + }, + { + "epoch": 0.6074398652720203, + "grad_norm": 1.0821876372801853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125250 + }, + { + "epoch": 0.6074883634648564, + "grad_norm": 9.85337251790952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125260 + }, + { + "epoch": 0.6075368616576925, + "grad_norm": 1.017954289750378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125270 + }, + { + "epoch": 0.6075853598505285, + "grad_norm": 1.0168087527517855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125280 + }, + { + "epoch": 0.6076338580433647, + "grad_norm": 1.109494789375276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125290 + }, + { + "epoch": 0.6076823562362007, + "grad_norm": 1.089233450102256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125300 + }, + { + "epoch": 0.6077308544290368, + "grad_norm": 9.862714023256558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125310 + }, + { + "epoch": 0.6077793526218729, + "grad_norm": 9.904351117029364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125320 + }, + { + "epoch": 0.607827850814709, + "grad_norm": 1.0176324849453522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125330 + }, + { + "epoch": 0.6078763490075451, + "grad_norm": 1.0710760278698217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125340 + }, + { + "epoch": 0.6079248472003812, + "grad_norm": 1.0904131642064385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125350 + }, + { + "epoch": 0.6079733453932172, + "grad_norm": 9.789795285541913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125360 + }, + { + "epoch": 0.6080218435860534, + "grad_norm": 9.883470397653582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125370 + }, + { + "epoch": 0.6080703417788894, + "grad_norm": 9.799624933748419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125380 + }, + { + "epoch": 0.6081188399717256, + "grad_norm": 1.0800564353985465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125390 + }, + { + "epoch": 0.6081673381645616, + "grad_norm": 8.020816153475607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125400 + }, + { + "epoch": 0.6082158363573977, + "grad_norm": 9.820040247632278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125410 + }, + { + "epoch": 0.6082643345502339, + "grad_norm": 9.668654854522174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125420 + }, + { + "epoch": 0.6083128327430699, + "grad_norm": 1.1381806785948356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125430 + }, + { + "epoch": 0.608361330935906, + "grad_norm": 1.0599276123457457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125440 + }, + { + "epoch": 0.6084098291287421, + "grad_norm": 1.0377290493579494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125450 + }, + { + "epoch": 0.6084583273215782, + "grad_norm": 9.73000169324223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125460 + }, + { + "epoch": 0.6085068255144143, + "grad_norm": 9.520154264919256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125470 + }, + { + "epoch": 0.6085553237072504, + "grad_norm": 9.376327625432168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125480 + }, + { + "epoch": 0.6086038219000864, + "grad_norm": 1.0609898737357071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125490 + }, + { + "epoch": 0.6086523200929226, + "grad_norm": 1.0350429135996819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125500 + }, + { + "epoch": 0.6087008182857586, + "grad_norm": 9.960623970073357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125510 + }, + { + "epoch": 0.6087493164785948, + "grad_norm": 5.791038120150915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125520 + }, + { + "epoch": 0.6087978146714308, + "grad_norm": 9.589479077476426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125530 + }, + { + "epoch": 0.6088463128642669, + "grad_norm": 1.005189673719542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125540 + }, + { + "epoch": 0.608894811057103, + "grad_norm": 1.0995942290037419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125550 + }, + { + "epoch": 0.6089433092499391, + "grad_norm": 9.200895334515735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125560 + }, + { + "epoch": 0.6089918074427751, + "grad_norm": 9.494483776961715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125570 + }, + { + "epoch": 0.6090403056356113, + "grad_norm": 9.892860219906652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125580 + }, + { + "epoch": 0.6090888038284473, + "grad_norm": 1.0522817461833256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125590 + }, + { + "epoch": 0.6091373020212835, + "grad_norm": 1.0442633424645464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125600 + }, + { + "epoch": 0.6091858002141195, + "grad_norm": 9.395374434006953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125610 + }, + { + "epoch": 0.6092342984069556, + "grad_norm": 9.365432873664759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125620 + }, + { + "epoch": 0.6092827965997917, + "grad_norm": 9.980277582144481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125630 + }, + { + "epoch": 0.6093312947926278, + "grad_norm": 1.0002391803709543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125640 + }, + { + "epoch": 0.6093797929854639, + "grad_norm": 9.998147021406112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125650 + }, + { + "epoch": 0.6094282911783, + "grad_norm": 9.142328849520709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125660 + }, + { + "epoch": 0.609476789371136, + "grad_norm": 1.2948231642440078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125670 + }, + { + "epoch": 0.6095252875639722, + "grad_norm": 9.405963652397986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125680 + }, + { + "epoch": 0.6095737857568082, + "grad_norm": 1.0195545030455833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125690 + }, + { + "epoch": 0.6096222839496444, + "grad_norm": 1.077976108376788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125700 + }, + { + "epoch": 0.6096707821424804, + "grad_norm": 9.048992666293998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125710 + }, + { + "epoch": 0.6097192803353165, + "grad_norm": 8.952133612183388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125720 + }, + { + "epoch": 0.6097677785281526, + "grad_norm": 9.056164174126025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125730 + }, + { + "epoch": 0.6098162767209887, + "grad_norm": 9.917278731563783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125740 + }, + { + "epoch": 0.6098647749138247, + "grad_norm": 9.597252415005642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125750 + }, + { + "epoch": 0.6099132731066609, + "grad_norm": 9.005731271827244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125760 + }, + { + "epoch": 0.6099617712994969, + "grad_norm": 9.264289246857516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125770 + }, + { + "epoch": 0.6100102694923331, + "grad_norm": 9.095872144371242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125780 + }, + { + "epoch": 0.6100587676851691, + "grad_norm": 9.734639405678536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125790 + }, + { + "epoch": 0.6101072658780052, + "grad_norm": 9.601687622762256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125800 + }, + { + "epoch": 0.6101557640708413, + "grad_norm": 8.829945841171138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125810 + }, + { + "epoch": 0.6102042622636774, + "grad_norm": 8.825079333973918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125820 + }, + { + "epoch": 0.6102527604565134, + "grad_norm": 8.949862717599899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125830 + }, + { + "epoch": 0.6103012586493496, + "grad_norm": 1.010361003750404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125840 + }, + { + "epoch": 0.6103497568421856, + "grad_norm": 9.99497657971915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125850 + }, + { + "epoch": 0.6103982550350218, + "grad_norm": 8.609846702256618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125860 + }, + { + "epoch": 0.6104467532278578, + "grad_norm": 8.919022320696968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125870 + }, + { + "epoch": 0.6104952514206939, + "grad_norm": 8.859576894337806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125880 + }, + { + "epoch": 0.61054374961353, + "grad_norm": 9.631330044612696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125890 + }, + { + "epoch": 0.6105922478063661, + "grad_norm": 9.778845111441115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125900 + }, + { + "epoch": 0.6106407459992022, + "grad_norm": 8.787305461055439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125910 + }, + { + "epoch": 0.6106892441920383, + "grad_norm": 8.717356791976272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125920 + }, + { + "epoch": 0.6107377423848744, + "grad_norm": 8.672009244037326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125930 + }, + { + "epoch": 0.6107862405777105, + "grad_norm": 9.468526229738927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125940 + }, + { + "epoch": 0.6108347387705466, + "grad_norm": 1.0033465258629803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125950 + }, + { + "epoch": 0.6108832369633826, + "grad_norm": 1.5262813235494832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125960 + }, + { + "epoch": 0.6109317351562188, + "grad_norm": 8.550816943397876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125970 + }, + { + "epoch": 0.6109802333490548, + "grad_norm": 8.671085538480838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125980 + }, + { + "epoch": 0.611028731541891, + "grad_norm": 9.893174990338593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 125990 + }, + { + "epoch": 0.611077229734727, + "grad_norm": 9.956636404240271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126000 + }, + { + "epoch": 0.6111257279275631, + "grad_norm": 8.849981725234102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126010 + }, + { + "epoch": 0.6111742261203992, + "grad_norm": 8.858470579298228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126020 + }, + { + "epoch": 0.6112227243132353, + "grad_norm": 8.546452789914838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126030 + }, + { + "epoch": 0.6112712225060714, + "grad_norm": 9.371170506256021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126040 + }, + { + "epoch": 0.6113197206989075, + "grad_norm": 9.653683008536973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126050 + }, + { + "epoch": 0.6113682188917435, + "grad_norm": 8.440679977184118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126060 + }, + { + "epoch": 0.6114167170845797, + "grad_norm": 8.41161380549238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126070 + }, + { + "epoch": 0.6114652152774157, + "grad_norm": 8.999886347282882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126080 + }, + { + "epoch": 0.6115137134702519, + "grad_norm": 1.1910596242614702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126090 + }, + { + "epoch": 0.6115622116630879, + "grad_norm": 1.0384313497979747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126100 + }, + { + "epoch": 0.611610709855924, + "grad_norm": 8.45949443828431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126110 + }, + { + "epoch": 0.6116592080487601, + "grad_norm": 8.303913290319542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126120 + }, + { + "epoch": 0.6117077062415962, + "grad_norm": 8.397413608918214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126130 + }, + { + "epoch": 0.6117562044344322, + "grad_norm": 9.393524180723034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126140 + }, + { + "epoch": 0.6118047026272684, + "grad_norm": 9.509714260502733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126150 + }, + { + "epoch": 0.6118532008201044, + "grad_norm": 8.735811718452169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126160 + }, + { + "epoch": 0.6119016990129406, + "grad_norm": 8.471294421497078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126170 + }, + { + "epoch": 0.6119501972057766, + "grad_norm": 8.694468789371967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126180 + }, + { + "epoch": 0.6119986953986127, + "grad_norm": 9.379147059007664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126190 + }, + { + "epoch": 0.6120471935914488, + "grad_norm": 9.061643879704206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126200 + }, + { + "epoch": 0.6120956917842849, + "grad_norm": 8.179977584177323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126210 + }, + { + "epoch": 0.612144189977121, + "grad_norm": 8.436428799996065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126220 + }, + { + "epoch": 0.6121926881699571, + "grad_norm": 8.090700021057273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126230 + }, + { + "epoch": 0.6122411863627931, + "grad_norm": 8.979854015933597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126240 + }, + { + "epoch": 0.6122896845556293, + "grad_norm": 9.106062748287513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126250 + }, + { + "epoch": 0.6123381827484653, + "grad_norm": 8.048044009001387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126260 + }, + { + "epoch": 0.6123866809413014, + "grad_norm": 7.950150404667511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126270 + }, + { + "epoch": 0.6124351791341375, + "grad_norm": 8.188768418904147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126280 + }, + { + "epoch": 0.6124836773269736, + "grad_norm": 9.091332486832471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126290 + }, + { + "epoch": 0.6125321755198097, + "grad_norm": 8.735197098985736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126300 + }, + { + "epoch": 0.6125806737126458, + "grad_norm": 8.063702949812068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126310 + }, + { + "epoch": 0.6126291719054818, + "grad_norm": 7.845002159001524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126320 + }, + { + "epoch": 0.612677670098318, + "grad_norm": 7.923235756379654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126330 + }, + { + "epoch": 0.612726168291154, + "grad_norm": 8.970937415142544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126340 + }, + { + "epoch": 0.6127746664839901, + "grad_norm": 9.108559595460974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126350 + }, + { + "epoch": 0.6128231646768262, + "grad_norm": 7.935614831922067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126360 + }, + { + "epoch": 0.6128716628696623, + "grad_norm": 7.804819546208819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126370 + }, + { + "epoch": 0.6129201610624984, + "grad_norm": 7.961230608088954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126380 + }, + { + "epoch": 0.6129686592553345, + "grad_norm": 8.843729659702149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126390 + }, + { + "epoch": 0.6130171574481705, + "grad_norm": 9.312367410529987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126400 + }, + { + "epoch": 0.6130656556410067, + "grad_norm": 8.091187453374005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126410 + }, + { + "epoch": 0.6131141538338427, + "grad_norm": 8.661354655714604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126420 + }, + { + "epoch": 0.6131626520266789, + "grad_norm": 7.66773666782683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126430 + }, + { + "epoch": 0.613211150219515, + "grad_norm": 8.604420287383618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126440 + }, + { + "epoch": 0.613259648412351, + "grad_norm": 8.94315590471706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126450 + }, + { + "epoch": 0.6133081466051872, + "grad_norm": 1.1152028633887312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126460 + }, + { + "epoch": 0.6133566447980232, + "grad_norm": 7.668742796340666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126470 + }, + { + "epoch": 0.6134051429908594, + "grad_norm": 8.35429645462682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126480 + }, + { + "epoch": 0.6134536411836954, + "grad_norm": 8.535613460480818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126490 + }, + { + "epoch": 0.6135021393765315, + "grad_norm": 9.115897370293169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126500 + }, + { + "epoch": 0.6135506375693676, + "grad_norm": 7.787848232965189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126510 + }, + { + "epoch": 0.6135991357622037, + "grad_norm": 9.001147560638856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126520 + }, + { + "epoch": 0.6136476339550397, + "grad_norm": 7.541053292925426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126530 + }, + { + "epoch": 0.6136961321478759, + "grad_norm": 8.652664007513522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126540 + }, + { + "epoch": 0.6137446303407119, + "grad_norm": 8.516313698692102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126550 + }, + { + "epoch": 0.6137931285335481, + "grad_norm": 7.420231895594043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126560 + }, + { + "epoch": 0.6138416267263841, + "grad_norm": 7.886796993261669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126570 + }, + { + "epoch": 0.6138901249192202, + "grad_norm": 7.613790131699716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126580 + }, + { + "epoch": 0.6139386231120563, + "grad_norm": 9.077014340164169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126590 + }, + { + "epoch": 0.6139871213048924, + "grad_norm": 1.209755566833337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126600 + }, + { + "epoch": 0.6140356194977284, + "grad_norm": 7.59418199436368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126610 + }, + { + "epoch": 0.6140841176905646, + "grad_norm": 7.734444551488195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126620 + }, + { + "epoch": 0.6141326158834006, + "grad_norm": 7.6685964245371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126630 + }, + { + "epoch": 0.6141811140762368, + "grad_norm": 8.20074177454444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126640 + }, + { + "epoch": 0.6142296122690728, + "grad_norm": 8.581047694633526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126650 + }, + { + "epoch": 0.6142781104619089, + "grad_norm": 7.857089912022275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126660 + }, + { + "epoch": 0.614326608654745, + "grad_norm": 7.5182661873896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126670 + }, + { + "epoch": 0.6143751068475811, + "grad_norm": 7.632198872897789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126680 + }, + { + "epoch": 0.6144236050404172, + "grad_norm": 2.866703141535254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126690 + }, + { + "epoch": 0.6144721032332533, + "grad_norm": 3.295389205959509e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 126700 + }, + { + "epoch": 0.6145206014260893, + "grad_norm": 3.2059506338555366e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 126710 + }, + { + "epoch": 0.6145690996189255, + "grad_norm": 3.5383138310862705e-05, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 126720 + }, + { + "epoch": 0.6146175978117615, + "grad_norm": 5.210918970988132e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 126730 + }, + { + "epoch": 0.6146660960045977, + "grad_norm": 0.0021872171200811863, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 126740 + }, + { + "epoch": 0.6147145941974337, + "grad_norm": 0.0003195228346157819, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126750 + }, + { + "epoch": 0.6147630923902698, + "grad_norm": 0.0015930619556456804, + "learning_rate": 0.0002, + "loss": 0.0043, + "step": 126760 + }, + { + "epoch": 0.6148115905831059, + "grad_norm": 0.04461178556084633, + "learning_rate": 0.0002, + "loss": 0.005, + "step": 126770 + }, + { + "epoch": 0.614860088775942, + "grad_norm": 0.0833456888794899, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 126780 + }, + { + "epoch": 0.614908586968778, + "grad_norm": 6.411808863049373e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 126790 + }, + { + "epoch": 0.6149570851616142, + "grad_norm": 3.267780630267225e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126800 + }, + { + "epoch": 0.6150055833544502, + "grad_norm": 8.645550406072289e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126810 + }, + { + "epoch": 0.6150540815472864, + "grad_norm": 0.00033162705949507654, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126820 + }, + { + "epoch": 0.6151025797401224, + "grad_norm": 3.3350563171552494e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126830 + }, + { + "epoch": 0.6151510779329585, + "grad_norm": 7.368863407464232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126840 + }, + { + "epoch": 0.6151995761257946, + "grad_norm": 1.0432745511934627e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126850 + }, + { + "epoch": 0.6152480743186307, + "grad_norm": 1.3310021131474059e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126860 + }, + { + "epoch": 0.6152965725114667, + "grad_norm": 1.3839724488207139e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126870 + }, + { + "epoch": 0.6153450707043029, + "grad_norm": 1.1767491741920821e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126880 + }, + { + "epoch": 0.6153935688971389, + "grad_norm": 5.719186447095126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126890 + }, + { + "epoch": 0.6154420670899751, + "grad_norm": 5.619966486847261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126900 + }, + { + "epoch": 0.6154905652828111, + "grad_norm": 1.0174319868383463e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126910 + }, + { + "epoch": 0.6155390634756472, + "grad_norm": 1.030172916216543e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126920 + }, + { + "epoch": 0.6155875616684833, + "grad_norm": 9.18114346859511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126930 + }, + { + "epoch": 0.6156360598613194, + "grad_norm": 0.03958454355597496, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 126940 + }, + { + "epoch": 0.6156845580541556, + "grad_norm": 9.138100722339004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126950 + }, + { + "epoch": 0.6157330562469916, + "grad_norm": 1.9456596419331618e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126960 + }, + { + "epoch": 0.6157815544398277, + "grad_norm": 3.851959263556637e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126970 + }, + { + "epoch": 0.6158300526326638, + "grad_norm": 3.335396104375832e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126980 + }, + { + "epoch": 0.6158785508254999, + "grad_norm": 2.2268097382038832e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 126990 + }, + { + "epoch": 0.615927049018336, + "grad_norm": 1.4922644368198235e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127000 + }, + { + "epoch": 0.6159755472111721, + "grad_norm": 1.9546256226021796e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127010 + }, + { + "epoch": 0.6160240454040081, + "grad_norm": 1.6969566786428913e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127020 + }, + { + "epoch": 0.6160725435968443, + "grad_norm": 1.619183058210183e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127030 + }, + { + "epoch": 0.6161210417896803, + "grad_norm": 1.4047067452338524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127040 + }, + { + "epoch": 0.6161695399825164, + "grad_norm": 1.1247354450460989e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127050 + }, + { + "epoch": 0.6162180381753525, + "grad_norm": 1.1421023373259231e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127060 + }, + { + "epoch": 0.6162665363681886, + "grad_norm": 1.0931441465800162e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127070 + }, + { + "epoch": 0.6163150345610247, + "grad_norm": 9.914362635754514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127080 + }, + { + "epoch": 0.6163635327538608, + "grad_norm": 2.3948474336066283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127090 + }, + { + "epoch": 0.6164120309466968, + "grad_norm": 7.429257493640762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127100 + }, + { + "epoch": 0.616460529139533, + "grad_norm": 1.1499885658849962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127110 + }, + { + "epoch": 0.616509027332369, + "grad_norm": 9.216836588166188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127120 + }, + { + "epoch": 0.6165575255252052, + "grad_norm": 7.4516442509775516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127130 + }, + { + "epoch": 0.6166060237180412, + "grad_norm": 7.16099339115317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127140 + }, + { + "epoch": 0.6166545219108773, + "grad_norm": 2.4627626771689393e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127150 + }, + { + "epoch": 0.6167030201037134, + "grad_norm": 8.295913175970782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127160 + }, + { + "epoch": 0.6167515182965495, + "grad_norm": 6.991744157858193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127170 + }, + { + "epoch": 0.6168000164893855, + "grad_norm": 6.294565082498593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127180 + }, + { + "epoch": 0.6168485146822217, + "grad_norm": 6.144799499452347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127190 + }, + { + "epoch": 0.6168970128750577, + "grad_norm": 5.5499672271253075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127200 + }, + { + "epoch": 0.6169455110678939, + "grad_norm": 5.320889613358304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127210 + }, + { + "epoch": 0.6169940092607299, + "grad_norm": 6.06137336944812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127220 + }, + { + "epoch": 0.617042507453566, + "grad_norm": 5.2003028940816876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127230 + }, + { + "epoch": 0.6170910056464021, + "grad_norm": 4.465862730285153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127240 + }, + { + "epoch": 0.6171395038392382, + "grad_norm": 4.726007773570018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127250 + }, + { + "epoch": 0.6171880020320742, + "grad_norm": 4.3268914851068985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127260 + }, + { + "epoch": 0.6172365002249104, + "grad_norm": 5.897731170989573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127270 + }, + { + "epoch": 0.6172849984177464, + "grad_norm": 5.2411651267902926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127280 + }, + { + "epoch": 0.6173334966105826, + "grad_norm": 4.721837740362389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127290 + }, + { + "epoch": 0.6173819948034186, + "grad_norm": 3.696700332511682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127300 + }, + { + "epoch": 0.6174304929962547, + "grad_norm": 4.259426532371435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127310 + }, + { + "epoch": 0.6174789911890908, + "grad_norm": 4.646101842809003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127320 + }, + { + "epoch": 0.6175274893819269, + "grad_norm": 4.396286385599524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127330 + }, + { + "epoch": 0.617575987574763, + "grad_norm": 3.540232683008071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127340 + }, + { + "epoch": 0.6176244857675991, + "grad_norm": 3.432952098592068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127350 + }, + { + "epoch": 0.6176729839604351, + "grad_norm": 4.178371909802081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127360 + }, + { + "epoch": 0.6177214821532713, + "grad_norm": 3.9977826418180484e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 127370 + }, + { + "epoch": 0.6177699803461073, + "grad_norm": 6.397133347491035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127380 + }, + { + "epoch": 0.6178184785389434, + "grad_norm": 8.113023795885965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127390 + }, + { + "epoch": 0.6178669767317795, + "grad_norm": 9.449202480027452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127400 + }, + { + "epoch": 0.6179154749246156, + "grad_norm": 1.0117004421772435e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127410 + }, + { + "epoch": 0.6179639731174517, + "grad_norm": 1.1020714737242088e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127420 + }, + { + "epoch": 0.6180124713102878, + "grad_norm": 1.0831121471710503e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127430 + }, + { + "epoch": 0.6180609695031238, + "grad_norm": 1.99766054720385e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127440 + }, + { + "epoch": 0.61810946769596, + "grad_norm": 8.40845223137876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127450 + }, + { + "epoch": 0.6181579658887961, + "grad_norm": 8.619823347544298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127460 + }, + { + "epoch": 0.6182064640816322, + "grad_norm": 8.380928193219006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127470 + }, + { + "epoch": 0.6182549622744683, + "grad_norm": 8.705666004971135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127480 + }, + { + "epoch": 0.6183034604673043, + "grad_norm": 6.707755801471649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127490 + }, + { + "epoch": 0.6183519586601405, + "grad_norm": 8.123791303660255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127500 + }, + { + "epoch": 0.6184004568529765, + "grad_norm": 9.183098882203922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127510 + }, + { + "epoch": 0.6184489550458127, + "grad_norm": 8.026244358916301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127520 + }, + { + "epoch": 0.6184974532386487, + "grad_norm": 7.735747203696519e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127530 + }, + { + "epoch": 0.6185459514314848, + "grad_norm": 6.101689905335661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127540 + }, + { + "epoch": 0.6185944496243209, + "grad_norm": 5.23195558344014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127550 + }, + { + "epoch": 0.618642947817157, + "grad_norm": 1.0165738785872236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127560 + }, + { + "epoch": 0.618691446009993, + "grad_norm": 6.797084552090382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127570 + }, + { + "epoch": 0.6187399442028292, + "grad_norm": 6.660911822109483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127580 + }, + { + "epoch": 0.6187884423956652, + "grad_norm": 4.8745628191682044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127590 + }, + { + "epoch": 0.6188369405885014, + "grad_norm": 5.017967851017602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127600 + }, + { + "epoch": 0.6188854387813374, + "grad_norm": 6.268012384680333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127610 + }, + { + "epoch": 0.6189339369741735, + "grad_norm": 6.20962237007916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127620 + }, + { + "epoch": 0.6189824351670096, + "grad_norm": 6.1896448642073665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127630 + }, + { + "epoch": 0.6190309333598457, + "grad_norm": 4.736011305794818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127640 + }, + { + "epoch": 0.6190794315526817, + "grad_norm": 4.9285526984022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127650 + }, + { + "epoch": 0.6191279297455179, + "grad_norm": 5.759889518230921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127660 + }, + { + "epoch": 0.6191764279383539, + "grad_norm": 6.098873654991621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127670 + }, + { + "epoch": 0.6192249261311901, + "grad_norm": 5.861557838215958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127680 + }, + { + "epoch": 0.6192734243240261, + "grad_norm": 3.927923444280168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127690 + }, + { + "epoch": 0.6193219225168622, + "grad_norm": 0.05579191818833351, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 127700 + }, + { + "epoch": 0.6193704207096983, + "grad_norm": 5.9113852330483496e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127710 + }, + { + "epoch": 0.6194189189025344, + "grad_norm": 0.0001170186951640062, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127720 + }, + { + "epoch": 0.6194674170953705, + "grad_norm": 7.198461844382109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127730 + }, + { + "epoch": 0.6195159152882066, + "grad_norm": 6.090165697969496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127740 + }, + { + "epoch": 0.6195644134810426, + "grad_norm": 5.9208236962149385e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 127750 + }, + { + "epoch": 0.6196129116738788, + "grad_norm": 8.017631625989452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127760 + }, + { + "epoch": 0.6196614098667148, + "grad_norm": 8.878440894477535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127770 + }, + { + "epoch": 0.619709908059551, + "grad_norm": 9.16086992219789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127780 + }, + { + "epoch": 0.619758406252387, + "grad_norm": 1.1511824595800135e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127790 + }, + { + "epoch": 0.6198069044452231, + "grad_norm": 1.0226061931462027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127800 + }, + { + "epoch": 0.6198554026380592, + "grad_norm": 8.961134881246835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127810 + }, + { + "epoch": 0.6199039008308953, + "grad_norm": 7.633620953129139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127820 + }, + { + "epoch": 0.6199523990237313, + "grad_norm": 7.549199835921172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127830 + }, + { + "epoch": 0.6200008972165675, + "grad_norm": 1.0410666618554387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127840 + }, + { + "epoch": 0.6200493954094035, + "grad_norm": 1.4806606486672536e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127850 + }, + { + "epoch": 0.6200978936022397, + "grad_norm": 7.196562364697456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127860 + }, + { + "epoch": 0.6201463917950757, + "grad_norm": 6.880259206809569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127870 + }, + { + "epoch": 0.6201948899879118, + "grad_norm": 6.932169526407961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127880 + }, + { + "epoch": 0.6202433881807479, + "grad_norm": 7.97439042798942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127890 + }, + { + "epoch": 0.620291886373584, + "grad_norm": 1.5024397725937888e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127900 + }, + { + "epoch": 0.62034038456642, + "grad_norm": 5.851244168297853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127910 + }, + { + "epoch": 0.6203888827592562, + "grad_norm": 8.394058568228502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127920 + }, + { + "epoch": 0.6204373809520922, + "grad_norm": 5.946411874901969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127930 + }, + { + "epoch": 0.6204858791449284, + "grad_norm": 6.18421927356394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127940 + }, + { + "epoch": 0.6205343773377644, + "grad_norm": 5.0957642088178545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127950 + }, + { + "epoch": 0.6205828755306005, + "grad_norm": 4.864215497946134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127960 + }, + { + "epoch": 0.6206313737234367, + "grad_norm": 4.838036602450302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127970 + }, + { + "epoch": 0.6206798719162727, + "grad_norm": 5.397811492002802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127980 + }, + { + "epoch": 0.6207283701091089, + "grad_norm": 4.995500148652354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 127990 + }, + { + "epoch": 0.6207768683019449, + "grad_norm": 4.128428827243624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128000 + }, + { + "epoch": 0.620825366494781, + "grad_norm": 4.312254986871267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128010 + }, + { + "epoch": 0.6208738646876171, + "grad_norm": 4.341464318713406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128020 + }, + { + "epoch": 0.6209223628804532, + "grad_norm": 4.835675554204499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128030 + }, + { + "epoch": 0.6209708610732892, + "grad_norm": 4.3271638787700795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128040 + }, + { + "epoch": 0.6210193592661254, + "grad_norm": 4.138719759794185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128050 + }, + { + "epoch": 0.6210678574589614, + "grad_norm": 4.392830305732787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128060 + }, + { + "epoch": 0.6211163556517976, + "grad_norm": 4.375094249553513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128070 + }, + { + "epoch": 0.6211648538446336, + "grad_norm": 4.051476935273968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128080 + }, + { + "epoch": 0.6212133520374697, + "grad_norm": 3.6476469631452346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128090 + }, + { + "epoch": 0.6212618502303058, + "grad_norm": 4.027035174658522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128100 + }, + { + "epoch": 0.6213103484231419, + "grad_norm": 3.4931249501823913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128110 + }, + { + "epoch": 0.621358846615978, + "grad_norm": 4.22988068748964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128120 + }, + { + "epoch": 0.6214073448088141, + "grad_norm": 3.652009127108613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128130 + }, + { + "epoch": 0.6214558430016501, + "grad_norm": 3.781804252867005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128140 + }, + { + "epoch": 0.6215043411944863, + "grad_norm": 4.069801434525289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128150 + }, + { + "epoch": 0.6215528393873223, + "grad_norm": 3.948210178350564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128160 + }, + { + "epoch": 0.6216013375801585, + "grad_norm": 3.47544664691668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128170 + }, + { + "epoch": 0.6216498357729945, + "grad_norm": 3.3565240755706327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128180 + }, + { + "epoch": 0.6216983339658306, + "grad_norm": 3.4192878501926316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128190 + }, + { + "epoch": 0.6217468321586667, + "grad_norm": 3.887201273755636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128200 + }, + { + "epoch": 0.6217953303515028, + "grad_norm": 3.0667040391563205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128210 + }, + { + "epoch": 0.6218438285443388, + "grad_norm": 3.2899552024900913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128220 + }, + { + "epoch": 0.621892326737175, + "grad_norm": 3.947430286643794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128230 + }, + { + "epoch": 0.621940824930011, + "grad_norm": 3.0045059702388244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128240 + }, + { + "epoch": 0.6219893231228472, + "grad_norm": 2.760800043688505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128250 + }, + { + "epoch": 0.6220378213156832, + "grad_norm": 3.252851456636563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128260 + }, + { + "epoch": 0.6220863195085193, + "grad_norm": 3.0501719265885185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128270 + }, + { + "epoch": 0.6221348177013554, + "grad_norm": 3.1548947845294606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128280 + }, + { + "epoch": 0.6221833158941915, + "grad_norm": 2.993192765643471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128290 + }, + { + "epoch": 0.6222318140870275, + "grad_norm": 2.687407686607912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128300 + }, + { + "epoch": 0.6222803122798637, + "grad_norm": 2.6186005470663076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128310 + }, + { + "epoch": 0.6223288104726997, + "grad_norm": 2.871781362046022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128320 + }, + { + "epoch": 0.6223773086655359, + "grad_norm": 3.4992942801181925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128330 + }, + { + "epoch": 0.6224258068583719, + "grad_norm": 2.7231683361605974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128340 + }, + { + "epoch": 0.622474305051208, + "grad_norm": 2.48448054662731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128350 + }, + { + "epoch": 0.6225228032440441, + "grad_norm": 1.9272920326329768e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128360 + }, + { + "epoch": 0.6225713014368802, + "grad_norm": 2.747920916590374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128370 + }, + { + "epoch": 0.6226197996297163, + "grad_norm": 2.398520791757619e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128380 + }, + { + "epoch": 0.6226682978225524, + "grad_norm": 2.6199900275969412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128390 + }, + { + "epoch": 0.6227167960153884, + "grad_norm": 0.00016578521172050387, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128400 + }, + { + "epoch": 0.6227652942082246, + "grad_norm": 2.447047108944389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128410 + }, + { + "epoch": 0.6228137924010606, + "grad_norm": 2.621183739393018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128420 + }, + { + "epoch": 0.6228622905938968, + "grad_norm": 2.523582224966958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128430 + }, + { + "epoch": 0.6229107887867328, + "grad_norm": 2.2705669380229665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128440 + }, + { + "epoch": 0.6229592869795689, + "grad_norm": 2.1246801225061063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128450 + }, + { + "epoch": 0.623007785172405, + "grad_norm": 2.3082579900801647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128460 + }, + { + "epoch": 0.6230562833652411, + "grad_norm": 2.259717348351842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128470 + }, + { + "epoch": 0.6231047815580772, + "grad_norm": 2.3379245703836204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128480 + }, + { + "epoch": 0.6231532797509133, + "grad_norm": 2.0532713733700803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128490 + }, + { + "epoch": 0.6232017779437494, + "grad_norm": 2.446463895466877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128500 + }, + { + "epoch": 0.6232502761365855, + "grad_norm": 2.1140085664228536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128510 + }, + { + "epoch": 0.6232987743294216, + "grad_norm": 2.029239794865134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128520 + }, + { + "epoch": 0.6233472725222576, + "grad_norm": 2.0886268430331256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128530 + }, + { + "epoch": 0.6233957707150938, + "grad_norm": 1.8663325818124576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128540 + }, + { + "epoch": 0.6234442689079298, + "grad_norm": 1.8708545894696726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128550 + }, + { + "epoch": 0.623492767100766, + "grad_norm": 2.2433641788666137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128560 + }, + { + "epoch": 0.623541265293602, + "grad_norm": 1.8916890667242114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128570 + }, + { + "epoch": 0.6235897634864381, + "grad_norm": 2.0520305952231865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128580 + }, + { + "epoch": 0.6236382616792742, + "grad_norm": 1.856946596490161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128590 + }, + { + "epoch": 0.6236867598721103, + "grad_norm": 2.0256566131138243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128600 + }, + { + "epoch": 0.6237352580649463, + "grad_norm": 2.2233878098631976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128610 + }, + { + "epoch": 0.6237837562577825, + "grad_norm": 1.860186671365227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128620 + }, + { + "epoch": 0.6238322544506185, + "grad_norm": 1.9915594293706818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128630 + }, + { + "epoch": 0.6238807526434547, + "grad_norm": 1.6604508346063085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128640 + }, + { + "epoch": 0.6239292508362907, + "grad_norm": 2.005732994803111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128650 + }, + { + "epoch": 0.6239777490291268, + "grad_norm": 1.8025217514150427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128660 + }, + { + "epoch": 0.6240262472219629, + "grad_norm": 1.6907130202525877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128670 + }, + { + "epoch": 0.624074745414799, + "grad_norm": 1.8885131112256204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128680 + }, + { + "epoch": 0.624123243607635, + "grad_norm": 1.85971498467552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128690 + }, + { + "epoch": 0.6241717418004712, + "grad_norm": 1.7673622778602294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128700 + }, + { + "epoch": 0.6242202399933072, + "grad_norm": 2.009273157455027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128710 + }, + { + "epoch": 0.6242687381861434, + "grad_norm": 1.7521033441880718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128720 + }, + { + "epoch": 0.6243172363789794, + "grad_norm": 1.677767613728065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128730 + }, + { + "epoch": 0.6243657345718155, + "grad_norm": 1.8668932852961007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128740 + }, + { + "epoch": 0.6244142327646516, + "grad_norm": 1.5741860579510103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128750 + }, + { + "epoch": 0.6244627309574877, + "grad_norm": 1.656075596656592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128760 + }, + { + "epoch": 0.6245112291503238, + "grad_norm": 1.6738993053877493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128770 + }, + { + "epoch": 0.6245597273431599, + "grad_norm": 1.5903331131994491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128780 + }, + { + "epoch": 0.6246082255359959, + "grad_norm": 2.029956249316456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128790 + }, + { + "epoch": 0.6246567237288321, + "grad_norm": 1.5107660829016822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128800 + }, + { + "epoch": 0.6247052219216681, + "grad_norm": 1.497479047429806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128810 + }, + { + "epoch": 0.6247537201145043, + "grad_norm": 1.5117570910661016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128820 + }, + { + "epoch": 0.6248022183073403, + "grad_norm": 1.4739442804057035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128830 + }, + { + "epoch": 0.6248507165001764, + "grad_norm": 1.356633674731711e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128840 + }, + { + "epoch": 0.6248992146930125, + "grad_norm": 1.4815532267675735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128850 + }, + { + "epoch": 0.6249477128858486, + "grad_norm": 8.906913717510179e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 128860 + }, + { + "epoch": 0.6249962110786846, + "grad_norm": 0.0031454653944820166, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128870 + }, + { + "epoch": 0.6250447092715208, + "grad_norm": 1.6786349078756757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128880 + }, + { + "epoch": 0.6250932074643568, + "grad_norm": 2.1587411538348533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128890 + }, + { + "epoch": 0.625141705657193, + "grad_norm": 2.936312512247241e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128900 + }, + { + "epoch": 0.625190203850029, + "grad_norm": 5.096631412015995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128910 + }, + { + "epoch": 0.6252387020428651, + "grad_norm": 9.328367923444603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128920 + }, + { + "epoch": 0.6252872002357012, + "grad_norm": 4.761151103593875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128930 + }, + { + "epoch": 0.6253356984285373, + "grad_norm": 2.3594316189701203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128940 + }, + { + "epoch": 0.6253841966213733, + "grad_norm": 2.4294415652548196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128950 + }, + { + "epoch": 0.6254326948142095, + "grad_norm": 3.848307187581668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128960 + }, + { + "epoch": 0.6254811930070455, + "grad_norm": 3.802297214861028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128970 + }, + { + "epoch": 0.6255296911998817, + "grad_norm": 3.837338226730935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128980 + }, + { + "epoch": 0.6255781893927177, + "grad_norm": 2.601294454507297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 128990 + }, + { + "epoch": 0.6256266875855538, + "grad_norm": 1.807574903978093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129000 + }, + { + "epoch": 0.62567518577839, + "grad_norm": 2.417530140519375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129010 + }, + { + "epoch": 0.625723683971226, + "grad_norm": 2.3232100829773117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129020 + }, + { + "epoch": 0.6257721821640622, + "grad_norm": 6.56979818813852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129030 + }, + { + "epoch": 0.6258206803568982, + "grad_norm": 1.4192463595463778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129040 + }, + { + "epoch": 0.6258691785497343, + "grad_norm": 1.925855258377851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129050 + }, + { + "epoch": 0.6259176767425704, + "grad_norm": 2.2023514247848652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129060 + }, + { + "epoch": 0.6259661749354065, + "grad_norm": 3.1597496672475245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129070 + }, + { + "epoch": 0.6260146731282425, + "grad_norm": 2.1803109575557755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129080 + }, + { + "epoch": 0.6260631713210787, + "grad_norm": 1.6633342738714418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129090 + }, + { + "epoch": 0.6261116695139147, + "grad_norm": 1.4993064496593433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129100 + }, + { + "epoch": 0.6261601677067509, + "grad_norm": 1.9046705119762919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129110 + }, + { + "epoch": 0.6262086658995869, + "grad_norm": 1.4395776815945283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129120 + }, + { + "epoch": 0.626257164092423, + "grad_norm": 1.8337241272092797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129130 + }, + { + "epoch": 0.6263056622852591, + "grad_norm": 1.4702526414112072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129140 + }, + { + "epoch": 0.6263541604780952, + "grad_norm": 1.454445850868069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129150 + }, + { + "epoch": 0.6264026586709313, + "grad_norm": 2.529438916099025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129160 + }, + { + "epoch": 0.6264511568637674, + "grad_norm": 3.447040398896206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129170 + }, + { + "epoch": 0.6264996550566034, + "grad_norm": 2.1833354821865214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129180 + }, + { + "epoch": 0.6265481532494396, + "grad_norm": 1.8593974573377636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129190 + }, + { + "epoch": 0.6265966514422756, + "grad_norm": 1.4004556305735605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129200 + }, + { + "epoch": 0.6266451496351118, + "grad_norm": 1.6459737253171625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129210 + }, + { + "epoch": 0.6266936478279478, + "grad_norm": 1.8540910105002695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129220 + }, + { + "epoch": 0.6267421460207839, + "grad_norm": 2.101239942930988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129230 + }, + { + "epoch": 0.62679064421362, + "grad_norm": 1.0603795317365439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129240 + }, + { + "epoch": 0.6268391424064561, + "grad_norm": 1.559849920340639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129250 + }, + { + "epoch": 0.6268876405992921, + "grad_norm": 1.6298042737616925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129260 + }, + { + "epoch": 0.6269361387921283, + "grad_norm": 1.41804071063234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129270 + }, + { + "epoch": 0.6269846369849643, + "grad_norm": 1.6605430346317007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129280 + }, + { + "epoch": 0.6270331351778005, + "grad_norm": 1.2578432233567582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129290 + }, + { + "epoch": 0.6270816333706365, + "grad_norm": 1.1112845186289633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129300 + }, + { + "epoch": 0.6271301315634726, + "grad_norm": 2.8845263386756415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129310 + }, + { + "epoch": 0.6271786297563087, + "grad_norm": 1.6069873254309641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129320 + }, + { + "epoch": 0.6272271279491448, + "grad_norm": 1.8850876131182304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129330 + }, + { + "epoch": 0.6272756261419808, + "grad_norm": 1.2586154980454012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129340 + }, + { + "epoch": 0.627324124334817, + "grad_norm": 1.2648631582123926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129350 + }, + { + "epoch": 0.627372622527653, + "grad_norm": 1.434784394405142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129360 + }, + { + "epoch": 0.6274211207204892, + "grad_norm": 1.5894897842372302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129370 + }, + { + "epoch": 0.6274696189133252, + "grad_norm": 1.4763229501113528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129380 + }, + { + "epoch": 0.6275181171061613, + "grad_norm": 9.259072726308659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129390 + }, + { + "epoch": 0.6275666152989974, + "grad_norm": 1.0875753559957957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129400 + }, + { + "epoch": 0.6276151134918335, + "grad_norm": 1.4799094287809567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129410 + }, + { + "epoch": 0.6276636116846696, + "grad_norm": 4.982269092579372e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129420 + }, + { + "epoch": 0.6277121098775057, + "grad_norm": 1.415079168509692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129430 + }, + { + "epoch": 0.6277606080703417, + "grad_norm": 1.5418302155012498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129440 + }, + { + "epoch": 0.6278091062631779, + "grad_norm": 1.7988329545914894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129450 + }, + { + "epoch": 0.6278576044560139, + "grad_norm": 4.123379312659381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129460 + }, + { + "epoch": 0.62790610264885, + "grad_norm": 1.267036395802279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129470 + }, + { + "epoch": 0.6279546008416861, + "grad_norm": 1.3150112181392615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129480 + }, + { + "epoch": 0.6280030990345222, + "grad_norm": 9.279105483983585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129490 + }, + { + "epoch": 0.6280515972273583, + "grad_norm": 1.0186811323364964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129500 + }, + { + "epoch": 0.6281000954201944, + "grad_norm": 1.2988452908757608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129510 + }, + { + "epoch": 0.6281485936130305, + "grad_norm": 1.3327826309250668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129520 + }, + { + "epoch": 0.6281970918058666, + "grad_norm": 1.00166323591111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129530 + }, + { + "epoch": 0.6282455899987027, + "grad_norm": 9.395848792337347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129540 + }, + { + "epoch": 0.6282940881915388, + "grad_norm": 1.1073312862208695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129550 + }, + { + "epoch": 0.6283425863843749, + "grad_norm": 9.719157105791965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129560 + }, + { + "epoch": 0.6283910845772109, + "grad_norm": 1.0682351785362698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129570 + }, + { + "epoch": 0.6284395827700471, + "grad_norm": 1.017202521325089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129580 + }, + { + "epoch": 0.6284880809628831, + "grad_norm": 7.413918865495361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129590 + }, + { + "epoch": 0.6285365791557193, + "grad_norm": 9.008965093926236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129600 + }, + { + "epoch": 0.6285850773485553, + "grad_norm": 1.3386405726123485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129610 + }, + { + "epoch": 0.6286335755413914, + "grad_norm": 1.7722322809277102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129620 + }, + { + "epoch": 0.6286820737342275, + "grad_norm": 9.341734994450235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129630 + }, + { + "epoch": 0.6287305719270636, + "grad_norm": 1.0087027249028324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129640 + }, + { + "epoch": 0.6287790701198996, + "grad_norm": 6.514334245366626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129650 + }, + { + "epoch": 0.6288275683127358, + "grad_norm": 1.030705675475474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129660 + }, + { + "epoch": 0.6288760665055718, + "grad_norm": 1.1200646667930414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129670 + }, + { + "epoch": 0.628924564698408, + "grad_norm": 9.130053513217717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129680 + }, + { + "epoch": 0.628973062891244, + "grad_norm": 9.605554396330263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129690 + }, + { + "epoch": 0.6290215610840801, + "grad_norm": 6.901111078150279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129700 + }, + { + "epoch": 0.6290700592769162, + "grad_norm": 8.912788302950503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129710 + }, + { + "epoch": 0.6291185574697523, + "grad_norm": 9.410267125531391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129720 + }, + { + "epoch": 0.6291670556625883, + "grad_norm": 8.935544997257239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129730 + }, + { + "epoch": 0.6292155538554245, + "grad_norm": 7.771784567012219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129740 + }, + { + "epoch": 0.6292640520482605, + "grad_norm": 9.99185999717156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129750 + }, + { + "epoch": 0.6293125502410967, + "grad_norm": 9.700245300336974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129760 + }, + { + "epoch": 0.6293610484339327, + "grad_norm": 1.1077122508140747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129770 + }, + { + "epoch": 0.6294095466267688, + "grad_norm": 9.377499736729078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129780 + }, + { + "epoch": 0.6294580448196049, + "grad_norm": 6.635121394538146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129790 + }, + { + "epoch": 0.629506543012441, + "grad_norm": 6.726510264343233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129800 + }, + { + "epoch": 0.629555041205277, + "grad_norm": 8.977582410807372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129810 + }, + { + "epoch": 0.6296035393981132, + "grad_norm": 8.66493849116523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129820 + }, + { + "epoch": 0.6296520375909492, + "grad_norm": 8.797076702649065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129830 + }, + { + "epoch": 0.6297005357837854, + "grad_norm": 7.028638151496125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129840 + }, + { + "epoch": 0.6297490339766214, + "grad_norm": 7.522405667259591e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129850 + }, + { + "epoch": 0.6297975321694576, + "grad_norm": 1.027822918331367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129860 + }, + { + "epoch": 0.6298460303622936, + "grad_norm": 9.125616884375631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129870 + }, + { + "epoch": 0.6298945285551297, + "grad_norm": 8.035358973756956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129880 + }, + { + "epoch": 0.6299430267479658, + "grad_norm": 1.0243796850772924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129890 + }, + { + "epoch": 0.6299915249408019, + "grad_norm": 6.574227313649317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129900 + }, + { + "epoch": 0.6300400231336379, + "grad_norm": 8.399830221605953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129910 + }, + { + "epoch": 0.6300885213264741, + "grad_norm": 1.0342098448745674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129920 + }, + { + "epoch": 0.6301370195193101, + "grad_norm": 2.02020896722388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129930 + }, + { + "epoch": 0.6301855177121463, + "grad_norm": 8.294655913232418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129940 + }, + { + "epoch": 0.6302340159049823, + "grad_norm": 6.585298706340836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129950 + }, + { + "epoch": 0.6302825140978184, + "grad_norm": 1.7740446764946682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129960 + }, + { + "epoch": 0.6303310122906545, + "grad_norm": 8.249242000601953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129970 + }, + { + "epoch": 0.6303795104834906, + "grad_norm": 7.20738057680137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129980 + }, + { + "epoch": 0.6304280086763266, + "grad_norm": 6.843530968581035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 129990 + }, + { + "epoch": 0.6304765068691628, + "grad_norm": 6.100485734350514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130000 + }, + { + "epoch": 0.6305250050619988, + "grad_norm": 8.045364552344836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130010 + }, + { + "epoch": 0.630573503254835, + "grad_norm": 6.894965167703049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130020 + }, + { + "epoch": 0.6306220014476711, + "grad_norm": 1.0860214842978166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130030 + }, + { + "epoch": 0.6306704996405071, + "grad_norm": 1.9593126125982963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130040 + }, + { + "epoch": 0.6307189978333433, + "grad_norm": 5.854273581462621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130050 + }, + { + "epoch": 0.6307674960261793, + "grad_norm": 6.87053102410573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130060 + }, + { + "epoch": 0.6308159942190155, + "grad_norm": 6.764871613995638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130070 + }, + { + "epoch": 0.6308644924118515, + "grad_norm": 6.916089319020102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130080 + }, + { + "epoch": 0.6309129906046876, + "grad_norm": 7.104016503944877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130090 + }, + { + "epoch": 0.6309614887975237, + "grad_norm": 6.204162446010741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130100 + }, + { + "epoch": 0.6310099869903598, + "grad_norm": 6.293980163718516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130110 + }, + { + "epoch": 0.6310584851831958, + "grad_norm": 8.095649377537484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130120 + }, + { + "epoch": 0.631106983376032, + "grad_norm": 9.098029636334104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130130 + }, + { + "epoch": 0.631155481568868, + "grad_norm": 7.121156500033976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130140 + }, + { + "epoch": 0.6312039797617042, + "grad_norm": 5.081148515273526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130150 + }, + { + "epoch": 0.6312524779545402, + "grad_norm": 5.568569463321182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130160 + }, + { + "epoch": 0.6313009761473763, + "grad_norm": 5.738471031691006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130170 + }, + { + "epoch": 0.6313494743402124, + "grad_norm": 6.326900461317564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130180 + }, + { + "epoch": 0.6313979725330485, + "grad_norm": 5.319134288583882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130190 + }, + { + "epoch": 0.6314464707258846, + "grad_norm": 4.834245146412286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130200 + }, + { + "epoch": 0.6314949689187207, + "grad_norm": 6.685781386295275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130210 + }, + { + "epoch": 0.6315434671115567, + "grad_norm": 6.838366743977531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130220 + }, + { + "epoch": 0.6315919653043929, + "grad_norm": 5.764848083344987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130230 + }, + { + "epoch": 0.6316404634972289, + "grad_norm": 6.562504495377652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130240 + }, + { + "epoch": 0.631688961690065, + "grad_norm": 4.844534373660281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130250 + }, + { + "epoch": 0.6317374598829011, + "grad_norm": 5.537969514080032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130260 + }, + { + "epoch": 0.6317859580757372, + "grad_norm": 5.98135841300973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130270 + }, + { + "epoch": 0.6318344562685733, + "grad_norm": 5.722398555008112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130280 + }, + { + "epoch": 0.6318829544614094, + "grad_norm": 6.707772399749956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130290 + }, + { + "epoch": 0.6319314526542454, + "grad_norm": 6.18922456396831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130300 + }, + { + "epoch": 0.6319799508470816, + "grad_norm": 5.089955266157631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130310 + }, + { + "epoch": 0.6320284490399176, + "grad_norm": 6.452919478761032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130320 + }, + { + "epoch": 0.6320769472327538, + "grad_norm": 4.797949486601283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130330 + }, + { + "epoch": 0.6321254454255898, + "grad_norm": 6.780832109143375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130340 + }, + { + "epoch": 0.6321739436184259, + "grad_norm": 5.283286554913502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130350 + }, + { + "epoch": 0.632222441811262, + "grad_norm": 5.04946854107402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130360 + }, + { + "epoch": 0.6322709400040981, + "grad_norm": 5.481641665028292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130370 + }, + { + "epoch": 0.6323194381969341, + "grad_norm": 6.643933261329948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130380 + }, + { + "epoch": 0.6323679363897703, + "grad_norm": 5.227003043728473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130390 + }, + { + "epoch": 0.6324164345826063, + "grad_norm": 5.61379863484035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130400 + }, + { + "epoch": 0.6324649327754425, + "grad_norm": 5.288488864607643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130410 + }, + { + "epoch": 0.6325134309682785, + "grad_norm": 5.432082730294496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130420 + }, + { + "epoch": 0.6325619291611146, + "grad_norm": 5.158020144335751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130430 + }, + { + "epoch": 0.6326104273539507, + "grad_norm": 5.051086873208988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130440 + }, + { + "epoch": 0.6326589255467868, + "grad_norm": 4.4756436068382754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130450 + }, + { + "epoch": 0.6327074237396229, + "grad_norm": 3.772272521018749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130460 + }, + { + "epoch": 0.632755921932459, + "grad_norm": 1.1410149909352185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130470 + }, + { + "epoch": 0.632804420125295, + "grad_norm": 5.618808245344553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130480 + }, + { + "epoch": 0.6328529183181312, + "grad_norm": 4.6380972662518616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130490 + }, + { + "epoch": 0.6329014165109672, + "grad_norm": 3.4131046504626283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130500 + }, + { + "epoch": 0.6329499147038034, + "grad_norm": 5.35457843398035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130510 + }, + { + "epoch": 0.6329984128966394, + "grad_norm": 4.7344320819320274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130520 + }, + { + "epoch": 0.6330469110894755, + "grad_norm": 4.819135028810706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130530 + }, + { + "epoch": 0.6330954092823117, + "grad_norm": 2.8024376206303714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130540 + }, + { + "epoch": 0.6331439074751477, + "grad_norm": 4.4804849608226505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130550 + }, + { + "epoch": 0.6331924056679838, + "grad_norm": 5.024949700782599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130560 + }, + { + "epoch": 0.6332409038608199, + "grad_norm": 4.23909284563706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130570 + }, + { + "epoch": 0.633289402053656, + "grad_norm": 4.956507382303244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130580 + }, + { + "epoch": 0.6333379002464921, + "grad_norm": 5.23854509992816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130590 + }, + { + "epoch": 0.6333863984393282, + "grad_norm": 4.3934923610322585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130600 + }, + { + "epoch": 0.6334348966321642, + "grad_norm": 4.2319092585785256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130610 + }, + { + "epoch": 0.6334833948250004, + "grad_norm": 4.1475189505035814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130620 + }, + { + "epoch": 0.6335318930178364, + "grad_norm": 4.6846548684698064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130630 + }, + { + "epoch": 0.6335803912106726, + "grad_norm": 4.068054693107115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130640 + }, + { + "epoch": 0.6336288894035086, + "grad_norm": 4.181489998700272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130650 + }, + { + "epoch": 0.6336773875963447, + "grad_norm": 6.52469736905914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130660 + }, + { + "epoch": 0.6337258857891808, + "grad_norm": 4.401177875479334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130670 + }, + { + "epoch": 0.6337743839820169, + "grad_norm": 4.6283685151138343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130680 + }, + { + "epoch": 0.6338228821748529, + "grad_norm": 3.7958216125844046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130690 + }, + { + "epoch": 0.6338713803676891, + "grad_norm": 7.648683890693064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130700 + }, + { + "epoch": 0.6339198785605251, + "grad_norm": 4.6454252355943026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130710 + }, + { + "epoch": 0.6339683767533613, + "grad_norm": 6.647046575380955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130720 + }, + { + "epoch": 0.6340168749461973, + "grad_norm": 4.056153954934416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130730 + }, + { + "epoch": 0.6340653731390334, + "grad_norm": 3.858762909203506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130740 + }, + { + "epoch": 0.6341138713318695, + "grad_norm": 4.5893997935309017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130750 + }, + { + "epoch": 0.6341623695247056, + "grad_norm": 4.345831143837131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130760 + }, + { + "epoch": 0.6342108677175416, + "grad_norm": 3.85322152851586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130770 + }, + { + "epoch": 0.6342593659103778, + "grad_norm": 3.8435740634668036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130780 + }, + { + "epoch": 0.6343078641032138, + "grad_norm": 3.8913347566449374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130790 + }, + { + "epoch": 0.63435636229605, + "grad_norm": 3.3331264148728224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130800 + }, + { + "epoch": 0.634404860488886, + "grad_norm": 3.4717865560196515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130810 + }, + { + "epoch": 0.6344533586817221, + "grad_norm": 4.4068116267226287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130820 + }, + { + "epoch": 0.6345018568745582, + "grad_norm": 3.721647203747125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130830 + }, + { + "epoch": 0.6345503550673943, + "grad_norm": 3.992982726686023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130840 + }, + { + "epoch": 0.6345988532602304, + "grad_norm": 3.8791273482274846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130850 + }, + { + "epoch": 0.6346473514530665, + "grad_norm": 4.379480174065975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130860 + }, + { + "epoch": 0.6346958496459025, + "grad_norm": 4.001889806204417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130870 + }, + { + "epoch": 0.6347443478387387, + "grad_norm": 4.2523029719632177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130880 + }, + { + "epoch": 0.6347928460315747, + "grad_norm": 3.998045485786861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130890 + }, + { + "epoch": 0.6348413442244109, + "grad_norm": 3.8282871628325665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130900 + }, + { + "epoch": 0.6348898424172469, + "grad_norm": 3.9258364381566935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130910 + }, + { + "epoch": 0.634938340610083, + "grad_norm": 4.1284937424279633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130920 + }, + { + "epoch": 0.6349868388029191, + "grad_norm": 3.7887281223447644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130930 + }, + { + "epoch": 0.6350353369957552, + "grad_norm": 3.4362190604042553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130940 + }, + { + "epoch": 0.6350838351885912, + "grad_norm": 4.184090585113154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130950 + }, + { + "epoch": 0.6351323333814274, + "grad_norm": 3.70160989859869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130960 + }, + { + "epoch": 0.6351808315742634, + "grad_norm": 4.076852917478391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130970 + }, + { + "epoch": 0.6352293297670996, + "grad_norm": 3.95719752077639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130980 + }, + { + "epoch": 0.6352778279599356, + "grad_norm": 4.5819700744687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 130990 + }, + { + "epoch": 0.6353263261527717, + "grad_norm": 3.7111760775587754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131000 + }, + { + "epoch": 0.6353748243456078, + "grad_norm": 6.326415586954681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131010 + }, + { + "epoch": 0.6354233225384439, + "grad_norm": 4.034332903302129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131020 + }, + { + "epoch": 0.63547182073128, + "grad_norm": 3.7075389514029666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131030 + }, + { + "epoch": 0.6355203189241161, + "grad_norm": 3.482443560187676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131040 + }, + { + "epoch": 0.6355688171169522, + "grad_norm": 4.144868626099196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131050 + }, + { + "epoch": 0.6356173153097883, + "grad_norm": 3.58856993898371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131060 + }, + { + "epoch": 0.6356658135026244, + "grad_norm": 3.3346745453854965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131070 + }, + { + "epoch": 0.6357143116954604, + "grad_norm": 3.5924105645790405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131080 + }, + { + "epoch": 0.6357628098882966, + "grad_norm": 3.8371285882021766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131090 + }, + { + "epoch": 0.6358113080811326, + "grad_norm": 4.36456190300305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131100 + }, + { + "epoch": 0.6358598062739688, + "grad_norm": 3.4242125934724754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131110 + }, + { + "epoch": 0.6359083044668048, + "grad_norm": 4.0821512925504067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131120 + }, + { + "epoch": 0.6359568026596409, + "grad_norm": 3.6453380403145275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131130 + }, + { + "epoch": 0.636005300852477, + "grad_norm": 3.656736566881591e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131140 + }, + { + "epoch": 0.6360537990453131, + "grad_norm": 3.2202277111537114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131150 + }, + { + "epoch": 0.6361022972381492, + "grad_norm": 4.774726676259888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131160 + }, + { + "epoch": 0.6361507954309853, + "grad_norm": 3.7187180623732274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131170 + }, + { + "epoch": 0.6361992936238213, + "grad_norm": 3.5200775982957566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131180 + }, + { + "epoch": 0.6362477918166575, + "grad_norm": 3.1592421123605163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131190 + }, + { + "epoch": 0.6362962900094935, + "grad_norm": 3.2060179933068866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131200 + }, + { + "epoch": 0.6363447882023296, + "grad_norm": 3.1391758170684625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131210 + }, + { + "epoch": 0.6363932863951657, + "grad_norm": 1.479213779020938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131220 + }, + { + "epoch": 0.6364417845880018, + "grad_norm": 4.0052049143923796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131230 + }, + { + "epoch": 0.6364902827808379, + "grad_norm": 2.9905012866038305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131240 + }, + { + "epoch": 0.636538780973674, + "grad_norm": 3.1036984182719607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131250 + }, + { + "epoch": 0.63658727916651, + "grad_norm": 3.6405734249456145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131260 + }, + { + "epoch": 0.6366357773593462, + "grad_norm": 3.1326246130447544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131270 + }, + { + "epoch": 0.6366842755521822, + "grad_norm": 3.140661704037484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131280 + }, + { + "epoch": 0.6367327737450184, + "grad_norm": 3.9681626162746397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131290 + }, + { + "epoch": 0.6367812719378544, + "grad_norm": 2.85445537429041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131300 + }, + { + "epoch": 0.6368297701306905, + "grad_norm": 3.172150400132523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131310 + }, + { + "epoch": 0.6368782683235266, + "grad_norm": 3.6593957020158996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131320 + }, + { + "epoch": 0.6369267665163627, + "grad_norm": 2.982172588872345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131330 + }, + { + "epoch": 0.6369752647091987, + "grad_norm": 8.072032642303384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131340 + }, + { + "epoch": 0.6370237629020349, + "grad_norm": 3.679919871046877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131350 + }, + { + "epoch": 0.6370722610948709, + "grad_norm": 3.172907838688843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131360 + }, + { + "epoch": 0.6371207592877071, + "grad_norm": 6.744238589817542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131370 + }, + { + "epoch": 0.6371692574805431, + "grad_norm": 4.7096452249206777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131380 + }, + { + "epoch": 0.6372177556733792, + "grad_norm": 2.9869104878343933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131390 + }, + { + "epoch": 0.6372662538662153, + "grad_norm": 2.827547973538458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131400 + }, + { + "epoch": 0.6373147520590514, + "grad_norm": 3.109349506758008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131410 + }, + { + "epoch": 0.6373632502518874, + "grad_norm": 4.711976089311065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131420 + }, + { + "epoch": 0.6374117484447236, + "grad_norm": 3.135958763778035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131430 + }, + { + "epoch": 0.6374602466375596, + "grad_norm": 2.879889677842584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131440 + }, + { + "epoch": 0.6375087448303958, + "grad_norm": 2.838343391431408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131450 + }, + { + "epoch": 0.6375572430232318, + "grad_norm": 3.1794397159501386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131460 + }, + { + "epoch": 0.637605741216068, + "grad_norm": 3.124489751371584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131470 + }, + { + "epoch": 0.637654239408904, + "grad_norm": 3.0267378292592184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131480 + }, + { + "epoch": 0.6377027376017401, + "grad_norm": 3.9449298583349446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131490 + }, + { + "epoch": 0.6377512357945762, + "grad_norm": 3.631508320722787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131500 + }, + { + "epoch": 0.6377997339874123, + "grad_norm": 3.061187783259811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131510 + }, + { + "epoch": 0.6378482321802483, + "grad_norm": 2.903724123370921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131520 + }, + { + "epoch": 0.6378967303730845, + "grad_norm": 3.311980094622413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131530 + }, + { + "epoch": 0.6379452285659205, + "grad_norm": 2.792237694393407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131540 + }, + { + "epoch": 0.6379937267587567, + "grad_norm": 2.948318922335602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131550 + }, + { + "epoch": 0.6380422249515928, + "grad_norm": 3.0042363619031676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131560 + }, + { + "epoch": 0.6380907231444288, + "grad_norm": 2.884201535380271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131570 + }, + { + "epoch": 0.638139221337265, + "grad_norm": 2.791236681787268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131580 + }, + { + "epoch": 0.638187719530101, + "grad_norm": 3.174010032580554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131590 + }, + { + "epoch": 0.6382362177229371, + "grad_norm": 2.4633882844682375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131600 + }, + { + "epoch": 0.6382847159157732, + "grad_norm": 2.9197047979323543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131610 + }, + { + "epoch": 0.6383332141086093, + "grad_norm": 5.820897968078498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131620 + }, + { + "epoch": 0.6383817123014454, + "grad_norm": 3.1431227398570627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131630 + }, + { + "epoch": 0.6384302104942815, + "grad_norm": 4.622733058567974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131640 + }, + { + "epoch": 0.6384787086871175, + "grad_norm": 2.2661207310648024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131650 + }, + { + "epoch": 0.6385272068799537, + "grad_norm": 2.5883579723995354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131660 + }, + { + "epoch": 0.6385757050727897, + "grad_norm": 2.9646417942785774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131670 + }, + { + "epoch": 0.6386242032656259, + "grad_norm": 1.8064654341287678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131680 + }, + { + "epoch": 0.6386727014584619, + "grad_norm": 2.9835698001079436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131690 + }, + { + "epoch": 0.638721199651298, + "grad_norm": 2.74682037115781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131700 + }, + { + "epoch": 0.6387696978441341, + "grad_norm": 2.497629623121611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131710 + }, + { + "epoch": 0.6388181960369702, + "grad_norm": 2.7685652526088234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131720 + }, + { + "epoch": 0.6388666942298062, + "grad_norm": 2.6220857307635015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131730 + }, + { + "epoch": 0.6389151924226424, + "grad_norm": 2.159316494498853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131740 + }, + { + "epoch": 0.6389636906154784, + "grad_norm": 2.61016168678907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131750 + }, + { + "epoch": 0.6390121888083146, + "grad_norm": 4.785331384482561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131760 + }, + { + "epoch": 0.6390606870011506, + "grad_norm": 2.3753825928451988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131770 + }, + { + "epoch": 0.6391091851939867, + "grad_norm": 4.794528649654239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131780 + }, + { + "epoch": 0.6391576833868228, + "grad_norm": 3.180582552886335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131790 + }, + { + "epoch": 0.6392061815796589, + "grad_norm": 2.2586384318401542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131800 + }, + { + "epoch": 0.639254679772495, + "grad_norm": 2.6014592435785744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131810 + }, + { + "epoch": 0.6393031779653311, + "grad_norm": 2.608541933568631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131820 + }, + { + "epoch": 0.6393516761581671, + "grad_norm": 2.5555709726177156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131830 + }, + { + "epoch": 0.6394001743510033, + "grad_norm": 2.474133111718402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131840 + }, + { + "epoch": 0.6394486725438393, + "grad_norm": 2.352286685436411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131850 + }, + { + "epoch": 0.6394971707366754, + "grad_norm": 2.407596753073449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131860 + }, + { + "epoch": 0.6395456689295115, + "grad_norm": 2.1755629120434605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131870 + }, + { + "epoch": 0.6395941671223476, + "grad_norm": 2.476718066191097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131880 + }, + { + "epoch": 0.6396426653151837, + "grad_norm": 2.3084224665126385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131890 + }, + { + "epoch": 0.6396911635080198, + "grad_norm": 2.0240501896751084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131900 + }, + { + "epoch": 0.6397396617008558, + "grad_norm": 2.231559932397431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131910 + }, + { + "epoch": 0.639788159893692, + "grad_norm": 2.4680537080712384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131920 + }, + { + "epoch": 0.639836658086528, + "grad_norm": 2.4354460492759245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131930 + }, + { + "epoch": 0.6398851562793642, + "grad_norm": 2.7231791932536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131940 + }, + { + "epoch": 0.6399336544722002, + "grad_norm": 2.0015372115267382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131950 + }, + { + "epoch": 0.6399821526650363, + "grad_norm": 7.585802563880861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131960 + }, + { + "epoch": 0.6400306508578724, + "grad_norm": 2.315095457561256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131970 + }, + { + "epoch": 0.6400791490507085, + "grad_norm": 2.6016573428933043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131980 + }, + { + "epoch": 0.6401276472435445, + "grad_norm": 2.45556316258444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 131990 + }, + { + "epoch": 0.6401761454363807, + "grad_norm": 2.3350138178557245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132000 + }, + { + "epoch": 0.6402246436292167, + "grad_norm": 2.2514622344260715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132010 + }, + { + "epoch": 0.6402731418220529, + "grad_norm": 2.3785474922988215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132020 + }, + { + "epoch": 0.6403216400148889, + "grad_norm": 2.5634295752752223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132030 + }, + { + "epoch": 0.640370138207725, + "grad_norm": 2.0616816698293405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132040 + }, + { + "epoch": 0.6404186364005611, + "grad_norm": 2.785424157991656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132050 + }, + { + "epoch": 0.6404671345933972, + "grad_norm": 2.384912249908666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132060 + }, + { + "epoch": 0.6405156327862334, + "grad_norm": 2.2659121157175832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132070 + }, + { + "epoch": 0.6405641309790694, + "grad_norm": 2.245224663965928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132080 + }, + { + "epoch": 0.6406126291719055, + "grad_norm": 2.3579541164053808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132090 + }, + { + "epoch": 0.6406611273647416, + "grad_norm": 2.219356076693657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132100 + }, + { + "epoch": 0.6407096255575777, + "grad_norm": 2.2667440191526111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132110 + }, + { + "epoch": 0.6407581237504137, + "grad_norm": 2.2829858892237098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132120 + }, + { + "epoch": 0.6408066219432499, + "grad_norm": 2.0838317027482844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132130 + }, + { + "epoch": 0.6408551201360859, + "grad_norm": 2.148298108295421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132140 + }, + { + "epoch": 0.6409036183289221, + "grad_norm": 2.014937336980438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132150 + }, + { + "epoch": 0.6409521165217581, + "grad_norm": 2.1377186953941418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132160 + }, + { + "epoch": 0.6410006147145942, + "grad_norm": 2.1921678694525326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132170 + }, + { + "epoch": 0.6410491129074303, + "grad_norm": 2.2979592984029296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132180 + }, + { + "epoch": 0.6410976111002664, + "grad_norm": 2.0116105758916092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132190 + }, + { + "epoch": 0.6411461092931025, + "grad_norm": 2.085371022531035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132200 + }, + { + "epoch": 0.6411946074859386, + "grad_norm": 2.2187401782503002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132210 + }, + { + "epoch": 0.6412431056787746, + "grad_norm": 2.1536145311529253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132220 + }, + { + "epoch": 0.6412916038716108, + "grad_norm": 2.189150762887948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132230 + }, + { + "epoch": 0.6413401020644468, + "grad_norm": 2.062807595848426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132240 + }, + { + "epoch": 0.641388600257283, + "grad_norm": 1.922486490002484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132250 + }, + { + "epoch": 0.641437098450119, + "grad_norm": 2.2977827995873668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132260 + }, + { + "epoch": 0.6414855966429551, + "grad_norm": 3.9522277006653894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132270 + }, + { + "epoch": 0.6415340948357912, + "grad_norm": 1.9722564559288003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132280 + }, + { + "epoch": 0.6415825930286273, + "grad_norm": 2.213434839859474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132290 + }, + { + "epoch": 0.6416310912214633, + "grad_norm": 1.838586030089573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132300 + }, + { + "epoch": 0.6416795894142995, + "grad_norm": 2.1865665189579886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132310 + }, + { + "epoch": 0.6417280876071355, + "grad_norm": 2.1544717299093463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132320 + }, + { + "epoch": 0.6417765857999717, + "grad_norm": 1.912562623829217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132330 + }, + { + "epoch": 0.6418250839928077, + "grad_norm": 1.9588898680922284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132340 + }, + { + "epoch": 0.6418735821856438, + "grad_norm": 1.95900355492995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132350 + }, + { + "epoch": 0.6419220803784799, + "grad_norm": 1.8348360697473254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132360 + }, + { + "epoch": 0.641970578571316, + "grad_norm": 2.9618529424624285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132370 + }, + { + "epoch": 0.642019076764152, + "grad_norm": 2.025664116445114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132380 + }, + { + "epoch": 0.6420675749569882, + "grad_norm": 2.8831385634475737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132390 + }, + { + "epoch": 0.6421160731498242, + "grad_norm": 2.7992632567475084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132400 + }, + { + "epoch": 0.6421645713426604, + "grad_norm": 1.9730995859390532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132410 + }, + { + "epoch": 0.6422130695354964, + "grad_norm": 1.9136082585191616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132420 + }, + { + "epoch": 0.6422615677283325, + "grad_norm": 1.8237089705053222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132430 + }, + { + "epoch": 0.6423100659211686, + "grad_norm": 1.627518315672205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132440 + }, + { + "epoch": 0.6423585641140047, + "grad_norm": 1.8679699564927432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132450 + }, + { + "epoch": 0.6424070623068407, + "grad_norm": 2.042404645408169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132460 + }, + { + "epoch": 0.6424555604996769, + "grad_norm": 1.959853221933372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132470 + }, + { + "epoch": 0.6425040586925129, + "grad_norm": 1.974395047454891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132480 + }, + { + "epoch": 0.6425525568853491, + "grad_norm": 1.8139323287869047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132490 + }, + { + "epoch": 0.6426010550781851, + "grad_norm": 2.1087535628794285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132500 + }, + { + "epoch": 0.6426495532710212, + "grad_norm": 1.8511325095005304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132510 + }, + { + "epoch": 0.6426980514638573, + "grad_norm": 1.917961043318428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132520 + }, + { + "epoch": 0.6427465496566934, + "grad_norm": 2.9244304755593475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132530 + }, + { + "epoch": 0.6427950478495295, + "grad_norm": 1.7103626248626824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132540 + }, + { + "epoch": 0.6428435460423656, + "grad_norm": 1.5179131196418894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132550 + }, + { + "epoch": 0.6428920442352016, + "grad_norm": 1.7411282726698118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132560 + }, + { + "epoch": 0.6429405424280378, + "grad_norm": 1.8272332624746923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132570 + }, + { + "epoch": 0.6429890406208739, + "grad_norm": 1.9192498257325497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132580 + }, + { + "epoch": 0.64303753881371, + "grad_norm": 1.6654735190968495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132590 + }, + { + "epoch": 0.6430860370065461, + "grad_norm": 1.5421264265569334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132600 + }, + { + "epoch": 0.6431345351993821, + "grad_norm": 1.7229507420779555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132610 + }, + { + "epoch": 0.6431830333922183, + "grad_norm": 1.7462849655203172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132620 + }, + { + "epoch": 0.6432315315850543, + "grad_norm": 1.7635034055274446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132630 + }, + { + "epoch": 0.6432800297778904, + "grad_norm": 1.7198789237227174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132640 + }, + { + "epoch": 0.6433285279707265, + "grad_norm": 2.145551576404614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132650 + }, + { + "epoch": 0.6433770261635626, + "grad_norm": 1.8465148343693727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132660 + }, + { + "epoch": 0.6434255243563987, + "grad_norm": 1.7806175378609623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132670 + }, + { + "epoch": 0.6434740225492348, + "grad_norm": 1.8498751330753294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132680 + }, + { + "epoch": 0.6435225207420708, + "grad_norm": 1.5933726160710648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132690 + }, + { + "epoch": 0.643571018934907, + "grad_norm": 1.7046994571501273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132700 + }, + { + "epoch": 0.643619517127743, + "grad_norm": 1.687177331177736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132710 + }, + { + "epoch": 0.6436680153205792, + "grad_norm": 1.7420083509023243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132720 + }, + { + "epoch": 0.6437165135134152, + "grad_norm": 1.8954523284264724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132730 + }, + { + "epoch": 0.6437650117062513, + "grad_norm": 1.4732795250438357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132740 + }, + { + "epoch": 0.6438135098990874, + "grad_norm": 1.6643436140384438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132750 + }, + { + "epoch": 0.6438620080919235, + "grad_norm": 1.6221994769693993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132760 + }, + { + "epoch": 0.6439105062847595, + "grad_norm": 1.7671762009285885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132770 + }, + { + "epoch": 0.6439590044775957, + "grad_norm": 1.9204323109534016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132780 + }, + { + "epoch": 0.6440075026704317, + "grad_norm": 1.8892498587774753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132790 + }, + { + "epoch": 0.6440560008632679, + "grad_norm": 1.5592748070503148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132800 + }, + { + "epoch": 0.6441044990561039, + "grad_norm": 1.6390598034377035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132810 + }, + { + "epoch": 0.64415299724894, + "grad_norm": 1.7207561597842869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132820 + }, + { + "epoch": 0.6442014954417761, + "grad_norm": 2.072046356715873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132830 + }, + { + "epoch": 0.6442499936346122, + "grad_norm": 1.396503535033844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132840 + }, + { + "epoch": 0.6442984918274482, + "grad_norm": 1.4216131205557758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132850 + }, + { + "epoch": 0.6443469900202844, + "grad_norm": 1.4852636809337127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132860 + }, + { + "epoch": 0.6443954882131204, + "grad_norm": 1.8489096476059785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132870 + }, + { + "epoch": 0.6444439864059566, + "grad_norm": 1.8303303761513234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132880 + }, + { + "epoch": 0.6444924845987926, + "grad_norm": 1.4885321775182092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132890 + }, + { + "epoch": 0.6445409827916287, + "grad_norm": 1.61507799134597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132900 + }, + { + "epoch": 0.6445894809844648, + "grad_norm": 1.5358098437445733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132910 + }, + { + "epoch": 0.6446379791773009, + "grad_norm": 1.5094349237187998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132920 + }, + { + "epoch": 0.644686477370137, + "grad_norm": 1.5649088425107038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132930 + }, + { + "epoch": 0.6447349755629731, + "grad_norm": 1.4850688501155673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132940 + }, + { + "epoch": 0.6447834737558091, + "grad_norm": 1.495089634317992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132950 + }, + { + "epoch": 0.6448319719486453, + "grad_norm": 1.4235008904961433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132960 + }, + { + "epoch": 0.6448804701414813, + "grad_norm": 1.5121261753847648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132970 + }, + { + "epoch": 0.6449289683343175, + "grad_norm": 1.5195200830930844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132980 + }, + { + "epoch": 0.6449774665271535, + "grad_norm": 1.553878234972217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 132990 + }, + { + "epoch": 0.6450259647199896, + "grad_norm": 1.4611167387101887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133000 + }, + { + "epoch": 0.6450744629128257, + "grad_norm": 1.383880885441613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133010 + }, + { + "epoch": 0.6451229611056618, + "grad_norm": 1.399268825252875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133020 + }, + { + "epoch": 0.6451714592984978, + "grad_norm": 1.6799687330149027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133030 + }, + { + "epoch": 0.645219957491334, + "grad_norm": 1.5580005197080027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133040 + }, + { + "epoch": 0.64526845568417, + "grad_norm": 1.3757053807239572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133050 + }, + { + "epoch": 0.6453169538770062, + "grad_norm": 1.8516458055728435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133060 + }, + { + "epoch": 0.6453654520698422, + "grad_norm": 1.4072813314669474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133070 + }, + { + "epoch": 0.6454139502626783, + "grad_norm": 1.4315037333290093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133080 + }, + { + "epoch": 0.6454624484555145, + "grad_norm": 1.6873516983650916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133090 + }, + { + "epoch": 0.6455109466483505, + "grad_norm": 1.4386020552592527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133100 + }, + { + "epoch": 0.6455594448411867, + "grad_norm": 1.4417990712445317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133110 + }, + { + "epoch": 0.6456079430340227, + "grad_norm": 1.3706745960462285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133120 + }, + { + "epoch": 0.6456564412268588, + "grad_norm": 1.3305574952937604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133130 + }, + { + "epoch": 0.6457049394196949, + "grad_norm": 1.5565335331757524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133140 + }, + { + "epoch": 0.645753437612531, + "grad_norm": 1.397425393179219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133150 + }, + { + "epoch": 0.645801935805367, + "grad_norm": 1.340949182804252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133160 + }, + { + "epoch": 0.6458504339982032, + "grad_norm": 1.370782598542064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133170 + }, + { + "epoch": 0.6458989321910392, + "grad_norm": 1.5576348744161805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133180 + }, + { + "epoch": 0.6459474303838754, + "grad_norm": 1.4790610691761685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133190 + }, + { + "epoch": 0.6459959285767114, + "grad_norm": 1.378890033265634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133200 + }, + { + "epoch": 0.6460444267695475, + "grad_norm": 1.2800677495761192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133210 + }, + { + "epoch": 0.6460929249623836, + "grad_norm": 1.9552058461158595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133220 + }, + { + "epoch": 0.6461414231552197, + "grad_norm": 1.4261114245073259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133230 + }, + { + "epoch": 0.6461899213480558, + "grad_norm": 1.5337785441715823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133240 + }, + { + "epoch": 0.6462384195408919, + "grad_norm": 3.000940864694712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133250 + }, + { + "epoch": 0.6462869177337279, + "grad_norm": 1.3222266659340676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133260 + }, + { + "epoch": 0.6463354159265641, + "grad_norm": 1.7037932309449388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133270 + }, + { + "epoch": 0.6463839141194001, + "grad_norm": 1.5460878444173431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133280 + }, + { + "epoch": 0.6464324123122362, + "grad_norm": 1.3479673555139016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133290 + }, + { + "epoch": 0.6464809105050723, + "grad_norm": 1.380638963155434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133300 + }, + { + "epoch": 0.6465294086979084, + "grad_norm": 1.3794857522952952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133310 + }, + { + "epoch": 0.6465779068907445, + "grad_norm": 1.3983131452732778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133320 + }, + { + "epoch": 0.6466264050835806, + "grad_norm": 1.3539104770643462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133330 + }, + { + "epoch": 0.6466749032764166, + "grad_norm": 1.3593300707270828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133340 + }, + { + "epoch": 0.6467234014692528, + "grad_norm": 1.417124479985432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133350 + }, + { + "epoch": 0.6467718996620888, + "grad_norm": 1.7618316405787482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133360 + }, + { + "epoch": 0.646820397854925, + "grad_norm": 1.2424690964962792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133370 + }, + { + "epoch": 0.646868896047761, + "grad_norm": 1.2572310481573368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133380 + }, + { + "epoch": 0.6469173942405971, + "grad_norm": 1.347648890259734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133390 + }, + { + "epoch": 0.6469658924334332, + "grad_norm": 1.4524444225116895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133400 + }, + { + "epoch": 0.6470143906262693, + "grad_norm": 1.2503592472512537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133410 + }, + { + "epoch": 0.6470628888191053, + "grad_norm": 1.242756866304262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133420 + }, + { + "epoch": 0.6471113870119415, + "grad_norm": 1.4704798445563938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133430 + }, + { + "epoch": 0.6471598852047775, + "grad_norm": 3.0658912919534487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133440 + }, + { + "epoch": 0.6472083833976137, + "grad_norm": 1.3654249642058858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133450 + }, + { + "epoch": 0.6472568815904497, + "grad_norm": 1.261875297586812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133460 + }, + { + "epoch": 0.6473053797832858, + "grad_norm": 1.2119610914851364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133470 + }, + { + "epoch": 0.6473538779761219, + "grad_norm": 1.2509229918578058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133480 + }, + { + "epoch": 0.647402376168958, + "grad_norm": 1.2860803622061212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133490 + }, + { + "epoch": 0.647450874361794, + "grad_norm": 1.2691913298112922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133500 + }, + { + "epoch": 0.6474993725546302, + "grad_norm": 1.2816704497708997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133510 + }, + { + "epoch": 0.6475478707474662, + "grad_norm": 1.2820859751627722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133520 + }, + { + "epoch": 0.6475963689403024, + "grad_norm": 1.2154177397860622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133530 + }, + { + "epoch": 0.6476448671331384, + "grad_norm": 1.1968165836151456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133540 + }, + { + "epoch": 0.6476933653259745, + "grad_norm": 1.254687731488957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133550 + }, + { + "epoch": 0.6477418635188106, + "grad_norm": 1.4850091645257635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133560 + }, + { + "epoch": 0.6477903617116467, + "grad_norm": 1.281152890442172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133570 + }, + { + "epoch": 0.6478388599044828, + "grad_norm": 1.2841988450418285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133580 + }, + { + "epoch": 0.6478873580973189, + "grad_norm": 1.280484411836369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133590 + }, + { + "epoch": 0.647935856290155, + "grad_norm": 1.2717958952634945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133600 + }, + { + "epoch": 0.6479843544829911, + "grad_norm": 1.2001106597381295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133610 + }, + { + "epoch": 0.6480328526758272, + "grad_norm": 1.321534313092343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133620 + }, + { + "epoch": 0.6480813508686633, + "grad_norm": 1.2547339167667815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133630 + }, + { + "epoch": 0.6481298490614994, + "grad_norm": 1.2035846452818078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133640 + }, + { + "epoch": 0.6481783472543354, + "grad_norm": 1.1272245359350563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133650 + }, + { + "epoch": 0.6482268454471716, + "grad_norm": 2.2817397393737338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133660 + }, + { + "epoch": 0.6482753436400076, + "grad_norm": 2.8507943738986796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133670 + }, + { + "epoch": 0.6483238418328438, + "grad_norm": 1.171032764091251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133680 + }, + { + "epoch": 0.6483723400256798, + "grad_norm": 1.1716711867393315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133690 + }, + { + "epoch": 0.6484208382185159, + "grad_norm": 1.2154872308656195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133700 + }, + { + "epoch": 0.648469336411352, + "grad_norm": 1.5093091576545703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133710 + }, + { + "epoch": 0.6485178346041881, + "grad_norm": 1.1047212922221661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133720 + }, + { + "epoch": 0.6485663327970241, + "grad_norm": 1.1858286086408043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133730 + }, + { + "epoch": 0.6486148309898603, + "grad_norm": 1.155924493900784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133740 + }, + { + "epoch": 0.6486633291826963, + "grad_norm": 1.1603729177522837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133750 + }, + { + "epoch": 0.6487118273755325, + "grad_norm": 1.1408651090505373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133760 + }, + { + "epoch": 0.6487603255683685, + "grad_norm": 1.0987623966229876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133770 + }, + { + "epoch": 0.6488088237612046, + "grad_norm": 6.590053658328543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133780 + }, + { + "epoch": 0.6488573219540407, + "grad_norm": 1.668633728968416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133790 + }, + { + "epoch": 0.6489058201468768, + "grad_norm": 1.1409268552142748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133800 + }, + { + "epoch": 0.6489543183397128, + "grad_norm": 1.212464439959149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133810 + }, + { + "epoch": 0.649002816532549, + "grad_norm": 1.246311853719817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133820 + }, + { + "epoch": 0.649051314725385, + "grad_norm": 1.1020509305126325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133830 + }, + { + "epoch": 0.6490998129182212, + "grad_norm": 1.1959535584082914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133840 + }, + { + "epoch": 0.6491483111110572, + "grad_norm": 1.35088797037497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133850 + }, + { + "epoch": 0.6491968093038933, + "grad_norm": 2.1026782803801325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133860 + }, + { + "epoch": 0.6492453074967294, + "grad_norm": 1.402803206929093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133870 + }, + { + "epoch": 0.6492938056895655, + "grad_norm": 1.257936190768305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133880 + }, + { + "epoch": 0.6493423038824016, + "grad_norm": 1.0890737200952572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133890 + }, + { + "epoch": 0.6493908020752377, + "grad_norm": 1.1549681033784509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133900 + }, + { + "epoch": 0.6494393002680737, + "grad_norm": 1.1024219048749728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133910 + }, + { + "epoch": 0.6494877984609099, + "grad_norm": 1.1532272736758387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133920 + }, + { + "epoch": 0.6495362966537459, + "grad_norm": 1.085806857759053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133930 + }, + { + "epoch": 0.649584794846582, + "grad_norm": 1.288972555357759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133940 + }, + { + "epoch": 0.6496332930394181, + "grad_norm": 1.1654369558300459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133950 + }, + { + "epoch": 0.6496817912322542, + "grad_norm": 1.1107979958069336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133960 + }, + { + "epoch": 0.6497302894250903, + "grad_norm": 1.1417904488553177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133970 + }, + { + "epoch": 0.6497787876179264, + "grad_norm": 1.1585397885482962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133980 + }, + { + "epoch": 0.6498272858107624, + "grad_norm": 1.1971761182394403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 133990 + }, + { + "epoch": 0.6498757840035986, + "grad_norm": 1.2163960150246567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134000 + }, + { + "epoch": 0.6499242821964346, + "grad_norm": 1.0660359350822546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134010 + }, + { + "epoch": 0.6499727803892708, + "grad_norm": 1.0722258991791023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134020 + }, + { + "epoch": 0.6500212785821068, + "grad_norm": 1.1315604808714852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134030 + }, + { + "epoch": 0.6500697767749429, + "grad_norm": 1.306470096551493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134040 + }, + { + "epoch": 0.650118274967779, + "grad_norm": 1.081424514381979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134050 + }, + { + "epoch": 0.6501667731606151, + "grad_norm": 1.0539847039581218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134060 + }, + { + "epoch": 0.6502152713534511, + "grad_norm": 1.1395420074222784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134070 + }, + { + "epoch": 0.6502637695462873, + "grad_norm": 1.1026991586504664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134080 + }, + { + "epoch": 0.6503122677391233, + "grad_norm": 1.1467175653478989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134090 + }, + { + "epoch": 0.6503607659319595, + "grad_norm": 1.1257442622536473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134100 + }, + { + "epoch": 0.6504092641247956, + "grad_norm": 1.1380434727925604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134110 + }, + { + "epoch": 0.6504577623176316, + "grad_norm": 1.039729582430482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134120 + }, + { + "epoch": 0.6505062605104678, + "grad_norm": 1.1112822306813541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134130 + }, + { + "epoch": 0.6505547587033038, + "grad_norm": 1.1682426048764682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134140 + }, + { + "epoch": 0.65060325689614, + "grad_norm": 1.1428482338260437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134150 + }, + { + "epoch": 0.650651755088976, + "grad_norm": 1.0224734836583593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134160 + }, + { + "epoch": 0.6507002532818121, + "grad_norm": 1.0491660873412911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134170 + }, + { + "epoch": 0.6507487514746482, + "grad_norm": 1.0492945534679166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134180 + }, + { + "epoch": 0.6507972496674843, + "grad_norm": 1.9786349980677187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134190 + }, + { + "epoch": 0.6508457478603203, + "grad_norm": 1.0987962184572098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134200 + }, + { + "epoch": 0.6508942460531565, + "grad_norm": 1.0665953453781185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134210 + }, + { + "epoch": 0.6509427442459925, + "grad_norm": 2.6468774194654543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134220 + }, + { + "epoch": 0.6509912424388287, + "grad_norm": 1.1751647122082431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134230 + }, + { + "epoch": 0.6510397406316647, + "grad_norm": 1.3782978669496515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134240 + }, + { + "epoch": 0.6510882388245008, + "grad_norm": 1.1973098423823103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134250 + }, + { + "epoch": 0.6511367370173369, + "grad_norm": 9.961311064898837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134260 + }, + { + "epoch": 0.651185235210173, + "grad_norm": 1.0850690301822397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134270 + }, + { + "epoch": 0.651233733403009, + "grad_norm": 1.0709923969898227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134280 + }, + { + "epoch": 0.6512822315958452, + "grad_norm": 1.233699435942981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134290 + }, + { + "epoch": 0.6513307297886812, + "grad_norm": 1.0944224726472385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134300 + }, + { + "epoch": 0.6513792279815174, + "grad_norm": 1.029988041523211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134310 + }, + { + "epoch": 0.6514277261743534, + "grad_norm": 1.0309366871297243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134320 + }, + { + "epoch": 0.6514762243671895, + "grad_norm": 1.0360381708096611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134330 + }, + { + "epoch": 0.6515247225600256, + "grad_norm": 1.0224997737395825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134340 + }, + { + "epoch": 0.6515732207528617, + "grad_norm": 1.2375254243579548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134350 + }, + { + "epoch": 0.6516217189456978, + "grad_norm": 1.0405057082607527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134360 + }, + { + "epoch": 0.6516702171385339, + "grad_norm": 1.0431063657279083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134370 + }, + { + "epoch": 0.6517187153313699, + "grad_norm": 9.069668749361881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134380 + }, + { + "epoch": 0.6517672135242061, + "grad_norm": 1.0797332805623228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134390 + }, + { + "epoch": 0.6518157117170421, + "grad_norm": 1.1096452112724364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134400 + }, + { + "epoch": 0.6518642099098783, + "grad_norm": 1.0447555354176075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134410 + }, + { + "epoch": 0.6519127081027143, + "grad_norm": 9.609844653368782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134420 + }, + { + "epoch": 0.6519612062955504, + "grad_norm": 9.37611446261144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134430 + }, + { + "epoch": 0.6520097044883865, + "grad_norm": 1.083225029674395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134440 + }, + { + "epoch": 0.6520582026812226, + "grad_norm": 1.864525955852514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134450 + }, + { + "epoch": 0.6521067008740586, + "grad_norm": 9.248293508790084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134460 + }, + { + "epoch": 0.6521551990668948, + "grad_norm": 9.999781980241096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134470 + }, + { + "epoch": 0.6522036972597308, + "grad_norm": 9.377833265489244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134480 + }, + { + "epoch": 0.652252195452567, + "grad_norm": 1.057392182701733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134490 + }, + { + "epoch": 0.652300693645403, + "grad_norm": 1.0208361089780738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134500 + }, + { + "epoch": 0.6523491918382391, + "grad_norm": 4.3476620703586377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134510 + }, + { + "epoch": 0.6523976900310752, + "grad_norm": 1.055826430729212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134520 + }, + { + "epoch": 0.6524461882239113, + "grad_norm": 8.952049057597833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134530 + }, + { + "epoch": 0.6524946864167473, + "grad_norm": 9.890167973480857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134540 + }, + { + "epoch": 0.6525431846095835, + "grad_norm": 1.2232861479333224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134550 + }, + { + "epoch": 0.6525916828024195, + "grad_norm": 9.513539822592065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134560 + }, + { + "epoch": 0.6526401809952557, + "grad_norm": 9.60135508876192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134570 + }, + { + "epoch": 0.6526886791880917, + "grad_norm": 9.229802344634663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134580 + }, + { + "epoch": 0.6527371773809278, + "grad_norm": 1.1934402266433608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134590 + }, + { + "epoch": 0.6527856755737639, + "grad_norm": 1.0820701845659642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134600 + }, + { + "epoch": 0.6528341737666, + "grad_norm": 9.542935686113196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134610 + }, + { + "epoch": 0.6528826719594361, + "grad_norm": 9.117054133866986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134620 + }, + { + "epoch": 0.6529311701522722, + "grad_norm": 9.175921888981975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134630 + }, + { + "epoch": 0.6529796683451083, + "grad_norm": 9.910338150120879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134640 + }, + { + "epoch": 0.6530281665379444, + "grad_norm": 1.0136681538597259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134650 + }, + { + "epoch": 0.6530766647307805, + "grad_norm": 9.711069992590637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134660 + }, + { + "epoch": 0.6531251629236166, + "grad_norm": 9.602076289638717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134670 + }, + { + "epoch": 0.6531736611164527, + "grad_norm": 9.278949875124454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134680 + }, + { + "epoch": 0.6532221593092887, + "grad_norm": 9.949005885800943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134690 + }, + { + "epoch": 0.6532706575021249, + "grad_norm": 9.500622866198682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134700 + }, + { + "epoch": 0.6533191556949609, + "grad_norm": 9.295463598846254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134710 + }, + { + "epoch": 0.653367653887797, + "grad_norm": 9.203901640830736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134720 + }, + { + "epoch": 0.6534161520806331, + "grad_norm": 9.140998713519366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134730 + }, + { + "epoch": 0.6534646502734692, + "grad_norm": 9.655270361008661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134740 + }, + { + "epoch": 0.6535131484663053, + "grad_norm": 1.0366859726218536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134750 + }, + { + "epoch": 0.6535616466591414, + "grad_norm": 8.71493881504648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134760 + }, + { + "epoch": 0.6536101448519774, + "grad_norm": 8.575938181820675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134770 + }, + { + "epoch": 0.6536586430448136, + "grad_norm": 1.5528067365266907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134780 + }, + { + "epoch": 0.6537071412376496, + "grad_norm": 9.557669500281918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134790 + }, + { + "epoch": 0.6537556394304858, + "grad_norm": 9.43681541798469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134800 + }, + { + "epoch": 0.6538041376233218, + "grad_norm": 8.725240974172266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134810 + }, + { + "epoch": 0.6538526358161579, + "grad_norm": 8.595352340989848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134820 + }, + { + "epoch": 0.653901134008994, + "grad_norm": 8.762887659941043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134830 + }, + { + "epoch": 0.6539496322018301, + "grad_norm": 1.0468653499628999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134840 + }, + { + "epoch": 0.6539981303946661, + "grad_norm": 9.844973902772836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134850 + }, + { + "epoch": 0.6540466285875023, + "grad_norm": 9.013900381660278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134860 + }, + { + "epoch": 0.6540951267803383, + "grad_norm": 8.980033783245744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134870 + }, + { + "epoch": 0.6541436249731745, + "grad_norm": 8.599929657293615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134880 + }, + { + "epoch": 0.6541921231660105, + "grad_norm": 9.740382722611685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134890 + }, + { + "epoch": 0.6542406213588466, + "grad_norm": 1.0040550790790803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134900 + }, + { + "epoch": 0.6542891195516827, + "grad_norm": 8.312465382687151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134910 + }, + { + "epoch": 0.6543376177445188, + "grad_norm": 9.455830962679102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134920 + }, + { + "epoch": 0.6543861159373549, + "grad_norm": 2.6688826437748503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134930 + }, + { + "epoch": 0.654434614130191, + "grad_norm": 9.367418840611208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134940 + }, + { + "epoch": 0.654483112323027, + "grad_norm": 9.843819981369961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134950 + }, + { + "epoch": 0.6545316105158632, + "grad_norm": 9.497637165623019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134960 + }, + { + "epoch": 0.6545801087086992, + "grad_norm": 4.926585006614914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134970 + }, + { + "epoch": 0.6546286069015353, + "grad_norm": 8.55537791721872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134980 + }, + { + "epoch": 0.6546771050943714, + "grad_norm": 9.114489785133628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 134990 + }, + { + "epoch": 0.6547256032872075, + "grad_norm": 9.130106093380164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135000 + }, + { + "epoch": 0.6547741014800436, + "grad_norm": 8.259075201522137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135010 + }, + { + "epoch": 0.6548225996728797, + "grad_norm": 8.15555409872104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135020 + }, + { + "epoch": 0.6548710978657157, + "grad_norm": 8.645742610724483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135030 + }, + { + "epoch": 0.6549195960585519, + "grad_norm": 1.2550106021080865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135040 + }, + { + "epoch": 0.6549680942513879, + "grad_norm": 9.380167398376216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135050 + }, + { + "epoch": 0.655016592444224, + "grad_norm": 8.529050887773337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135060 + }, + { + "epoch": 0.6550650906370601, + "grad_norm": 8.781030658155942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135070 + }, + { + "epoch": 0.6551135888298962, + "grad_norm": 1.027621578941762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135080 + }, + { + "epoch": 0.6551620870227323, + "grad_norm": 9.47606366707987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135090 + }, + { + "epoch": 0.6552105852155684, + "grad_norm": 9.374560505648333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135100 + }, + { + "epoch": 0.6552590834084044, + "grad_norm": 8.440922982799748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135110 + }, + { + "epoch": 0.6553075816012406, + "grad_norm": 8.787858973846596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135120 + }, + { + "epoch": 0.6553560797940766, + "grad_norm": 8.269665130455905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135130 + }, + { + "epoch": 0.6554045779869128, + "grad_norm": 8.858815192525071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135140 + }, + { + "epoch": 0.6554530761797489, + "grad_norm": 9.465586003898352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135150 + }, + { + "epoch": 0.6555015743725849, + "grad_norm": 9.116448040913383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135160 + }, + { + "epoch": 0.6555500725654211, + "grad_norm": 8.486203029178796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135170 + }, + { + "epoch": 0.6555985707582571, + "grad_norm": 7.591735595724458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135180 + }, + { + "epoch": 0.6556470689510933, + "grad_norm": 9.335711581570649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135190 + }, + { + "epoch": 0.6556955671439293, + "grad_norm": 2.091930468850478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135200 + }, + { + "epoch": 0.6557440653367654, + "grad_norm": 2.1782824433103087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135210 + }, + { + "epoch": 0.6557925635296015, + "grad_norm": 8.527185713091967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135220 + }, + { + "epoch": 0.6558410617224376, + "grad_norm": 9.143803936240147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135230 + }, + { + "epoch": 0.6558895599152736, + "grad_norm": 9.867517292150296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135240 + }, + { + "epoch": 0.6559380581081098, + "grad_norm": 9.069076867262993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135250 + }, + { + "epoch": 0.6559865563009458, + "grad_norm": 8.201148204989295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135260 + }, + { + "epoch": 0.656035054493782, + "grad_norm": 1.0099293490384298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135270 + }, + { + "epoch": 0.656083552686618, + "grad_norm": 7.494619325143503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135280 + }, + { + "epoch": 0.6561320508794541, + "grad_norm": 9.173677284479709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135290 + }, + { + "epoch": 0.6561805490722902, + "grad_norm": 1.0072329104104938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135300 + }, + { + "epoch": 0.6562290472651263, + "grad_norm": 8.292232678286382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135310 + }, + { + "epoch": 0.6562775454579624, + "grad_norm": 7.675017599240164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135320 + }, + { + "epoch": 0.6563260436507985, + "grad_norm": 8.282339791776394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135330 + }, + { + "epoch": 0.6563745418436345, + "grad_norm": 9.465977512945756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135340 + }, + { + "epoch": 0.6564230400364707, + "grad_norm": 1.3585774638613657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135350 + }, + { + "epoch": 0.6564715382293067, + "grad_norm": 8.16669682990323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135360 + }, + { + "epoch": 0.6565200364221428, + "grad_norm": 1.3859531122761837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135370 + }, + { + "epoch": 0.6565685346149789, + "grad_norm": 7.678934821342409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135380 + }, + { + "epoch": 0.656617032807815, + "grad_norm": 8.7956678385126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135390 + }, + { + "epoch": 0.6566655310006511, + "grad_norm": 8.822826913501558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135400 + }, + { + "epoch": 0.6567140291934872, + "grad_norm": 7.680070268634154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135410 + }, + { + "epoch": 0.6567625273863232, + "grad_norm": 7.371473031980713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135420 + }, + { + "epoch": 0.6568110255791594, + "grad_norm": 7.819601677283572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135430 + }, + { + "epoch": 0.6568595237719954, + "grad_norm": 1.2638690805033548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135440 + }, + { + "epoch": 0.6569080219648316, + "grad_norm": 8.497902825865822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135450 + }, + { + "epoch": 0.6569565201576676, + "grad_norm": 7.384171141211482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135460 + }, + { + "epoch": 0.6570050183505037, + "grad_norm": 7.739244267668255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135470 + }, + { + "epoch": 0.6570535165433398, + "grad_norm": 7.703381754708971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135480 + }, + { + "epoch": 0.6571020147361759, + "grad_norm": 1.0586627041675456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135490 + }, + { + "epoch": 0.6571505129290119, + "grad_norm": 8.868956768992575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135500 + }, + { + "epoch": 0.6571990111218481, + "grad_norm": 7.458509543312175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135510 + }, + { + "epoch": 0.6572475093146841, + "grad_norm": 7.314564953730951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135520 + }, + { + "epoch": 0.6572960075075203, + "grad_norm": 7.299725268694601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135530 + }, + { + "epoch": 0.6573445057003563, + "grad_norm": 8.161854481159025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135540 + }, + { + "epoch": 0.6573930038931924, + "grad_norm": 8.504985515855878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135550 + }, + { + "epoch": 0.6574415020860285, + "grad_norm": 7.925444123202396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135560 + }, + { + "epoch": 0.6574900002788646, + "grad_norm": 1.0419474705258835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135570 + }, + { + "epoch": 0.6575384984717006, + "grad_norm": 1.0774982683869894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135580 + }, + { + "epoch": 0.6575869966645368, + "grad_norm": 1.1321151305310195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135590 + }, + { + "epoch": 0.6576354948573728, + "grad_norm": 1.0668108529898745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135600 + }, + { + "epoch": 0.657683993050209, + "grad_norm": 1.02083305364431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135610 + }, + { + "epoch": 0.657732491243045, + "grad_norm": 1.0397528171779413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135620 + }, + { + "epoch": 0.6577809894358811, + "grad_norm": 1.1251614751017769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135630 + }, + { + "epoch": 0.6578294876287172, + "grad_norm": 1.1876871042204584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135640 + }, + { + "epoch": 0.6578779858215533, + "grad_norm": 1.0494317592701918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135650 + }, + { + "epoch": 0.6579264840143895, + "grad_norm": 1.0260361449354605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135660 + }, + { + "epoch": 0.6579749822072255, + "grad_norm": 9.779241594287669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135670 + }, + { + "epoch": 0.6580234804000616, + "grad_norm": 1.4546016302574571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135680 + }, + { + "epoch": 0.6580719785928977, + "grad_norm": 1.0221975088597901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135690 + }, + { + "epoch": 0.6581204767857338, + "grad_norm": 9.920045584976833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135700 + }, + { + "epoch": 0.6581689749785699, + "grad_norm": 9.330504013860264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135710 + }, + { + "epoch": 0.658217473171406, + "grad_norm": 9.20132521287087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135720 + }, + { + "epoch": 0.658265971364242, + "grad_norm": 9.420572411045214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135730 + }, + { + "epoch": 0.6583144695570782, + "grad_norm": 9.969808445475792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135740 + }, + { + "epoch": 0.6583629677499142, + "grad_norm": 9.971702752409328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135750 + }, + { + "epoch": 0.6584114659427504, + "grad_norm": 9.000545020398931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135760 + }, + { + "epoch": 0.6584599641355864, + "grad_norm": 1.0221868507187537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135770 + }, + { + "epoch": 0.6585084623284225, + "grad_norm": 8.562749798102232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135780 + }, + { + "epoch": 0.6585569605212586, + "grad_norm": 9.605169992710216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135790 + }, + { + "epoch": 0.6586054587140947, + "grad_norm": 9.995487459946162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135800 + }, + { + "epoch": 0.6586539569069307, + "grad_norm": 8.660008887773074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135810 + }, + { + "epoch": 0.6587024550997669, + "grad_norm": 8.877766788373265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135820 + }, + { + "epoch": 0.6587509532926029, + "grad_norm": 9.136152812061482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135830 + }, + { + "epoch": 0.6587994514854391, + "grad_norm": 9.187766636387096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135840 + }, + { + "epoch": 0.6588479496782751, + "grad_norm": 9.056071803570376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135850 + }, + { + "epoch": 0.6588964478711112, + "grad_norm": 8.481430313622695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135860 + }, + { + "epoch": 0.6589449460639473, + "grad_norm": 7.960173320498143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135870 + }, + { + "epoch": 0.6589934442567834, + "grad_norm": 7.893147113691157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135880 + }, + { + "epoch": 0.6590419424496194, + "grad_norm": 8.75979964121143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135890 + }, + { + "epoch": 0.6590904406424556, + "grad_norm": 8.746512492052716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135900 + }, + { + "epoch": 0.6591389388352916, + "grad_norm": 8.973206178097826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135910 + }, + { + "epoch": 0.6591874370281278, + "grad_norm": 9.058393146688104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135920 + }, + { + "epoch": 0.6592359352209638, + "grad_norm": 8.184736088878708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135930 + }, + { + "epoch": 0.6592844334137999, + "grad_norm": 8.956179442520806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135940 + }, + { + "epoch": 0.659332931606636, + "grad_norm": 8.331457479471283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135950 + }, + { + "epoch": 0.6593814297994721, + "grad_norm": 1.5674663700337987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135960 + }, + { + "epoch": 0.6594299279923082, + "grad_norm": 7.762591991422596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135970 + }, + { + "epoch": 0.6594784261851443, + "grad_norm": 7.938972856891269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135980 + }, + { + "epoch": 0.6595269243779803, + "grad_norm": 8.29931678936191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 135990 + }, + { + "epoch": 0.6595754225708165, + "grad_norm": 8.201456580536615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136000 + }, + { + "epoch": 0.6596239207636525, + "grad_norm": 7.175222549449245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136010 + }, + { + "epoch": 0.6596724189564886, + "grad_norm": 7.020619108288884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136020 + }, + { + "epoch": 0.6597209171493247, + "grad_norm": 7.637903820523206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136030 + }, + { + "epoch": 0.6597694153421608, + "grad_norm": 7.801870793855414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136040 + }, + { + "epoch": 0.6598179135349969, + "grad_norm": 7.728925766059547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136050 + }, + { + "epoch": 0.659866411727833, + "grad_norm": 7.007347591070356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136060 + }, + { + "epoch": 0.659914909920669, + "grad_norm": 7.518593747590785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136070 + }, + { + "epoch": 0.6599634081135052, + "grad_norm": 7.393397538635327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136080 + }, + { + "epoch": 0.6600119063063412, + "grad_norm": 7.992865391770465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136090 + }, + { + "epoch": 0.6600604044991774, + "grad_norm": 7.939907220588793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136100 + }, + { + "epoch": 0.6601089026920134, + "grad_norm": 7.25898061659791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136110 + }, + { + "epoch": 0.6601574008848495, + "grad_norm": 7.485559194719826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136120 + }, + { + "epoch": 0.6602058990776856, + "grad_norm": 6.89364583195129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136130 + }, + { + "epoch": 0.6602543972705217, + "grad_norm": 7.515299671467801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136140 + }, + { + "epoch": 0.6603028954633577, + "grad_norm": 7.633885701352483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136150 + }, + { + "epoch": 0.6603513936561939, + "grad_norm": 7.147503566784508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136160 + }, + { + "epoch": 0.66039989184903, + "grad_norm": 1.1403724187175612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136170 + }, + { + "epoch": 0.6604483900418661, + "grad_norm": 6.677458941339864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136180 + }, + { + "epoch": 0.6604968882347022, + "grad_norm": 7.800339574259851e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136190 + }, + { + "epoch": 0.6605453864275382, + "grad_norm": 9.744356077590055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136200 + }, + { + "epoch": 0.6605938846203744, + "grad_norm": 6.968655696937276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136210 + }, + { + "epoch": 0.6606423828132104, + "grad_norm": 6.741075964100673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136220 + }, + { + "epoch": 0.6606908810060466, + "grad_norm": 7.10272445303417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136230 + }, + { + "epoch": 0.6607393791988826, + "grad_norm": 7.518060840538965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136240 + }, + { + "epoch": 0.6607878773917187, + "grad_norm": 7.187912132167185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136250 + }, + { + "epoch": 0.6608363755845548, + "grad_norm": 7.184114281244547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136260 + }, + { + "epoch": 0.6608848737773909, + "grad_norm": 6.827807652598494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136270 + }, + { + "epoch": 0.660933371970227, + "grad_norm": 7.10095306999392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136280 + }, + { + "epoch": 0.6609818701630631, + "grad_norm": 1.0318457555058558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136290 + }, + { + "epoch": 0.6610303683558991, + "grad_norm": 7.242182675781805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136300 + }, + { + "epoch": 0.6610788665487353, + "grad_norm": 6.46758309130746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136310 + }, + { + "epoch": 0.6611273647415713, + "grad_norm": 6.608144076380995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136320 + }, + { + "epoch": 0.6611758629344074, + "grad_norm": 6.682174813477104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136330 + }, + { + "epoch": 0.6612243611272435, + "grad_norm": 7.296237924947491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136340 + }, + { + "epoch": 0.6612728593200796, + "grad_norm": 7.546872637931301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136350 + }, + { + "epoch": 0.6613213575129157, + "grad_norm": 7.658918121933311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136360 + }, + { + "epoch": 0.6613698557057518, + "grad_norm": 6.7885750354435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136370 + }, + { + "epoch": 0.6614183538985878, + "grad_norm": 6.668005880783312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136380 + }, + { + "epoch": 0.661466852091424, + "grad_norm": 7.592790041144326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136390 + }, + { + "epoch": 0.66151535028426, + "grad_norm": 7.102595844798998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136400 + }, + { + "epoch": 0.6615638484770962, + "grad_norm": 6.309368671963966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136410 + }, + { + "epoch": 0.6616123466699322, + "grad_norm": 6.705410271479195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136420 + }, + { + "epoch": 0.6616608448627683, + "grad_norm": 6.63963248825894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136430 + }, + { + "epoch": 0.6617093430556044, + "grad_norm": 7.071205487818588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136440 + }, + { + "epoch": 0.6617578412484405, + "grad_norm": 6.999028556720077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136450 + }, + { + "epoch": 0.6618063394412765, + "grad_norm": 6.376311745270868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136460 + }, + { + "epoch": 0.6618548376341127, + "grad_norm": 6.394354556960025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136470 + }, + { + "epoch": 0.6619033358269487, + "grad_norm": 6.413065989363531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136480 + }, + { + "epoch": 0.6619518340197849, + "grad_norm": 7.183692929402241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136490 + }, + { + "epoch": 0.6620003322126209, + "grad_norm": 7.576303318046484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136500 + }, + { + "epoch": 0.662048830405457, + "grad_norm": 6.474660096955631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136510 + }, + { + "epoch": 0.6620973285982931, + "grad_norm": 6.310230560302443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136520 + }, + { + "epoch": 0.6621458267911292, + "grad_norm": 6.647121608693851e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136530 + }, + { + "epoch": 0.6621943249839652, + "grad_norm": 7.300151594336057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136540 + }, + { + "epoch": 0.6622428231768014, + "grad_norm": 6.834618204720755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136550 + }, + { + "epoch": 0.6622913213696374, + "grad_norm": 6.309529965164984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136560 + }, + { + "epoch": 0.6623398195624736, + "grad_norm": 6.421714005000467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136570 + }, + { + "epoch": 0.6623883177553096, + "grad_norm": 6.601796087579714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136580 + }, + { + "epoch": 0.6624368159481457, + "grad_norm": 6.858761736339147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136590 + }, + { + "epoch": 0.6624853141409818, + "grad_norm": 6.928222262558847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136600 + }, + { + "epoch": 0.6625338123338179, + "grad_norm": 6.152860265729032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136610 + }, + { + "epoch": 0.662582310526654, + "grad_norm": 6.165922883383246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136620 + }, + { + "epoch": 0.6626308087194901, + "grad_norm": 6.507394800792099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136630 + }, + { + "epoch": 0.6626793069123261, + "grad_norm": 1.02456617412372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136640 + }, + { + "epoch": 0.6627278051051623, + "grad_norm": 6.949358777319503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136650 + }, + { + "epoch": 0.6627763032979983, + "grad_norm": 6.298323995679311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136660 + }, + { + "epoch": 0.6628248014908344, + "grad_norm": 6.000740881972888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136670 + }, + { + "epoch": 0.6628732996836706, + "grad_norm": 6.009608455315174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136680 + }, + { + "epoch": 0.6629217978765066, + "grad_norm": 6.919737671751136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136690 + }, + { + "epoch": 0.6629702960693428, + "grad_norm": 6.857138146187935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136700 + }, + { + "epoch": 0.6630187942621788, + "grad_norm": 6.360218662848638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136710 + }, + { + "epoch": 0.663067292455015, + "grad_norm": 6.125898721620615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136720 + }, + { + "epoch": 0.663115790647851, + "grad_norm": 6.052143675105981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136730 + }, + { + "epoch": 0.6631642888406871, + "grad_norm": 2.4926168862293707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136740 + }, + { + "epoch": 0.6632127870335232, + "grad_norm": 7.011205838125534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136750 + }, + { + "epoch": 0.6632612852263593, + "grad_norm": 6.16875439618525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136760 + }, + { + "epoch": 0.6633097834191953, + "grad_norm": 6.184642842299581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136770 + }, + { + "epoch": 0.6633582816120315, + "grad_norm": 6.346976277882277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136780 + }, + { + "epoch": 0.6634067798048675, + "grad_norm": 6.60027339449698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136790 + }, + { + "epoch": 0.6634552779977037, + "grad_norm": 6.783810135857493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136800 + }, + { + "epoch": 0.6635037761905397, + "grad_norm": 6.023735465987556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136810 + }, + { + "epoch": 0.6635522743833758, + "grad_norm": 5.936860247857112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136820 + }, + { + "epoch": 0.6636007725762119, + "grad_norm": 6.28644087896646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136830 + }, + { + "epoch": 0.663649270769048, + "grad_norm": 6.690783749263574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136840 + }, + { + "epoch": 0.663697768961884, + "grad_norm": 6.771917782089076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136850 + }, + { + "epoch": 0.6637462671547202, + "grad_norm": 5.976197314794263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136860 + }, + { + "epoch": 0.6637947653475562, + "grad_norm": 5.695233085134532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136870 + }, + { + "epoch": 0.6638432635403924, + "grad_norm": 6.249833006677363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136880 + }, + { + "epoch": 0.6638917617332284, + "grad_norm": 6.649405293046584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136890 + }, + { + "epoch": 0.6639402599260645, + "grad_norm": 7.443053107181186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136900 + }, + { + "epoch": 0.6639887581189006, + "grad_norm": 5.861878804580556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136910 + }, + { + "epoch": 0.6640372563117367, + "grad_norm": 5.902451860606561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136920 + }, + { + "epoch": 0.6640857545045727, + "grad_norm": 6.030012400515261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136930 + }, + { + "epoch": 0.6641342526974089, + "grad_norm": 6.857108303393034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136940 + }, + { + "epoch": 0.6641827508902449, + "grad_norm": 6.994594770048934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136950 + }, + { + "epoch": 0.6642312490830811, + "grad_norm": 5.875405761912589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136960 + }, + { + "epoch": 0.6642797472759171, + "grad_norm": 5.887855891728577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136970 + }, + { + "epoch": 0.6643282454687532, + "grad_norm": 6.00575305043094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136980 + }, + { + "epoch": 0.6643767436615893, + "grad_norm": 6.655908890706996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 136990 + }, + { + "epoch": 0.6644252418544254, + "grad_norm": 6.664343032980469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137000 + }, + { + "epoch": 0.6644737400472615, + "grad_norm": 5.971354255507322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137010 + }, + { + "epoch": 0.6645222382400976, + "grad_norm": 5.977459949235708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137020 + }, + { + "epoch": 0.6645707364329336, + "grad_norm": 6.774762084660324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137030 + }, + { + "epoch": 0.6646192346257698, + "grad_norm": 9.42098026257554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137040 + }, + { + "epoch": 0.6646677328186058, + "grad_norm": 7.59505169867225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137050 + }, + { + "epoch": 0.664716231011442, + "grad_norm": 6.024721699304791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137060 + }, + { + "epoch": 0.664764729204278, + "grad_norm": 6.069326730084867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137070 + }, + { + "epoch": 0.6648132273971141, + "grad_norm": 5.9264742446885066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137080 + }, + { + "epoch": 0.6648617255899502, + "grad_norm": 6.981076694501098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137090 + }, + { + "epoch": 0.6649102237827863, + "grad_norm": 6.68751809485002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137100 + }, + { + "epoch": 0.6649587219756223, + "grad_norm": 5.8168804883962366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137110 + }, + { + "epoch": 0.6650072201684585, + "grad_norm": 5.505750522161179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137120 + }, + { + "epoch": 0.6650557183612945, + "grad_norm": 5.737484443102403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137130 + }, + { + "epoch": 0.6651042165541307, + "grad_norm": 6.436904698148282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137140 + }, + { + "epoch": 0.6651527147469667, + "grad_norm": 6.463540103140986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137150 + }, + { + "epoch": 0.6652012129398028, + "grad_norm": 6.002307628705239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137160 + }, + { + "epoch": 0.6652497111326389, + "grad_norm": 5.859508078742692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137170 + }, + { + "epoch": 0.665298209325475, + "grad_norm": 5.5249689268066504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137180 + }, + { + "epoch": 0.6653467075183112, + "grad_norm": 6.521649709156918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137190 + }, + { + "epoch": 0.6653952057111472, + "grad_norm": 6.278332165265965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137200 + }, + { + "epoch": 0.6654437039039833, + "grad_norm": 5.589975771158606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137210 + }, + { + "epoch": 0.6654922020968194, + "grad_norm": 5.4085834477746175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137220 + }, + { + "epoch": 0.6655407002896555, + "grad_norm": 5.647722645107933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137230 + }, + { + "epoch": 0.6655891984824915, + "grad_norm": 6.59778649492182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137240 + }, + { + "epoch": 0.6656376966753277, + "grad_norm": 6.317570466762845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137250 + }, + { + "epoch": 0.6656861948681637, + "grad_norm": 5.4211096056633323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137260 + }, + { + "epoch": 0.6657346930609999, + "grad_norm": 5.539665792753112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137270 + }, + { + "epoch": 0.6657831912538359, + "grad_norm": 5.4243642466644815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137280 + }, + { + "epoch": 0.665831689446672, + "grad_norm": 6.43859934257307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137290 + }, + { + "epoch": 0.6658801876395081, + "grad_norm": 6.625291604223094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137300 + }, + { + "epoch": 0.6659286858323442, + "grad_norm": 5.6663221670305575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137310 + }, + { + "epoch": 0.6659771840251802, + "grad_norm": 5.568051975046728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137320 + }, + { + "epoch": 0.6660256822180164, + "grad_norm": 5.6132279269149876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137330 + }, + { + "epoch": 0.6660741804108524, + "grad_norm": 6.945003150349294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137340 + }, + { + "epoch": 0.6661226786036886, + "grad_norm": 6.958530462952695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137350 + }, + { + "epoch": 0.6661711767965246, + "grad_norm": 9.059557015689279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137360 + }, + { + "epoch": 0.6662196749893607, + "grad_norm": 5.564289295989511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137370 + }, + { + "epoch": 0.6662681731821968, + "grad_norm": 5.4328591403418613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137380 + }, + { + "epoch": 0.6663166713750329, + "grad_norm": 6.249335626762331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137390 + }, + { + "epoch": 0.666365169567869, + "grad_norm": 6.507529093369158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137400 + }, + { + "epoch": 0.6664136677607051, + "grad_norm": 5.4569650131952585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137410 + }, + { + "epoch": 0.6664621659535411, + "grad_norm": 5.336305264336261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137420 + }, + { + "epoch": 0.6665106641463773, + "grad_norm": 5.79832999392238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137430 + }, + { + "epoch": 0.6665591623392133, + "grad_norm": 6.519485395983793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137440 + }, + { + "epoch": 0.6666076605320495, + "grad_norm": 6.276210484656985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137450 + }, + { + "epoch": 0.6666561587248855, + "grad_norm": 5.325064122985168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137460 + }, + { + "epoch": 0.6667046569177216, + "grad_norm": 5.416762505205952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137470 + }, + { + "epoch": 0.6667531551105577, + "grad_norm": 5.6431161965520005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137480 + }, + { + "epoch": 0.6668016533033938, + "grad_norm": 6.213918624098369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137490 + }, + { + "epoch": 0.6668501514962298, + "grad_norm": 6.133777930017459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137500 + }, + { + "epoch": 0.666898649689066, + "grad_norm": 5.245246725849029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137510 + }, + { + "epoch": 0.666947147881902, + "grad_norm": 5.6441848528265837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137520 + }, + { + "epoch": 0.6669956460747382, + "grad_norm": 5.2397947314375415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137530 + }, + { + "epoch": 0.6670441442675742, + "grad_norm": 7.119461287175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137540 + }, + { + "epoch": 0.6670926424604103, + "grad_norm": 6.17576105810258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137550 + }, + { + "epoch": 0.6671411406532464, + "grad_norm": 5.344137932183912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137560 + }, + { + "epoch": 0.6671896388460825, + "grad_norm": 5.3051000037385165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137570 + }, + { + "epoch": 0.6672381370389185, + "grad_norm": 5.420864823690863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137580 + }, + { + "epoch": 0.6672866352317547, + "grad_norm": 6.117723927445695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137590 + }, + { + "epoch": 0.6673351334245907, + "grad_norm": 6.15604633935618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137600 + }, + { + "epoch": 0.6673836316174269, + "grad_norm": 5.421039972475228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137610 + }, + { + "epoch": 0.6674321298102629, + "grad_norm": 5.9356267456678324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137620 + }, + { + "epoch": 0.667480628003099, + "grad_norm": 5.5254368191981484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137630 + }, + { + "epoch": 0.6675291261959351, + "grad_norm": 6.016673381736837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137640 + }, + { + "epoch": 0.6675776243887712, + "grad_norm": 6.063704915959534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137650 + }, + { + "epoch": 0.6676261225816073, + "grad_norm": 5.1726889438441503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137660 + }, + { + "epoch": 0.6676746207744434, + "grad_norm": 5.249014023434029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137670 + }, + { + "epoch": 0.6677231189672794, + "grad_norm": 4.989633239915747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137680 + }, + { + "epoch": 0.6677716171601156, + "grad_norm": 6.335664437528976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137690 + }, + { + "epoch": 0.6678201153529517, + "grad_norm": 5.909752687216496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137700 + }, + { + "epoch": 0.6678686135457877, + "grad_norm": 5.0857110522883886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137710 + }, + { + "epoch": 0.6679171117386239, + "grad_norm": 5.168059757920673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137720 + }, + { + "epoch": 0.6679656099314599, + "grad_norm": 5.248672962920864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137730 + }, + { + "epoch": 0.6680141081242961, + "grad_norm": 6.071110192351625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137740 + }, + { + "epoch": 0.6680626063171321, + "grad_norm": 5.786766266169252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137750 + }, + { + "epoch": 0.6681111045099682, + "grad_norm": 5.169748718003575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137760 + }, + { + "epoch": 0.6681596027028043, + "grad_norm": 5.234253919184084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137770 + }, + { + "epoch": 0.6682081008956404, + "grad_norm": 5.534317892852414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137780 + }, + { + "epoch": 0.6682565990884765, + "grad_norm": 6.326897050712432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137790 + }, + { + "epoch": 0.6683050972813126, + "grad_norm": 5.795935820174236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137800 + }, + { + "epoch": 0.6683535954741486, + "grad_norm": 5.119034085510066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137810 + }, + { + "epoch": 0.6684020936669848, + "grad_norm": 5.4777835600816616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137820 + }, + { + "epoch": 0.6684505918598208, + "grad_norm": 1.5496077310217515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137830 + }, + { + "epoch": 0.668499090052657, + "grad_norm": 6.113360484505392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137840 + }, + { + "epoch": 0.668547588245493, + "grad_norm": 5.883872589151906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137850 + }, + { + "epoch": 0.6685960864383291, + "grad_norm": 5.050126361538787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137860 + }, + { + "epoch": 0.6686445846311652, + "grad_norm": 5.263099467356369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137870 + }, + { + "epoch": 0.6686930828240013, + "grad_norm": 5.0919403804527974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137880 + }, + { + "epoch": 0.6687415810168373, + "grad_norm": 5.886771958785175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137890 + }, + { + "epoch": 0.6687900792096735, + "grad_norm": 5.988464124584425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137900 + }, + { + "epoch": 0.6688385774025095, + "grad_norm": 4.840740075451322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137910 + }, + { + "epoch": 0.6688870755953457, + "grad_norm": 4.975799683393234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137920 + }, + { + "epoch": 0.6689355737881817, + "grad_norm": 5.2215412438272324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137930 + }, + { + "epoch": 0.6689840719810178, + "grad_norm": 6.120431805811677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137940 + }, + { + "epoch": 0.6690325701738539, + "grad_norm": 6.313390343848369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137950 + }, + { + "epoch": 0.66908106836669, + "grad_norm": 4.8897863536012665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137960 + }, + { + "epoch": 0.669129566559526, + "grad_norm": 4.9624105713519384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137970 + }, + { + "epoch": 0.6691780647523622, + "grad_norm": 4.879741766217194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137980 + }, + { + "epoch": 0.6692265629451982, + "grad_norm": 5.7085429716607905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 137990 + }, + { + "epoch": 0.6692750611380344, + "grad_norm": 6.196898283405972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138000 + }, + { + "epoch": 0.6693235593308704, + "grad_norm": 4.974015865855108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138010 + }, + { + "epoch": 0.6693720575237065, + "grad_norm": 5.2555630958295296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138020 + }, + { + "epoch": 0.6694205557165426, + "grad_norm": 5.0043915678088524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138030 + }, + { + "epoch": 0.6694690539093787, + "grad_norm": 5.8381200318535775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138040 + }, + { + "epoch": 0.6695175521022148, + "grad_norm": 5.6758619138008726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138050 + }, + { + "epoch": 0.6695660502950509, + "grad_norm": 4.987416346580176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138060 + }, + { + "epoch": 0.6696145484878869, + "grad_norm": 4.841584555492773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138070 + }, + { + "epoch": 0.6696630466807231, + "grad_norm": 4.887035842671139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138080 + }, + { + "epoch": 0.6697115448735591, + "grad_norm": 5.807607905694567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138090 + }, + { + "epoch": 0.6697600430663952, + "grad_norm": 5.720785622997937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138100 + }, + { + "epoch": 0.6698085412592313, + "grad_norm": 4.9438902749443514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138110 + }, + { + "epoch": 0.6698570394520674, + "grad_norm": 4.666518904627992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138120 + }, + { + "epoch": 0.6699055376449035, + "grad_norm": 4.663310448904667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138130 + }, + { + "epoch": 0.6699540358377396, + "grad_norm": 1.2444724006854813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138140 + }, + { + "epoch": 0.6700025340305756, + "grad_norm": 5.844646366881534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138150 + }, + { + "epoch": 0.6700510322234118, + "grad_norm": 4.736474323863149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138160 + }, + { + "epoch": 0.6700995304162478, + "grad_norm": 5.8806637781572135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138170 + }, + { + "epoch": 0.670148028609084, + "grad_norm": 4.673650977338184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138180 + }, + { + "epoch": 0.67019652680192, + "grad_norm": 6.006147401649287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138190 + }, + { + "epoch": 0.6702450249947561, + "grad_norm": 5.84531072433947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138200 + }, + { + "epoch": 0.6702935231875923, + "grad_norm": 5.4197258236854395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138210 + }, + { + "epoch": 0.6703420213804283, + "grad_norm": 5.201528452403181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138220 + }, + { + "epoch": 0.6703905195732645, + "grad_norm": 4.689049148964841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138230 + }, + { + "epoch": 0.6704390177661005, + "grad_norm": 5.527262558757684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138240 + }, + { + "epoch": 0.6704875159589366, + "grad_norm": 5.601477326422355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138250 + }, + { + "epoch": 0.6705360141517727, + "grad_norm": 4.594247471345625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138260 + }, + { + "epoch": 0.6705845123446088, + "grad_norm": 5.1667143452505115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138270 + }, + { + "epoch": 0.6706330105374448, + "grad_norm": 4.549623611183051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138280 + }, + { + "epoch": 0.670681508730281, + "grad_norm": 5.5349087091371985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138290 + }, + { + "epoch": 0.670730006923117, + "grad_norm": 5.984882278653458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138300 + }, + { + "epoch": 0.6707785051159532, + "grad_norm": 4.7408178716068505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138310 + }, + { + "epoch": 0.6708270033087892, + "grad_norm": 4.758199523280382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138320 + }, + { + "epoch": 0.6708755015016253, + "grad_norm": 4.776065765099702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138330 + }, + { + "epoch": 0.6709239996944614, + "grad_norm": 6.062089852321151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138340 + }, + { + "epoch": 0.6709724978872975, + "grad_norm": 5.49679626260513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138350 + }, + { + "epoch": 0.6710209960801335, + "grad_norm": 4.742187798001396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138360 + }, + { + "epoch": 0.6710694942729697, + "grad_norm": 5.0314096000647623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138370 + }, + { + "epoch": 0.6711179924658057, + "grad_norm": 4.416505205995236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138380 + }, + { + "epoch": 0.6711664906586419, + "grad_norm": 6.77575826557586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138390 + }, + { + "epoch": 0.6712149888514779, + "grad_norm": 6.305279498519667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138400 + }, + { + "epoch": 0.671263487044314, + "grad_norm": 4.761054839264034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138410 + }, + { + "epoch": 0.6713119852371501, + "grad_norm": 4.6903807060516556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138420 + }, + { + "epoch": 0.6713604834299862, + "grad_norm": 4.625421112791628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138430 + }, + { + "epoch": 0.6714089816228223, + "grad_norm": 5.852368545333775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138440 + }, + { + "epoch": 0.6714574798156584, + "grad_norm": 5.736210439977185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138450 + }, + { + "epoch": 0.6715059780084944, + "grad_norm": 4.931673203145692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138460 + }, + { + "epoch": 0.6715544762013306, + "grad_norm": 4.5515065494328155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138470 + }, + { + "epoch": 0.6716029743941666, + "grad_norm": 5.3343782724368793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138480 + }, + { + "epoch": 0.6716514725870028, + "grad_norm": 5.354900878273838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138490 + }, + { + "epoch": 0.6716999707798388, + "grad_norm": 5.338383957109727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138500 + }, + { + "epoch": 0.6717484689726749, + "grad_norm": 4.632648042957044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138510 + }, + { + "epoch": 0.671796967165511, + "grad_norm": 4.5875729881572624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138520 + }, + { + "epoch": 0.6718454653583471, + "grad_norm": 4.489487537284731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138530 + }, + { + "epoch": 0.6718939635511831, + "grad_norm": 5.745513576016492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138540 + }, + { + "epoch": 0.6719424617440193, + "grad_norm": 5.684941584149783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138550 + }, + { + "epoch": 0.6719909599368553, + "grad_norm": 4.4769748797079956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138560 + }, + { + "epoch": 0.6720394581296915, + "grad_norm": 4.6294854172401756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138570 + }, + { + "epoch": 0.6720879563225275, + "grad_norm": 4.4960902556567817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138580 + }, + { + "epoch": 0.6721364545153636, + "grad_norm": 5.662310442744456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138590 + }, + { + "epoch": 0.6721849527081997, + "grad_norm": 5.973510042167618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138600 + }, + { + "epoch": 0.6722334509010358, + "grad_norm": 4.534701858460721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138610 + }, + { + "epoch": 0.6722819490938718, + "grad_norm": 4.6163410871713495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138620 + }, + { + "epoch": 0.672330447286708, + "grad_norm": 4.649079343721496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138630 + }, + { + "epoch": 0.672378945479544, + "grad_norm": 6.022889920132002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138640 + }, + { + "epoch": 0.6724274436723802, + "grad_norm": 5.308831418915361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138650 + }, + { + "epoch": 0.6724759418652162, + "grad_norm": 4.571367639982782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138660 + }, + { + "epoch": 0.6725244400580523, + "grad_norm": 4.815253973333711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138670 + }, + { + "epoch": 0.6725729382508884, + "grad_norm": 4.6345441262474196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138680 + }, + { + "epoch": 0.6726214364437245, + "grad_norm": 5.535709846071768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138690 + }, + { + "epoch": 0.6726699346365606, + "grad_norm": 5.643132183763555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138700 + }, + { + "epoch": 0.6727184328293967, + "grad_norm": 4.4859064018965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138710 + }, + { + "epoch": 0.6727669310222328, + "grad_norm": 4.519007390513252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138720 + }, + { + "epoch": 0.6728154292150689, + "grad_norm": 4.6661707386874696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138730 + }, + { + "epoch": 0.672863927407905, + "grad_norm": 5.374527489721004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138740 + }, + { + "epoch": 0.672912425600741, + "grad_norm": 5.2322452148700904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138750 + }, + { + "epoch": 0.6729609237935772, + "grad_norm": 4.3215294454057585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138760 + }, + { + "epoch": 0.6730094219864132, + "grad_norm": 4.682437904079961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138770 + }, + { + "epoch": 0.6730579201792494, + "grad_norm": 4.498450678624977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138780 + }, + { + "epoch": 0.6731064183720854, + "grad_norm": 5.564140437286369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138790 + }, + { + "epoch": 0.6731549165649215, + "grad_norm": 5.48952314716189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138800 + }, + { + "epoch": 0.6732034147577576, + "grad_norm": 4.5434713769054724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138810 + }, + { + "epoch": 0.6732519129505937, + "grad_norm": 4.481124094013467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138820 + }, + { + "epoch": 0.6733004111434298, + "grad_norm": 4.3154308571047295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138830 + }, + { + "epoch": 0.6733489093362659, + "grad_norm": 5.200217501055704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138840 + }, + { + "epoch": 0.6733974075291019, + "grad_norm": 5.405332714758515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138850 + }, + { + "epoch": 0.6734459057219381, + "grad_norm": 4.339566217481661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138860 + }, + { + "epoch": 0.6734944039147741, + "grad_norm": 4.408416032219975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138870 + }, + { + "epoch": 0.6735429021076103, + "grad_norm": 4.240623141527067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138880 + }, + { + "epoch": 0.6735914003004463, + "grad_norm": 5.202610964261112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138890 + }, + { + "epoch": 0.6736398984932824, + "grad_norm": 5.010653936210474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138900 + }, + { + "epoch": 0.6736883966861185, + "grad_norm": 4.834784306240181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138910 + }, + { + "epoch": 0.6737368948789546, + "grad_norm": 4.117726248864528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138920 + }, + { + "epoch": 0.6737853930717906, + "grad_norm": 4.182991375500933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138930 + }, + { + "epoch": 0.6738338912646268, + "grad_norm": 5.5406811583225135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138940 + }, + { + "epoch": 0.6738823894574628, + "grad_norm": 5.1416186863662006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138950 + }, + { + "epoch": 0.673930887650299, + "grad_norm": 4.3799339266570314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138960 + }, + { + "epoch": 0.673979385843135, + "grad_norm": 4.5585615282561776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138970 + }, + { + "epoch": 0.6740278840359711, + "grad_norm": 4.0482238006234184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138980 + }, + { + "epoch": 0.6740763822288072, + "grad_norm": 5.137176728453596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 138990 + }, + { + "epoch": 0.6741248804216433, + "grad_norm": 5.0065271040011794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139000 + }, + { + "epoch": 0.6741733786144793, + "grad_norm": 4.4635761753397674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139010 + }, + { + "epoch": 0.6742218768073155, + "grad_norm": 4.39952820840972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139020 + }, + { + "epoch": 0.6742703750001515, + "grad_norm": 1.351875340560582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139030 + }, + { + "epoch": 0.6743188731929877, + "grad_norm": 5.6055750263794835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139040 + }, + { + "epoch": 0.6743673713858237, + "grad_norm": 5.271808234397213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139050 + }, + { + "epoch": 0.6744158695786598, + "grad_norm": 4.510950901703836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139060 + }, + { + "epoch": 0.6744643677714959, + "grad_norm": 4.264989073021752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139070 + }, + { + "epoch": 0.674512865964332, + "grad_norm": 4.14669294457326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139080 + }, + { + "epoch": 0.674561364157168, + "grad_norm": 4.991893121086832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139090 + }, + { + "epoch": 0.6746098623500042, + "grad_norm": 4.8493838278318435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139100 + }, + { + "epoch": 0.6746583605428402, + "grad_norm": 4.154453137061864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139110 + }, + { + "epoch": 0.6747068587356764, + "grad_norm": 4.173312362354409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139120 + }, + { + "epoch": 0.6747553569285124, + "grad_norm": 4.218057014782062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139130 + }, + { + "epoch": 0.6748038551213486, + "grad_norm": 5.222661414450158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139140 + }, + { + "epoch": 0.6748523533141846, + "grad_norm": 5.89902491299199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139150 + }, + { + "epoch": 0.6749008515070207, + "grad_norm": 4.076845883105307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139160 + }, + { + "epoch": 0.6749493496998568, + "grad_norm": 4.189281810340617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139170 + }, + { + "epoch": 0.6749978478926929, + "grad_norm": 4.500227746007113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139180 + }, + { + "epoch": 0.6750463460855289, + "grad_norm": 5.4199091437112656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139190 + }, + { + "epoch": 0.6750948442783651, + "grad_norm": 5.309825823474057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139200 + }, + { + "epoch": 0.6751433424712011, + "grad_norm": 4.398807362804291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139210 + }, + { + "epoch": 0.6751918406640373, + "grad_norm": 4.457057656281904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139220 + }, + { + "epoch": 0.6752403388568734, + "grad_norm": 3.958572492024359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139230 + }, + { + "epoch": 0.6752888370497094, + "grad_norm": 4.9507708155260843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139240 + }, + { + "epoch": 0.6753373352425456, + "grad_norm": 4.952555343606946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139250 + }, + { + "epoch": 0.6753858334353816, + "grad_norm": 4.4922817465931075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139260 + }, + { + "epoch": 0.6754343316282178, + "grad_norm": 4.255023711152717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139270 + }, + { + "epoch": 0.6754828298210538, + "grad_norm": 4.151411658881443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139280 + }, + { + "epoch": 0.6755313280138899, + "grad_norm": 1.0402327177416737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139290 + }, + { + "epoch": 0.675579826206726, + "grad_norm": 5.193473029407869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139300 + }, + { + "epoch": 0.6756283243995621, + "grad_norm": 3.854081853660318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139310 + }, + { + "epoch": 0.6756768225923981, + "grad_norm": 4.0519193333921066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139320 + }, + { + "epoch": 0.6757253207852343, + "grad_norm": 3.970281881038318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139330 + }, + { + "epoch": 0.6757738189780703, + "grad_norm": 4.833492894817937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139340 + }, + { + "epoch": 0.6758223171709065, + "grad_norm": 5.0354991287804296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139350 + }, + { + "epoch": 0.6758708153637425, + "grad_norm": 3.939652160056539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139360 + }, + { + "epoch": 0.6759193135565786, + "grad_norm": 3.874986731489116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139370 + }, + { + "epoch": 0.6759678117494147, + "grad_norm": 3.964170858239413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139380 + }, + { + "epoch": 0.6760163099422508, + "grad_norm": 4.983072798836474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139390 + }, + { + "epoch": 0.6760648081350868, + "grad_norm": 5.0721254751806555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139400 + }, + { + "epoch": 0.676113306327923, + "grad_norm": 4.014323096157568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139410 + }, + { + "epoch": 0.676161804520759, + "grad_norm": 3.8267142343784144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139420 + }, + { + "epoch": 0.6762103027135952, + "grad_norm": 4.021662647346602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139430 + }, + { + "epoch": 0.6762588009064312, + "grad_norm": 5.5085838113200225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139440 + }, + { + "epoch": 0.6763072990992673, + "grad_norm": 5.189796681293046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139450 + }, + { + "epoch": 0.6763557972921034, + "grad_norm": 4.1822492136134315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139460 + }, + { + "epoch": 0.6764042954849395, + "grad_norm": 3.9683811792201595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139470 + }, + { + "epoch": 0.6764527936777756, + "grad_norm": 4.051732105381234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139480 + }, + { + "epoch": 0.6765012918706117, + "grad_norm": 4.823031218847973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139490 + }, + { + "epoch": 0.6765497900634477, + "grad_norm": 4.7009603321157556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139500 + }, + { + "epoch": 0.6765982882562839, + "grad_norm": 3.874057341590742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139510 + }, + { + "epoch": 0.6766467864491199, + "grad_norm": 4.3597548682328124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139520 + }, + { + "epoch": 0.676695284641956, + "grad_norm": 4.38563709792561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139530 + }, + { + "epoch": 0.6767437828347921, + "grad_norm": 4.827178301525237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139540 + }, + { + "epoch": 0.6767922810276282, + "grad_norm": 4.850994272942444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139550 + }, + { + "epoch": 0.6768407792204643, + "grad_norm": 3.9054366851587474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139560 + }, + { + "epoch": 0.6768892774133004, + "grad_norm": 4.372693496179636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139570 + }, + { + "epoch": 0.6769377756061364, + "grad_norm": 4.157933730652985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139580 + }, + { + "epoch": 0.6769862737989726, + "grad_norm": 4.698545907899643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139590 + }, + { + "epoch": 0.6770347719918086, + "grad_norm": 4.8063363067285536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139600 + }, + { + "epoch": 0.6770832701846448, + "grad_norm": 3.884547794541504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139610 + }, + { + "epoch": 0.6771317683774808, + "grad_norm": 3.760183986400989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139620 + }, + { + "epoch": 0.6771802665703169, + "grad_norm": 4.0613770124764415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139630 + }, + { + "epoch": 0.677228764763153, + "grad_norm": 4.935115072157714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139640 + }, + { + "epoch": 0.6772772629559891, + "grad_norm": 4.952291376980611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139650 + }, + { + "epoch": 0.6773257611488251, + "grad_norm": 3.778638912876886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139660 + }, + { + "epoch": 0.6773742593416613, + "grad_norm": 3.7817297737774425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139670 + }, + { + "epoch": 0.6774227575344973, + "grad_norm": 3.8313551442570315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139680 + }, + { + "epoch": 0.6774712557273335, + "grad_norm": 4.859965230252783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139690 + }, + { + "epoch": 0.6775197539201695, + "grad_norm": 4.642401307819455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139700 + }, + { + "epoch": 0.6775682521130056, + "grad_norm": 3.794297498416199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139710 + }, + { + "epoch": 0.6776167503058417, + "grad_norm": 3.9477729529835415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139720 + }, + { + "epoch": 0.6776652484986778, + "grad_norm": 3.8730217255533717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139730 + }, + { + "epoch": 0.677713746691514, + "grad_norm": 4.835628075738896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139740 + }, + { + "epoch": 0.67776224488435, + "grad_norm": 4.577712076070384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139750 + }, + { + "epoch": 0.6778107430771861, + "grad_norm": 3.845893559173419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139760 + }, + { + "epoch": 0.6778592412700222, + "grad_norm": 3.6212995269124804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139770 + }, + { + "epoch": 0.6779077394628583, + "grad_norm": 3.671141612926476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139780 + }, + { + "epoch": 0.6779562376556943, + "grad_norm": 5.259328972329058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139790 + }, + { + "epoch": 0.6780047358485305, + "grad_norm": 4.6931290853535756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139800 + }, + { + "epoch": 0.6780532340413665, + "grad_norm": 4.6108123541444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139810 + }, + { + "epoch": 0.6781017322342027, + "grad_norm": 3.8495521437198477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139820 + }, + { + "epoch": 0.6781502304270387, + "grad_norm": 3.835467410340243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139830 + }, + { + "epoch": 0.6781987286198748, + "grad_norm": 4.549103138629107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139840 + }, + { + "epoch": 0.6782472268127109, + "grad_norm": 4.4966917300826026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139850 + }, + { + "epoch": 0.678295725005547, + "grad_norm": 3.636630552250608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139860 + }, + { + "epoch": 0.6783442231983831, + "grad_norm": 3.610121979136238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139870 + }, + { + "epoch": 0.6783927213912192, + "grad_norm": 4.240740025807099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139880 + }, + { + "epoch": 0.6784412195840552, + "grad_norm": 4.89914029344618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139890 + }, + { + "epoch": 0.6784897177768914, + "grad_norm": 4.827409227914359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139900 + }, + { + "epoch": 0.6785382159697274, + "grad_norm": 3.684384708435573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139910 + }, + { + "epoch": 0.6785867141625636, + "grad_norm": 3.7567527755300034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139920 + }, + { + "epoch": 0.6786352123553996, + "grad_norm": 3.600830567052071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139930 + }, + { + "epoch": 0.6786837105482357, + "grad_norm": 4.52521007332507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139940 + }, + { + "epoch": 0.6787322087410718, + "grad_norm": 4.537602649179462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139950 + }, + { + "epoch": 0.6787807069339079, + "grad_norm": 3.723036812175451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139960 + }, + { + "epoch": 0.6788292051267439, + "grad_norm": 3.9635359883050114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139970 + }, + { + "epoch": 0.6788777033195801, + "grad_norm": 3.638739443090344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139980 + }, + { + "epoch": 0.6789262015124161, + "grad_norm": 4.551801424668156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 139990 + }, + { + "epoch": 0.6789746997052523, + "grad_norm": 4.8802217378352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140000 + }, + { + "epoch": 0.6790231978980883, + "grad_norm": 3.6952503279508164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140010 + }, + { + "epoch": 0.6790716960909244, + "grad_norm": 3.8334491136993165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140020 + }, + { + "epoch": 0.6791201942837605, + "grad_norm": 3.951121030354443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140030 + }, + { + "epoch": 0.6791686924765966, + "grad_norm": 4.668994790790748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140040 + }, + { + "epoch": 0.6792171906694326, + "grad_norm": 4.497824335203404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140050 + }, + { + "epoch": 0.6792656888622688, + "grad_norm": 4.915890983170357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140060 + }, + { + "epoch": 0.6793141870551048, + "grad_norm": 3.5211712656746386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140070 + }, + { + "epoch": 0.679362685247941, + "grad_norm": 3.4900278222949055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140080 + }, + { + "epoch": 0.679411183440777, + "grad_norm": 4.382002671832197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140090 + }, + { + "epoch": 0.6794596816336131, + "grad_norm": 4.628694227903907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140100 + }, + { + "epoch": 0.6795081798264492, + "grad_norm": 3.593122599454546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140110 + }, + { + "epoch": 0.6795566780192853, + "grad_norm": 3.595829412006424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140120 + }, + { + "epoch": 0.6796051762121214, + "grad_norm": 3.694712091828478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140130 + }, + { + "epoch": 0.6796536744049575, + "grad_norm": 4.530579289507841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140140 + }, + { + "epoch": 0.6797021725977935, + "grad_norm": 4.5982204710526275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140150 + }, + { + "epoch": 0.6797506707906297, + "grad_norm": 3.4630602385732345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140160 + }, + { + "epoch": 0.6797991689834657, + "grad_norm": 3.383937041689933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140170 + }, + { + "epoch": 0.6798476671763019, + "grad_norm": 3.6393572599990875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140180 + }, + { + "epoch": 0.6798961653691379, + "grad_norm": 4.3571567687195056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140190 + }, + { + "epoch": 0.679944663561974, + "grad_norm": 4.6259316377472715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140200 + }, + { + "epoch": 0.6799931617548101, + "grad_norm": 3.6390876090308666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140210 + }, + { + "epoch": 0.6800416599476462, + "grad_norm": 1.049931981356167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140220 + }, + { + "epoch": 0.6800901581404822, + "grad_norm": 3.9072105550985725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140230 + }, + { + "epoch": 0.6801386563333184, + "grad_norm": 4.4072027804986647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140240 + }, + { + "epoch": 0.6801871545261544, + "grad_norm": 4.717411172805441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140250 + }, + { + "epoch": 0.6802356527189906, + "grad_norm": 3.478946908330727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140260 + }, + { + "epoch": 0.6802841509118267, + "grad_norm": 3.6762944688462085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140270 + }, + { + "epoch": 0.6803326491046627, + "grad_norm": 3.559533823249694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140280 + }, + { + "epoch": 0.6803811472974989, + "grad_norm": 4.327457148178837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140290 + }, + { + "epoch": 0.6804296454903349, + "grad_norm": 4.637043105049088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140300 + }, + { + "epoch": 0.680478143683171, + "grad_norm": 3.433336814850918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140310 + }, + { + "epoch": 0.6805266418760071, + "grad_norm": 3.663663150632601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140320 + }, + { + "epoch": 0.6805751400688432, + "grad_norm": 3.699502570952973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140330 + }, + { + "epoch": 0.6806236382616793, + "grad_norm": 4.4043478197863806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140340 + }, + { + "epoch": 0.6806721364545154, + "grad_norm": 4.690705424081898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140350 + }, + { + "epoch": 0.6807206346473514, + "grad_norm": 3.6402372671773264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140360 + }, + { + "epoch": 0.6807691328401876, + "grad_norm": 3.590613673054577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140370 + }, + { + "epoch": 0.6808176310330236, + "grad_norm": 3.67352903651863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140380 + }, + { + "epoch": 0.6808661292258598, + "grad_norm": 4.6436703371455224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140390 + }, + { + "epoch": 0.6809146274186958, + "grad_norm": 4.505768913531938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140400 + }, + { + "epoch": 0.6809631256115319, + "grad_norm": 3.883807053739474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140410 + }, + { + "epoch": 0.681011623804368, + "grad_norm": 3.414407601098901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140420 + }, + { + "epoch": 0.6810601219972041, + "grad_norm": 3.768772316448121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140430 + }, + { + "epoch": 0.6811086201900401, + "grad_norm": 4.343984727483985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140440 + }, + { + "epoch": 0.6811571183828763, + "grad_norm": 4.414552634557367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140450 + }, + { + "epoch": 0.6812056165757123, + "grad_norm": 3.896722233776018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140460 + }, + { + "epoch": 0.6812541147685485, + "grad_norm": 3.734102449470811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140470 + }, + { + "epoch": 0.6813026129613845, + "grad_norm": 3.295972916816936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140480 + }, + { + "epoch": 0.6813511111542206, + "grad_norm": 4.429968569752418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140490 + }, + { + "epoch": 0.6813996093470567, + "grad_norm": 4.424499522315273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140500 + }, + { + "epoch": 0.6814481075398928, + "grad_norm": 3.315441077234027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140510 + }, + { + "epoch": 0.6814966057327289, + "grad_norm": 3.6966305572150304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140520 + }, + { + "epoch": 0.681545103925565, + "grad_norm": 3.291702910246386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140530 + }, + { + "epoch": 0.681593602118401, + "grad_norm": 5.285487603146066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140540 + }, + { + "epoch": 0.6816421003112372, + "grad_norm": 4.283885957079292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140550 + }, + { + "epoch": 0.6816905985040732, + "grad_norm": 3.2842837782709466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140560 + }, + { + "epoch": 0.6817390966969094, + "grad_norm": 3.378995927505457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140570 + }, + { + "epoch": 0.6817875948897454, + "grad_norm": 3.2325470300520465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140580 + }, + { + "epoch": 0.6818360930825815, + "grad_norm": 4.951314380718941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140590 + }, + { + "epoch": 0.6818845912754176, + "grad_norm": 4.133407571771386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140600 + }, + { + "epoch": 0.6819330894682537, + "grad_norm": 3.569423512317371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140610 + }, + { + "epoch": 0.6819815876610897, + "grad_norm": 4.0679200452586883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140620 + }, + { + "epoch": 0.6820300858539259, + "grad_norm": 3.5553714639036116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140630 + }, + { + "epoch": 0.6820785840467619, + "grad_norm": 4.040757772827419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140640 + }, + { + "epoch": 0.6821270822395981, + "grad_norm": 4.149634946770675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140650 + }, + { + "epoch": 0.6821755804324341, + "grad_norm": 3.236704415598979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140660 + }, + { + "epoch": 0.6822240786252702, + "grad_norm": 3.3276183586394836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140670 + }, + { + "epoch": 0.6822725768181063, + "grad_norm": 3.305426687916224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140680 + }, + { + "epoch": 0.6823210750109424, + "grad_norm": 4.1802319827866086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140690 + }, + { + "epoch": 0.6823695732037784, + "grad_norm": 4.335949554956642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140700 + }, + { + "epoch": 0.6824180713966146, + "grad_norm": 9.867294181731268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140710 + }, + { + "epoch": 0.6824665695894506, + "grad_norm": 3.363068046269291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140720 + }, + { + "epoch": 0.6825150677822868, + "grad_norm": 3.2412280859261955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140730 + }, + { + "epoch": 0.6825635659751228, + "grad_norm": 4.579977996854723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140740 + }, + { + "epoch": 0.6826120641679589, + "grad_norm": 4.2048860393606446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140750 + }, + { + "epoch": 0.682660562360795, + "grad_norm": 3.249391511417343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140760 + }, + { + "epoch": 0.6827090605536311, + "grad_norm": 3.2955895790109935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140770 + }, + { + "epoch": 0.6827575587464673, + "grad_norm": 5.7148223930880704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140780 + }, + { + "epoch": 0.6828060569393033, + "grad_norm": 4.085632809847084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140790 + }, + { + "epoch": 0.6828545551321394, + "grad_norm": 4.145334742133855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140800 + }, + { + "epoch": 0.6829030533249755, + "grad_norm": 3.5846078105805645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140810 + }, + { + "epoch": 0.6829515515178116, + "grad_norm": 3.743810950140869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140820 + }, + { + "epoch": 0.6830000497106476, + "grad_norm": 3.2647097469862274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140830 + }, + { + "epoch": 0.6830485479034838, + "grad_norm": 4.1094473601788195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140840 + }, + { + "epoch": 0.6830970460963198, + "grad_norm": 4.138583875601398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140850 + }, + { + "epoch": 0.683145544289156, + "grad_norm": 3.415290805719451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140860 + }, + { + "epoch": 0.683194042481992, + "grad_norm": 3.323740571659073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140870 + }, + { + "epoch": 0.6832425406748281, + "grad_norm": 3.176093343881803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140880 + }, + { + "epoch": 0.6832910388676642, + "grad_norm": 4.3752926615070464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140890 + }, + { + "epoch": 0.6833395370605003, + "grad_norm": 4.438073020196498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140900 + }, + { + "epoch": 0.6833880352533364, + "grad_norm": 3.156882044663689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140910 + }, + { + "epoch": 0.6834365334461725, + "grad_norm": 3.4061603315649336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140920 + }, + { + "epoch": 0.6834850316390085, + "grad_norm": 3.642044532625732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140930 + }, + { + "epoch": 0.6835335298318447, + "grad_norm": 4.067669934215701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140940 + }, + { + "epoch": 0.6835820280246807, + "grad_norm": 4.14422949290838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140950 + }, + { + "epoch": 0.6836305262175169, + "grad_norm": 3.3233774843210995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140960 + }, + { + "epoch": 0.6836790244103529, + "grad_norm": 3.3866243143165775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140970 + }, + { + "epoch": 0.683727522603189, + "grad_norm": 3.313995833309491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140980 + }, + { + "epoch": 0.6837760207960251, + "grad_norm": 4.23860484488614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 140990 + }, + { + "epoch": 0.6838245189888612, + "grad_norm": 4.221249128022464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141000 + }, + { + "epoch": 0.6838730171816972, + "grad_norm": 3.056705111248448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141010 + }, + { + "epoch": 0.6839215153745334, + "grad_norm": 3.127186687379435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141020 + }, + { + "epoch": 0.6839700135673694, + "grad_norm": 3.647778967774684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141030 + }, + { + "epoch": 0.6840185117602056, + "grad_norm": 3.9332721968321493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141040 + }, + { + "epoch": 0.6840670099530416, + "grad_norm": 5.1834696535024705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141050 + }, + { + "epoch": 0.6841155081458777, + "grad_norm": 3.2444152253674474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141060 + }, + { + "epoch": 0.6841640063387138, + "grad_norm": 3.3147703248914695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141070 + }, + { + "epoch": 0.6842125045315499, + "grad_norm": 3.040364404682805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141080 + }, + { + "epoch": 0.684261002724386, + "grad_norm": 3.8636507326827996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141090 + }, + { + "epoch": 0.6843095009172221, + "grad_norm": 4.0791597655243095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141100 + }, + { + "epoch": 0.6843579991100581, + "grad_norm": 3.455333796864579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141110 + }, + { + "epoch": 0.6844064973028943, + "grad_norm": 3.2099599422963365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141120 + }, + { + "epoch": 0.6844549954957303, + "grad_norm": 3.3674137256411996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141130 + }, + { + "epoch": 0.6845034936885664, + "grad_norm": 4.1171748677015785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141140 + }, + { + "epoch": 0.6845519918814025, + "grad_norm": 4.1986755405787335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141150 + }, + { + "epoch": 0.6846004900742386, + "grad_norm": 3.161413530960999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141160 + }, + { + "epoch": 0.6846489882670747, + "grad_norm": 3.208261389886502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141170 + }, + { + "epoch": 0.6846974864599108, + "grad_norm": 3.424228367521209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141180 + }, + { + "epoch": 0.6847459846527468, + "grad_norm": 3.979318563551715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141190 + }, + { + "epoch": 0.684794482845583, + "grad_norm": 4.4727837433811146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141200 + }, + { + "epoch": 0.684842981038419, + "grad_norm": 3.1986669313255334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141210 + }, + { + "epoch": 0.6848914792312552, + "grad_norm": 3.004899085112811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141220 + }, + { + "epoch": 0.6849399774240912, + "grad_norm": 3.38313306258442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141230 + }, + { + "epoch": 0.6849884756169273, + "grad_norm": 3.948287385924232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141240 + }, + { + "epoch": 0.6850369738097634, + "grad_norm": 3.987404184613297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141250 + }, + { + "epoch": 0.6850854720025995, + "grad_norm": 3.339678045222172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141260 + }, + { + "epoch": 0.6851339701954355, + "grad_norm": 3.1349575380090755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141270 + }, + { + "epoch": 0.6851824683882717, + "grad_norm": 3.587568997431845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141280 + }, + { + "epoch": 0.6852309665811078, + "grad_norm": 3.854067642805603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141290 + }, + { + "epoch": 0.6852794647739439, + "grad_norm": 3.91387580123137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141300 + }, + { + "epoch": 0.68532796296678, + "grad_norm": 3.6071117648361906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141310 + }, + { + "epoch": 0.685376461159616, + "grad_norm": 3.0829237829266276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141320 + }, + { + "epoch": 0.6854249593524522, + "grad_norm": 4.403156239618511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141330 + }, + { + "epoch": 0.6854734575452882, + "grad_norm": 3.718024288446031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141340 + }, + { + "epoch": 0.6855219557381244, + "grad_norm": 3.796231595742938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141350 + }, + { + "epoch": 0.6855704539309604, + "grad_norm": 3.0650106452867476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141360 + }, + { + "epoch": 0.6856189521237965, + "grad_norm": 3.3706509583453226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141370 + }, + { + "epoch": 0.6856674503166326, + "grad_norm": 2.942298138464139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141380 + }, + { + "epoch": 0.6857159485094687, + "grad_norm": 3.828527894711442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141390 + }, + { + "epoch": 0.6857644467023047, + "grad_norm": 3.8339944552490124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141400 + }, + { + "epoch": 0.6858129448951409, + "grad_norm": 2.881497707107883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141410 + }, + { + "epoch": 0.6858614430879769, + "grad_norm": 3.0623215963032635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141420 + }, + { + "epoch": 0.6859099412808131, + "grad_norm": 3.088245037474735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141430 + }, + { + "epoch": 0.6859584394736491, + "grad_norm": 3.9313558630738044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141440 + }, + { + "epoch": 0.6860069376664852, + "grad_norm": 3.845430995852439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141450 + }, + { + "epoch": 0.6860554358593213, + "grad_norm": 4.1386481797189845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141460 + }, + { + "epoch": 0.6861039340521574, + "grad_norm": 2.8556478071095626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141470 + }, + { + "epoch": 0.6861524322449934, + "grad_norm": 2.9173630622381097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141480 + }, + { + "epoch": 0.6862009304378296, + "grad_norm": 3.8761058362979384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141490 + }, + { + "epoch": 0.6862494286306656, + "grad_norm": 4.049311996823235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141500 + }, + { + "epoch": 0.6862979268235018, + "grad_norm": 3.3069277094455174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141510 + }, + { + "epoch": 0.6863464250163378, + "grad_norm": 3.31510712214822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141520 + }, + { + "epoch": 0.686394923209174, + "grad_norm": 2.8870253743207286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141530 + }, + { + "epoch": 0.68644342140201, + "grad_norm": 3.7359992433039224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141540 + }, + { + "epoch": 0.6864919195948461, + "grad_norm": 3.848067109402109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141550 + }, + { + "epoch": 0.6865404177876822, + "grad_norm": 3.6222733257318396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141560 + }, + { + "epoch": 0.6865889159805183, + "grad_norm": 3.574539420014844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141570 + }, + { + "epoch": 0.6866374141733543, + "grad_norm": 2.8432079801632426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141580 + }, + { + "epoch": 0.6866859123661905, + "grad_norm": 3.7865223845301443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141590 + }, + { + "epoch": 0.6867344105590265, + "grad_norm": 4.185779189924688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141600 + }, + { + "epoch": 0.6867829087518627, + "grad_norm": 2.9233971687858684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141610 + }, + { + "epoch": 0.6868314069446987, + "grad_norm": 4.273029929890981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141620 + }, + { + "epoch": 0.6868799051375348, + "grad_norm": 2.882909733159522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141630 + }, + { + "epoch": 0.6869284033303709, + "grad_norm": 3.616775856585264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141640 + }, + { + "epoch": 0.686976901523207, + "grad_norm": 3.9172775245788216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141650 + }, + { + "epoch": 0.687025399716043, + "grad_norm": 3.3090508111399686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141660 + }, + { + "epoch": 0.6870738979088792, + "grad_norm": 2.722556224910022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141670 + }, + { + "epoch": 0.6871223961017152, + "grad_norm": 3.4257912062685136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141680 + }, + { + "epoch": 0.6871708942945514, + "grad_norm": 4.6817149268463254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141690 + }, + { + "epoch": 0.6872193924873874, + "grad_norm": 3.8487829812083874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141700 + }, + { + "epoch": 0.6872678906802235, + "grad_norm": 3.764134959283183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141710 + }, + { + "epoch": 0.6873163888730596, + "grad_norm": 2.901013118616902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141720 + }, + { + "epoch": 0.6873648870658957, + "grad_norm": 2.9909770660196955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141730 + }, + { + "epoch": 0.6874133852587317, + "grad_norm": 3.549494209664772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141740 + }, + { + "epoch": 0.6874618834515679, + "grad_norm": 3.542879412066213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141750 + }, + { + "epoch": 0.6875103816444039, + "grad_norm": 3.47052520055513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141760 + }, + { + "epoch": 0.6875588798372401, + "grad_norm": 2.8760254622284265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141770 + }, + { + "epoch": 0.6876073780300761, + "grad_norm": 3.0660768146617556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141780 + }, + { + "epoch": 0.6876558762229122, + "grad_norm": 3.6550432014337275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141790 + }, + { + "epoch": 0.6877043744157484, + "grad_norm": 3.565988748732707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141800 + }, + { + "epoch": 0.6877528726085844, + "grad_norm": 2.918330466172847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141810 + }, + { + "epoch": 0.6878013708014206, + "grad_norm": 2.8668630136508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141820 + }, + { + "epoch": 0.6878498689942566, + "grad_norm": 2.98397289100194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141830 + }, + { + "epoch": 0.6878983671870927, + "grad_norm": 3.64852041911945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141840 + }, + { + "epoch": 0.6879468653799288, + "grad_norm": 3.614689703113072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141850 + }, + { + "epoch": 0.6879953635727649, + "grad_norm": 3.085757427356839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141860 + }, + { + "epoch": 0.688043861765601, + "grad_norm": 2.8843716748383486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141870 + }, + { + "epoch": 0.6880923599584371, + "grad_norm": 2.915252927948586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141880 + }, + { + "epoch": 0.6881408581512731, + "grad_norm": 3.606234599828895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141890 + }, + { + "epoch": 0.6881893563441093, + "grad_norm": 3.695023664818109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141900 + }, + { + "epoch": 0.6882378545369453, + "grad_norm": 3.039955132067007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141910 + }, + { + "epoch": 0.6882863527297814, + "grad_norm": 3.009555626931615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141920 + }, + { + "epoch": 0.6883348509226175, + "grad_norm": 3.238969270569214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141930 + }, + { + "epoch": 0.6883833491154536, + "grad_norm": 4.7818630832807685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141940 + }, + { + "epoch": 0.6884318473082897, + "grad_norm": 3.922156821545286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141950 + }, + { + "epoch": 0.6884803455011258, + "grad_norm": 2.9854646754756686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141960 + }, + { + "epoch": 0.6885288436939618, + "grad_norm": 2.928193509887933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141970 + }, + { + "epoch": 0.688577341886798, + "grad_norm": 2.841008672760381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141980 + }, + { + "epoch": 0.688625840079634, + "grad_norm": 3.532104742021147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 141990 + }, + { + "epoch": 0.6886743382724702, + "grad_norm": 3.588870711723757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142000 + }, + { + "epoch": 0.6887228364653062, + "grad_norm": 2.98382545338427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142010 + }, + { + "epoch": 0.6887713346581423, + "grad_norm": 2.863711756617704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142020 + }, + { + "epoch": 0.6888198328509784, + "grad_norm": 2.918946861996119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142030 + }, + { + "epoch": 0.6888683310438145, + "grad_norm": 3.6343461573551394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142040 + }, + { + "epoch": 0.6889168292366505, + "grad_norm": 3.418390548404204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142050 + }, + { + "epoch": 0.6889653274294867, + "grad_norm": 2.819160194178494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142060 + }, + { + "epoch": 0.6890138256223227, + "grad_norm": 2.9645049082205333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142070 + }, + { + "epoch": 0.6890623238151589, + "grad_norm": 2.827914968861478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142080 + }, + { + "epoch": 0.6891108220079949, + "grad_norm": 3.5671209985821406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142090 + }, + { + "epoch": 0.689159320200831, + "grad_norm": 3.5902679940136295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142100 + }, + { + "epoch": 0.6892078183936671, + "grad_norm": 3.0847338905459765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142110 + }, + { + "epoch": 0.6892563165865032, + "grad_norm": 3.252253222285617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142120 + }, + { + "epoch": 0.6893048147793392, + "grad_norm": 2.9733607576076793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142130 + }, + { + "epoch": 0.6893533129721754, + "grad_norm": 4.930373975753355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142140 + }, + { + "epoch": 0.6894018111650114, + "grad_norm": 3.645940793717273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142150 + }, + { + "epoch": 0.6894503093578476, + "grad_norm": 2.8476090818685407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142160 + }, + { + "epoch": 0.6894988075506836, + "grad_norm": 3.146507410178856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142170 + }, + { + "epoch": 0.6895473057435197, + "grad_norm": 2.8102880023084253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142180 + }, + { + "epoch": 0.6895958039363558, + "grad_norm": 3.75987880829598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142190 + }, + { + "epoch": 0.6896443021291919, + "grad_norm": 3.4975837337469784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142200 + }, + { + "epoch": 0.689692800322028, + "grad_norm": 3.0716748256054416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142210 + }, + { + "epoch": 0.6897412985148641, + "grad_norm": 4.204630243975771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142220 + }, + { + "epoch": 0.6897897967077001, + "grad_norm": 2.94910105225199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142230 + }, + { + "epoch": 0.6898382949005363, + "grad_norm": 3.89527556876601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142240 + }, + { + "epoch": 0.6898867930933723, + "grad_norm": 3.393780900751153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142250 + }, + { + "epoch": 0.6899352912862085, + "grad_norm": 2.7917520739606516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142260 + }, + { + "epoch": 0.6899837894790445, + "grad_norm": 3.391868119706487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142270 + }, + { + "epoch": 0.6900322876718806, + "grad_norm": 3.0900235259423425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142280 + }, + { + "epoch": 0.6900807858647167, + "grad_norm": 3.578345086907575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142290 + }, + { + "epoch": 0.6901292840575528, + "grad_norm": 3.414833926740357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142300 + }, + { + "epoch": 0.690177782250389, + "grad_norm": 2.7422315085345872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142310 + }, + { + "epoch": 0.690226280443225, + "grad_norm": 2.8479510305601252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142320 + }, + { + "epoch": 0.6902747786360611, + "grad_norm": 2.6971269662112718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142330 + }, + { + "epoch": 0.6903232768288972, + "grad_norm": 3.2032836827511346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142340 + }, + { + "epoch": 0.6903717750217333, + "grad_norm": 4.566519606896691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142350 + }, + { + "epoch": 0.6904202732145693, + "grad_norm": 2.7854474282662522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142360 + }, + { + "epoch": 0.6904687714074055, + "grad_norm": 2.774287644058404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142370 + }, + { + "epoch": 0.6905172696002415, + "grad_norm": 2.841041357726226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142380 + }, + { + "epoch": 0.6905657677930777, + "grad_norm": 3.410540116988159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142390 + }, + { + "epoch": 0.6906142659859137, + "grad_norm": 3.405788717714131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142400 + }, + { + "epoch": 0.6906627641787498, + "grad_norm": 2.7590782991637752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142410 + }, + { + "epoch": 0.6907112623715859, + "grad_norm": 2.8764109316625763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142420 + }, + { + "epoch": 0.690759760564422, + "grad_norm": 3.429752126749008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142430 + }, + { + "epoch": 0.690808258757258, + "grad_norm": 3.6655634971793916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142440 + }, + { + "epoch": 0.6908567569500942, + "grad_norm": 4.0922660815567724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142450 + }, + { + "epoch": 0.6909052551429302, + "grad_norm": 2.9509187982057483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142460 + }, + { + "epoch": 0.6909537533357664, + "grad_norm": 3.2092550839024625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142470 + }, + { + "epoch": 0.6910022515286024, + "grad_norm": 2.999041015527837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142480 + }, + { + "epoch": 0.6910507497214385, + "grad_norm": 3.251355806810352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142490 + }, + { + "epoch": 0.6910992479142746, + "grad_norm": 3.474354670629509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142500 + }, + { + "epoch": 0.6911477461071107, + "grad_norm": 2.879735028216146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142510 + }, + { + "epoch": 0.6911962442999467, + "grad_norm": 2.8219785619398863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142520 + }, + { + "epoch": 0.6912447424927829, + "grad_norm": 2.8464747003908997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142530 + }, + { + "epoch": 0.6912932406856189, + "grad_norm": 3.5625365768510164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142540 + }, + { + "epoch": 0.6913417388784551, + "grad_norm": 3.262842085405282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142550 + }, + { + "epoch": 0.6913902370712911, + "grad_norm": 2.6225153604286788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142560 + }, + { + "epoch": 0.6914387352641272, + "grad_norm": 2.587307790236082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142570 + }, + { + "epoch": 0.6914872334569633, + "grad_norm": 2.5315172180739864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142580 + }, + { + "epoch": 0.6915357316497994, + "grad_norm": 3.430750439292751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142590 + }, + { + "epoch": 0.6915842298426355, + "grad_norm": 3.244809931857162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142600 + }, + { + "epoch": 0.6916327280354716, + "grad_norm": 2.684719113688061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142610 + }, + { + "epoch": 0.6916812262283076, + "grad_norm": 2.808301502454924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142620 + }, + { + "epoch": 0.6917297244211438, + "grad_norm": 2.74349041262667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142630 + }, + { + "epoch": 0.6917782226139798, + "grad_norm": 3.3663123844007714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142640 + }, + { + "epoch": 0.691826720806816, + "grad_norm": 3.2984242892553084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142650 + }, + { + "epoch": 0.691875218999652, + "grad_norm": 2.7267368807315506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142660 + }, + { + "epoch": 0.6919237171924881, + "grad_norm": 3.0266726014360756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142670 + }, + { + "epoch": 0.6919722153853242, + "grad_norm": 2.581096225640067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142680 + }, + { + "epoch": 0.6920207135781603, + "grad_norm": 3.1908708564287735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142690 + }, + { + "epoch": 0.6920692117709963, + "grad_norm": 3.279633276065397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142700 + }, + { + "epoch": 0.6921177099638325, + "grad_norm": 2.893741779530501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142710 + }, + { + "epoch": 0.6921662081566685, + "grad_norm": 2.604017801388636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142720 + }, + { + "epoch": 0.6922147063495047, + "grad_norm": 2.8233529292265303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142730 + }, + { + "epoch": 0.6922632045423407, + "grad_norm": 3.227799894034433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142740 + }, + { + "epoch": 0.6923117027351768, + "grad_norm": 3.357473232767916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142750 + }, + { + "epoch": 0.6923602009280129, + "grad_norm": 3.164728923366056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142760 + }, + { + "epoch": 0.692408699120849, + "grad_norm": 2.5938373227063494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142770 + }, + { + "epoch": 0.692457197313685, + "grad_norm": 2.6715561318724212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142780 + }, + { + "epoch": 0.6925056955065212, + "grad_norm": 3.0360027380993415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142790 + }, + { + "epoch": 0.6925541936993572, + "grad_norm": 3.209597565501099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142800 + }, + { + "epoch": 0.6926026918921934, + "grad_norm": 2.6884798387527553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142810 + }, + { + "epoch": 0.6926511900850295, + "grad_norm": 2.7864626161999695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142820 + }, + { + "epoch": 0.6926996882778655, + "grad_norm": 2.8104684801633084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142830 + }, + { + "epoch": 0.6927481864707017, + "grad_norm": 3.264988990281381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142840 + }, + { + "epoch": 0.6927966846635377, + "grad_norm": 3.257342484630499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142850 + }, + { + "epoch": 0.6928451828563739, + "grad_norm": 2.7919567102685505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142860 + }, + { + "epoch": 0.6928936810492099, + "grad_norm": 2.7107827094141612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142870 + }, + { + "epoch": 0.692942179242046, + "grad_norm": 2.6489242799243584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142880 + }, + { + "epoch": 0.6929906774348821, + "grad_norm": 3.142929827504304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142890 + }, + { + "epoch": 0.6930391756277182, + "grad_norm": 3.336691634103772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142900 + }, + { + "epoch": 0.6930876738205543, + "grad_norm": 2.6634584315843313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142910 + }, + { + "epoch": 0.6931361720133904, + "grad_norm": 2.44894255985173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142920 + }, + { + "epoch": 0.6931846702062264, + "grad_norm": 3.333042997155644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142930 + }, + { + "epoch": 0.6932331683990626, + "grad_norm": 3.203354381753343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142940 + }, + { + "epoch": 0.6932816665918986, + "grad_norm": 3.3687733491660765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142950 + }, + { + "epoch": 0.6933301647847347, + "grad_norm": 3.028220518785929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142960 + }, + { + "epoch": 0.6933786629775708, + "grad_norm": 2.9500833775841784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142970 + }, + { + "epoch": 0.6934271611704069, + "grad_norm": 2.7329946306053898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142980 + }, + { + "epoch": 0.693475659363243, + "grad_norm": 3.263082604121337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 142990 + }, + { + "epoch": 0.6935241575560791, + "grad_norm": 3.333655484993869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143000 + }, + { + "epoch": 0.6935726557489151, + "grad_norm": 2.5885267262992784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143010 + }, + { + "epoch": 0.6936211539417513, + "grad_norm": 2.8730289258760422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143020 + }, + { + "epoch": 0.6936696521345873, + "grad_norm": 2.657591835486528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143030 + }, + { + "epoch": 0.6937181503274235, + "grad_norm": 3.435958362274505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143040 + }, + { + "epoch": 0.6937666485202595, + "grad_norm": 3.37848433673571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143050 + }, + { + "epoch": 0.6938151467130956, + "grad_norm": 3.124048220115583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143060 + }, + { + "epoch": 0.6938636449059317, + "grad_norm": 2.580773461602348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143070 + }, + { + "epoch": 0.6939121430987678, + "grad_norm": 2.615824534757394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143080 + }, + { + "epoch": 0.6939606412916038, + "grad_norm": 4.03478317423378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143090 + }, + { + "epoch": 0.69400913948444, + "grad_norm": 3.821663341341264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143100 + }, + { + "epoch": 0.694057637677276, + "grad_norm": 2.6010757991912214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143110 + }, + { + "epoch": 0.6941061358701122, + "grad_norm": 2.9448518290564607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143120 + }, + { + "epoch": 0.6941546340629482, + "grad_norm": 2.6797957630719793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143130 + }, + { + "epoch": 0.6942031322557843, + "grad_norm": 3.585655861115811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143140 + }, + { + "epoch": 0.6942516304486204, + "grad_norm": 2.976474711147148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143150 + }, + { + "epoch": 0.6943001286414565, + "grad_norm": 2.5696957450804803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143160 + }, + { + "epoch": 0.6943486268342925, + "grad_norm": 2.447781533021498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143170 + }, + { + "epoch": 0.6943971250271287, + "grad_norm": 2.744672933374659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143180 + }, + { + "epoch": 0.6944456232199647, + "grad_norm": 3.211904697764112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143190 + }, + { + "epoch": 0.6944941214128009, + "grad_norm": 3.2766660496008626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143200 + }, + { + "epoch": 0.6945426196056369, + "grad_norm": 3.323862074466888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143210 + }, + { + "epoch": 0.694591117798473, + "grad_norm": 2.6496826066590984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143220 + }, + { + "epoch": 0.6946396159913091, + "grad_norm": 2.5595797481514637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143230 + }, + { + "epoch": 0.6946881141841452, + "grad_norm": 3.2620139478467536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143240 + }, + { + "epoch": 0.6947366123769813, + "grad_norm": 3.251726710118419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143250 + }, + { + "epoch": 0.6947851105698174, + "grad_norm": 2.575237800783725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143260 + }, + { + "epoch": 0.6948336087626534, + "grad_norm": 2.5779621992683133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143270 + }, + { + "epoch": 0.6948821069554896, + "grad_norm": 2.7549779346713876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143280 + }, + { + "epoch": 0.6949306051483256, + "grad_norm": 3.079900778857336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143290 + }, + { + "epoch": 0.6949791033411618, + "grad_norm": 6.830620691289369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143300 + }, + { + "epoch": 0.6950276015339978, + "grad_norm": 2.3639755042381694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143310 + }, + { + "epoch": 0.6950760997268339, + "grad_norm": 2.532510379182895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143320 + }, + { + "epoch": 0.6951245979196701, + "grad_norm": 3.097009226848968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143330 + }, + { + "epoch": 0.6951730961125061, + "grad_norm": 3.0506406289987353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143340 + }, + { + "epoch": 0.6952215943053422, + "grad_norm": 3.074979204598094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143350 + }, + { + "epoch": 0.6952700924981783, + "grad_norm": 2.4631232165006622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143360 + }, + { + "epoch": 0.6953185906910144, + "grad_norm": 2.3058484899252107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143370 + }, + { + "epoch": 0.6953670888838505, + "grad_norm": 3.0299425191060436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143380 + }, + { + "epoch": 0.6954155870766866, + "grad_norm": 3.318092822723884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143390 + }, + { + "epoch": 0.6954640852695226, + "grad_norm": 2.890119255027912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143400 + }, + { + "epoch": 0.6955125834623588, + "grad_norm": 2.4988393576563794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143410 + }, + { + "epoch": 0.6955610816551948, + "grad_norm": 2.438869906029595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143420 + }, + { + "epoch": 0.695609579848031, + "grad_norm": 3.529257952550324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143430 + }, + { + "epoch": 0.695658078040867, + "grad_norm": 2.943722599013654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143440 + }, + { + "epoch": 0.6957065762337031, + "grad_norm": 2.8132616236575814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143450 + }, + { + "epoch": 0.6957550744265392, + "grad_norm": 2.4664123188244957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143460 + }, + { + "epoch": 0.6958035726193753, + "grad_norm": 2.752773120562324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143470 + }, + { + "epoch": 0.6958520708122113, + "grad_norm": 2.7359073229149544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143480 + }, + { + "epoch": 0.6959005690050475, + "grad_norm": 3.015840022158045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143490 + }, + { + "epoch": 0.6959490671978835, + "grad_norm": 3.038048035364227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143500 + }, + { + "epoch": 0.6959975653907197, + "grad_norm": 2.7339529751202463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143510 + }, + { + "epoch": 0.6960460635835557, + "grad_norm": 2.639240115343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143520 + }, + { + "epoch": 0.6960945617763918, + "grad_norm": 2.447452907006209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143530 + }, + { + "epoch": 0.6961430599692279, + "grad_norm": 3.09129255526841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143540 + }, + { + "epoch": 0.696191558162064, + "grad_norm": 2.9578163918131395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143550 + }, + { + "epoch": 0.6962400563549, + "grad_norm": 2.4832186085177455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143560 + }, + { + "epoch": 0.6962885545477362, + "grad_norm": 2.5448356311130738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143570 + }, + { + "epoch": 0.6963370527405722, + "grad_norm": 2.4989004643316548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143580 + }, + { + "epoch": 0.6963855509334084, + "grad_norm": 3.091706801683358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143590 + }, + { + "epoch": 0.6964340491262444, + "grad_norm": 3.1825248214545354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143600 + }, + { + "epoch": 0.6964825473190805, + "grad_norm": 2.42056792387757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143610 + }, + { + "epoch": 0.6965310455119166, + "grad_norm": 2.5565592309817475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143620 + }, + { + "epoch": 0.6965795437047527, + "grad_norm": 2.5509459433692427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143630 + }, + { + "epoch": 0.6966280418975888, + "grad_norm": 3.1821361545780746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143640 + }, + { + "epoch": 0.6966765400904249, + "grad_norm": 2.9504761300813698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143650 + }, + { + "epoch": 0.6967250382832609, + "grad_norm": 2.3058909448536724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143660 + }, + { + "epoch": 0.6967735364760971, + "grad_norm": 2.3811621119307347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143670 + }, + { + "epoch": 0.6968220346689331, + "grad_norm": 3.140193527428892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143680 + }, + { + "epoch": 0.6968705328617693, + "grad_norm": 3.481458676901639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143690 + }, + { + "epoch": 0.6969190310546053, + "grad_norm": 3.139270887686507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143700 + }, + { + "epoch": 0.6969675292474414, + "grad_norm": 2.9648180799313195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143710 + }, + { + "epoch": 0.6970160274402775, + "grad_norm": 2.6118353702031527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143720 + }, + { + "epoch": 0.6970645256331136, + "grad_norm": 2.4954983857128354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143730 + }, + { + "epoch": 0.6971130238259496, + "grad_norm": 2.862879178167077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143740 + }, + { + "epoch": 0.6971615220187858, + "grad_norm": 3.314291419087567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143750 + }, + { + "epoch": 0.6972100202116218, + "grad_norm": 2.367572093930903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143760 + }, + { + "epoch": 0.697258518404458, + "grad_norm": 3.576342066935467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143770 + }, + { + "epoch": 0.697307016597294, + "grad_norm": 2.4464794634582177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143780 + }, + { + "epoch": 0.6973555147901301, + "grad_norm": 3.079379951032024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143790 + }, + { + "epoch": 0.6974040129829662, + "grad_norm": 2.7125443224917944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143800 + }, + { + "epoch": 0.6974525111758023, + "grad_norm": 2.8839400201263743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143810 + }, + { + "epoch": 0.6975010093686383, + "grad_norm": 2.434835622011633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143820 + }, + { + "epoch": 0.6975495075614745, + "grad_norm": 2.6073013970062675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143830 + }, + { + "epoch": 0.6975980057543106, + "grad_norm": 2.91999242563179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143840 + }, + { + "epoch": 0.6976465039471467, + "grad_norm": 3.961037720046079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143850 + }, + { + "epoch": 0.6976950021399828, + "grad_norm": 2.6753884441177433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143860 + }, + { + "epoch": 0.6977435003328188, + "grad_norm": 2.6606794989447735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143870 + }, + { + "epoch": 0.697791998525655, + "grad_norm": 2.462010684212146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143880 + }, + { + "epoch": 0.697840496718491, + "grad_norm": 3.404284143471159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143890 + }, + { + "epoch": 0.6978889949113272, + "grad_norm": 2.7097401655851172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143900 + }, + { + "epoch": 0.6979374931041632, + "grad_norm": 2.5155816985034107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143910 + }, + { + "epoch": 0.6979859912969993, + "grad_norm": 2.6077699999405013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143920 + }, + { + "epoch": 0.6980344894898354, + "grad_norm": 2.4555312450047495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143930 + }, + { + "epoch": 0.6980829876826715, + "grad_norm": 2.789449560225421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143940 + }, + { + "epoch": 0.6981314858755076, + "grad_norm": 2.8720233302692577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143950 + }, + { + "epoch": 0.6981799840683437, + "grad_norm": 2.994331538275219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143960 + }, + { + "epoch": 0.6982284822611797, + "grad_norm": 2.679180433062811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143970 + }, + { + "epoch": 0.6982769804540159, + "grad_norm": 2.5192601782464408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143980 + }, + { + "epoch": 0.6983254786468519, + "grad_norm": 3.2775901104287186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 143990 + }, + { + "epoch": 0.698373976839688, + "grad_norm": 3.119566116538408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144000 + }, + { + "epoch": 0.6984224750325241, + "grad_norm": 2.5038540130140063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144010 + }, + { + "epoch": 0.6984709732253602, + "grad_norm": 2.3445922536780017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144020 + }, + { + "epoch": 0.6985194714181963, + "grad_norm": 2.6066938829671926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144030 + }, + { + "epoch": 0.6985679696110324, + "grad_norm": 3.208186782899247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144040 + }, + { + "epoch": 0.6986164678038684, + "grad_norm": 3.663429026801168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144050 + }, + { + "epoch": 0.6986649659967046, + "grad_norm": 2.534720522362477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144060 + }, + { + "epoch": 0.6987134641895406, + "grad_norm": 2.4027109191138152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144070 + }, + { + "epoch": 0.6987619623823768, + "grad_norm": 2.260910747509115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144080 + }, + { + "epoch": 0.6988104605752128, + "grad_norm": 2.9512387200725243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144090 + }, + { + "epoch": 0.6988589587680489, + "grad_norm": 2.973219714874631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144100 + }, + { + "epoch": 0.698907456960885, + "grad_norm": 2.3365654300278038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144110 + }, + { + "epoch": 0.6989559551537211, + "grad_norm": 2.4501144224586824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144120 + }, + { + "epoch": 0.6990044533465571, + "grad_norm": 2.562564560548708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144130 + }, + { + "epoch": 0.6990529515393933, + "grad_norm": 2.988437586282089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144140 + }, + { + "epoch": 0.6991014497322293, + "grad_norm": 2.750012484398212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144150 + }, + { + "epoch": 0.6991499479250655, + "grad_norm": 2.623764672193829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144160 + }, + { + "epoch": 0.6991984461179015, + "grad_norm": 2.3715955421721446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144170 + }, + { + "epoch": 0.6992469443107376, + "grad_norm": 2.6773703254434622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144180 + }, + { + "epoch": 0.6992954425035737, + "grad_norm": 3.0419794683211876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144190 + }, + { + "epoch": 0.6993439406964098, + "grad_norm": 2.8489274939147435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144200 + }, + { + "epoch": 0.6993924388892458, + "grad_norm": 2.893013295590663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144210 + }, + { + "epoch": 0.699440937082082, + "grad_norm": 2.3079225641708945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144220 + }, + { + "epoch": 0.699489435274918, + "grad_norm": 2.4810859144963615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144230 + }, + { + "epoch": 0.6995379334677542, + "grad_norm": 2.7504587052362695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144240 + }, + { + "epoch": 0.6995864316605902, + "grad_norm": 2.686735811607832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144250 + }, + { + "epoch": 0.6996349298534263, + "grad_norm": 2.362762607788227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144260 + }, + { + "epoch": 0.6996834280462624, + "grad_norm": 2.284659039730741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144270 + }, + { + "epoch": 0.6997319262390985, + "grad_norm": 2.8029161214249143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144280 + }, + { + "epoch": 0.6997804244319346, + "grad_norm": 2.8809337138113733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144290 + }, + { + "epoch": 0.6998289226247707, + "grad_norm": 3.243275514819288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144300 + }, + { + "epoch": 0.6998774208176067, + "grad_norm": 2.853813541037198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144310 + }, + { + "epoch": 0.6999259190104429, + "grad_norm": 2.533224652268018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144320 + }, + { + "epoch": 0.6999744172032789, + "grad_norm": 2.360091144737453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144330 + }, + { + "epoch": 0.700022915396115, + "grad_norm": 2.7744425423747998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144340 + }, + { + "epoch": 0.7000714135889512, + "grad_norm": 2.994941894485237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144350 + }, + { + "epoch": 0.7001199117817872, + "grad_norm": 2.4587833991063235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144360 + }, + { + "epoch": 0.7001684099746234, + "grad_norm": 2.5155795668752035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144370 + }, + { + "epoch": 0.7002169081674594, + "grad_norm": 2.2856173842455973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144380 + }, + { + "epoch": 0.7002654063602956, + "grad_norm": 2.987217584404789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144390 + }, + { + "epoch": 0.7003139045531316, + "grad_norm": 2.9349413566137628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144400 + }, + { + "epoch": 0.7003624027459677, + "grad_norm": 2.2810382915849914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144410 + }, + { + "epoch": 0.7004109009388038, + "grad_norm": 2.6554550558444134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144420 + }, + { + "epoch": 0.7004593991316399, + "grad_norm": 3.1779904929862823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144430 + }, + { + "epoch": 0.7005078973244759, + "grad_norm": 2.7329537743980836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144440 + }, + { + "epoch": 0.7005563955173121, + "grad_norm": 3.136949189297411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144450 + }, + { + "epoch": 0.7006048937101481, + "grad_norm": 2.5012242943489582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144460 + }, + { + "epoch": 0.7006533919029843, + "grad_norm": 2.2292535817314274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144470 + }, + { + "epoch": 0.7007018900958203, + "grad_norm": 2.6350567949862125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144480 + }, + { + "epoch": 0.7007503882886564, + "grad_norm": 3.3712137081920446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144490 + }, + { + "epoch": 0.7007988864814925, + "grad_norm": 2.621306727235151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144500 + }, + { + "epoch": 0.7008473846743286, + "grad_norm": 2.912895702422702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144510 + }, + { + "epoch": 0.7008958828671646, + "grad_norm": 3.403224013709405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144520 + }, + { + "epoch": 0.7009443810600008, + "grad_norm": 2.3913935720543122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144530 + }, + { + "epoch": 0.7009928792528368, + "grad_norm": 3.142307747339146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144540 + }, + { + "epoch": 0.701041377445673, + "grad_norm": 2.916737251723589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144550 + }, + { + "epoch": 0.701089875638509, + "grad_norm": 2.5744219200873886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144560 + }, + { + "epoch": 0.7011383738313451, + "grad_norm": 2.7061055618560204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144570 + }, + { + "epoch": 0.7011868720241812, + "grad_norm": 2.8123466222496063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144580 + }, + { + "epoch": 0.7012353702170173, + "grad_norm": 3.3232470997290875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144590 + }, + { + "epoch": 0.7012838684098534, + "grad_norm": 2.558349443404495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144600 + }, + { + "epoch": 0.7013323666026895, + "grad_norm": 2.748137717389909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144610 + }, + { + "epoch": 0.7013808647955255, + "grad_norm": 2.5535268122212074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144620 + }, + { + "epoch": 0.7014293629883617, + "grad_norm": 2.6449136214523605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144630 + }, + { + "epoch": 0.7014778611811977, + "grad_norm": 3.1287225255027806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144640 + }, + { + "epoch": 0.7015263593740338, + "grad_norm": 2.8667162865758655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144650 + }, + { + "epoch": 0.7015748575668699, + "grad_norm": 2.8955399855590258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144660 + }, + { + "epoch": 0.701623355759706, + "grad_norm": 3.0469731626681096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144670 + }, + { + "epoch": 0.7016718539525421, + "grad_norm": 5.025054150564756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144680 + }, + { + "epoch": 0.7017203521453782, + "grad_norm": 3.0329641020898634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144690 + }, + { + "epoch": 0.7017688503382142, + "grad_norm": 2.6849479084489758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144700 + }, + { + "epoch": 0.7018173485310504, + "grad_norm": 2.5112649737479842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144710 + }, + { + "epoch": 0.7018658467238864, + "grad_norm": 2.7288706405670382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144720 + }, + { + "epoch": 0.7019143449167226, + "grad_norm": 2.2713976477461983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144730 + }, + { + "epoch": 0.7019628431095586, + "grad_norm": 3.20197734993144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144740 + }, + { + "epoch": 0.7020113413023947, + "grad_norm": 2.755633943252178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144750 + }, + { + "epoch": 0.7020598394952308, + "grad_norm": 2.865557036102473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144760 + }, + { + "epoch": 0.7021083376880669, + "grad_norm": 2.4337982296174232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144770 + }, + { + "epoch": 0.7021568358809029, + "grad_norm": 2.9264365153380822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144780 + }, + { + "epoch": 0.7022053340737391, + "grad_norm": 2.9428859349422964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144790 + }, + { + "epoch": 0.7022538322665751, + "grad_norm": 2.6729422231142053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144800 + }, + { + "epoch": 0.7023023304594113, + "grad_norm": 2.972025292535818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144810 + }, + { + "epoch": 0.7023508286522473, + "grad_norm": 3.160898742748941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144820 + }, + { + "epoch": 0.7023993268450834, + "grad_norm": 2.7634838417611718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144830 + }, + { + "epoch": 0.7024478250379195, + "grad_norm": 3.0845946241697675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144840 + }, + { + "epoch": 0.7024963232307556, + "grad_norm": 2.651356822980233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144850 + }, + { + "epoch": 0.7025448214235918, + "grad_norm": 2.8311598399000104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144860 + }, + { + "epoch": 0.7025933196164278, + "grad_norm": 2.676038590720964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144870 + }, + { + "epoch": 0.7026418178092639, + "grad_norm": 3.031493633898208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144880 + }, + { + "epoch": 0.7026903160021, + "grad_norm": 3.2469770872012305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144890 + }, + { + "epoch": 0.7027388141949361, + "grad_norm": 3.0803118278299735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144900 + }, + { + "epoch": 0.7027873123877721, + "grad_norm": 2.5066162478992737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144910 + }, + { + "epoch": 0.7028358105806083, + "grad_norm": 2.558026501731092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144920 + }, + { + "epoch": 0.7028843087734443, + "grad_norm": 2.39359767562064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144930 + }, + { + "epoch": 0.7029328069662805, + "grad_norm": 2.8083421810265463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144940 + }, + { + "epoch": 0.7029813051591165, + "grad_norm": 2.8875437152464656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144950 + }, + { + "epoch": 0.7030298033519526, + "grad_norm": 2.47761295923965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144960 + }, + { + "epoch": 0.7030783015447887, + "grad_norm": 2.4906940510049935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144970 + }, + { + "epoch": 0.7031267997376248, + "grad_norm": 2.7768720656240475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144980 + }, + { + "epoch": 0.7031752979304609, + "grad_norm": 2.6286526733088067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 144990 + }, + { + "epoch": 0.703223796123297, + "grad_norm": 3.26242179937708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145000 + }, + { + "epoch": 0.703272294316133, + "grad_norm": 2.7586532169721067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145010 + }, + { + "epoch": 0.7033207925089692, + "grad_norm": 2.7933076296449144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145020 + }, + { + "epoch": 0.7033692907018052, + "grad_norm": 2.4467727399724026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145030 + }, + { + "epoch": 0.7034177888946413, + "grad_norm": 2.7329839724643534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145040 + }, + { + "epoch": 0.7034662870874774, + "grad_norm": 2.9257279265948455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145050 + }, + { + "epoch": 0.7035147852803135, + "grad_norm": 4.123386787568961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145060 + }, + { + "epoch": 0.7035632834731496, + "grad_norm": 2.5663661418207084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145070 + }, + { + "epoch": 0.7036117816659857, + "grad_norm": 2.4743242121871845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145080 + }, + { + "epoch": 0.7036602798588217, + "grad_norm": 2.7508965771971816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145090 + }, + { + "epoch": 0.7037087780516579, + "grad_norm": 3.0151486640761505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145100 + }, + { + "epoch": 0.7037572762444939, + "grad_norm": 3.040935325770988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145110 + }, + { + "epoch": 0.7038057744373301, + "grad_norm": 2.896203277202858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145120 + }, + { + "epoch": 0.7038542726301661, + "grad_norm": 2.830269529852103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145130 + }, + { + "epoch": 0.7039027708230022, + "grad_norm": 3.0459304412033816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145140 + }, + { + "epoch": 0.7039512690158383, + "grad_norm": 2.690260814119938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145150 + }, + { + "epoch": 0.7039997672086744, + "grad_norm": 3.149201788232858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145160 + }, + { + "epoch": 0.7040482654015104, + "grad_norm": 2.8596028656124872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145170 + }, + { + "epoch": 0.7040967635943466, + "grad_norm": 2.8878131885790026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145180 + }, + { + "epoch": 0.7041452617871826, + "grad_norm": 2.8457087353217503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145190 + }, + { + "epoch": 0.7041937599800188, + "grad_norm": 3.0260210337473836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145200 + }, + { + "epoch": 0.7042422581728548, + "grad_norm": 3.2460590659866284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145210 + }, + { + "epoch": 0.7042907563656909, + "grad_norm": 3.3132923960010885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145220 + }, + { + "epoch": 0.704339254558527, + "grad_norm": 2.3569816320900827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145230 + }, + { + "epoch": 0.7043877527513631, + "grad_norm": 2.9343194540842887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145240 + }, + { + "epoch": 0.7044362509441991, + "grad_norm": 2.7114811729234134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145250 + }, + { + "epoch": 0.7044847491370353, + "grad_norm": 2.726246606243876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145260 + }, + { + "epoch": 0.7045332473298713, + "grad_norm": 2.7699885052356876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145270 + }, + { + "epoch": 0.7045817455227075, + "grad_norm": 2.39666029244745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145280 + }, + { + "epoch": 0.7046302437155435, + "grad_norm": 2.9235296850060877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145290 + }, + { + "epoch": 0.7046787419083796, + "grad_norm": 3.1057957983193774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145300 + }, + { + "epoch": 0.7047272401012157, + "grad_norm": 2.5748256859969842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145310 + }, + { + "epoch": 0.7047757382940518, + "grad_norm": 2.6901014749114438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145320 + }, + { + "epoch": 0.7048242364868879, + "grad_norm": 2.5996744312806186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145330 + }, + { + "epoch": 0.704872734679724, + "grad_norm": 2.7692129478396055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145340 + }, + { + "epoch": 0.70492123287256, + "grad_norm": 3.200365483735368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145350 + }, + { + "epoch": 0.7049697310653962, + "grad_norm": 2.885673922037313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145360 + }, + { + "epoch": 0.7050182292582322, + "grad_norm": 5.2451678556053594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145370 + }, + { + "epoch": 0.7050667274510684, + "grad_norm": 2.5842886941518373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145380 + }, + { + "epoch": 0.7051152256439045, + "grad_norm": 2.86816010941493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145390 + }, + { + "epoch": 0.7051637238367405, + "grad_norm": 2.845037627707825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145400 + }, + { + "epoch": 0.7052122220295767, + "grad_norm": 2.530378218068563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145410 + }, + { + "epoch": 0.7052607202224127, + "grad_norm": 2.4873330062291643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145420 + }, + { + "epoch": 0.7053092184152489, + "grad_norm": 2.795406217614982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145430 + }, + { + "epoch": 0.7053577166080849, + "grad_norm": 2.8352340919468588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145440 + }, + { + "epoch": 0.705406214800921, + "grad_norm": 2.7799861967992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145450 + }, + { + "epoch": 0.7054547129937571, + "grad_norm": 2.765197670839825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145460 + }, + { + "epoch": 0.7055032111865932, + "grad_norm": 2.6613557579935332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145470 + }, + { + "epoch": 0.7055517093794292, + "grad_norm": 2.7634238009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145480 + }, + { + "epoch": 0.7056002075722654, + "grad_norm": 3.0368525472113106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145490 + }, + { + "epoch": 0.7056487057651014, + "grad_norm": 2.6826141308333717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145500 + }, + { + "epoch": 0.7056972039579376, + "grad_norm": 3.1910055042772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145510 + }, + { + "epoch": 0.7057457021507736, + "grad_norm": 2.5419373272939083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145520 + }, + { + "epoch": 0.7057942003436097, + "grad_norm": 2.5460424879497623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145530 + }, + { + "epoch": 0.7058426985364458, + "grad_norm": 2.8906709914622297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145540 + }, + { + "epoch": 0.7058911967292819, + "grad_norm": 3.001678905434346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145550 + }, + { + "epoch": 0.7059396949221179, + "grad_norm": 2.6141110609501084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145560 + }, + { + "epoch": 0.7059881931149541, + "grad_norm": 2.6565411204160227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145570 + }, + { + "epoch": 0.7060366913077901, + "grad_norm": 2.650679320481686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145580 + }, + { + "epoch": 0.7060851895006263, + "grad_norm": 3.502219669826445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145590 + }, + { + "epoch": 0.7061336876934623, + "grad_norm": 3.945452320408549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145600 + }, + { + "epoch": 0.7061821858862984, + "grad_norm": 2.910988428084238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145610 + }, + { + "epoch": 0.7062306840791345, + "grad_norm": 4.5652761571091105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145620 + }, + { + "epoch": 0.7062791822719706, + "grad_norm": 2.6904551475581684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145630 + }, + { + "epoch": 0.7063276804648067, + "grad_norm": 3.098119094602225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145640 + }, + { + "epoch": 0.7063761786576428, + "grad_norm": 2.9687639013786793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145650 + }, + { + "epoch": 0.7064246768504788, + "grad_norm": 3.3100732821367274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145660 + }, + { + "epoch": 0.706473175043315, + "grad_norm": 2.8115648476045862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145670 + }, + { + "epoch": 0.706521673236151, + "grad_norm": 3.196257836179939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145680 + }, + { + "epoch": 0.7065701714289871, + "grad_norm": 3.1770898800687064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145690 + }, + { + "epoch": 0.7066186696218232, + "grad_norm": 2.928057796225403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145700 + }, + { + "epoch": 0.7066671678146593, + "grad_norm": 2.5846935258755366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145710 + }, + { + "epoch": 0.7067156660074954, + "grad_norm": 2.8708186050607765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145720 + }, + { + "epoch": 0.7067641642003315, + "grad_norm": 3.4050028574483804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145730 + }, + { + "epoch": 0.7068126623931675, + "grad_norm": 3.0219339919312915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145740 + }, + { + "epoch": 0.7068611605860037, + "grad_norm": 2.8156255993394552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145750 + }, + { + "epoch": 0.7069096587788397, + "grad_norm": 2.86336216959171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145760 + }, + { + "epoch": 0.7069581569716759, + "grad_norm": 2.8383370320739232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145770 + }, + { + "epoch": 0.7070066551645119, + "grad_norm": 2.6092569882507632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145780 + }, + { + "epoch": 0.707055153357348, + "grad_norm": 2.940432075604349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145790 + }, + { + "epoch": 0.7071036515501841, + "grad_norm": 2.690449996123334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145800 + }, + { + "epoch": 0.7071521497430202, + "grad_norm": 2.796072173794073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145810 + }, + { + "epoch": 0.7072006479358562, + "grad_norm": 2.8994088907552396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145820 + }, + { + "epoch": 0.7072491461286924, + "grad_norm": 2.6414042508804414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145830 + }, + { + "epoch": 0.7072976443215284, + "grad_norm": 2.8284620867680133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145840 + }, + { + "epoch": 0.7073461425143646, + "grad_norm": 2.8947269470336323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145850 + }, + { + "epoch": 0.7073946407072006, + "grad_norm": 3.061769504597578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145860 + }, + { + "epoch": 0.7074431389000367, + "grad_norm": 2.7297000215753542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145870 + }, + { + "epoch": 0.7074916370928728, + "grad_norm": 2.6765409444351462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145880 + }, + { + "epoch": 0.7075401352857089, + "grad_norm": 5.660169222210243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145890 + }, + { + "epoch": 0.7075886334785451, + "grad_norm": 3.1332422878449506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145900 + }, + { + "epoch": 0.7076371316713811, + "grad_norm": 3.807330628546879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145910 + }, + { + "epoch": 0.7076856298642172, + "grad_norm": 2.6026835797665626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145920 + }, + { + "epoch": 0.7077341280570533, + "grad_norm": 3.1268747591184365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145930 + }, + { + "epoch": 0.7077826262498894, + "grad_norm": 2.9812383672833676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145940 + }, + { + "epoch": 0.7078311244427254, + "grad_norm": 3.031944473264048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145950 + }, + { + "epoch": 0.7078796226355616, + "grad_norm": 2.692313394447865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145960 + }, + { + "epoch": 0.7079281208283976, + "grad_norm": 2.5533729797189153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145970 + }, + { + "epoch": 0.7079766190212338, + "grad_norm": 2.5659367963726254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145980 + }, + { + "epoch": 0.7080251172140698, + "grad_norm": 2.8537582963394925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 145990 + }, + { + "epoch": 0.7080736154069059, + "grad_norm": 3.4385124081381946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146000 + }, + { + "epoch": 0.708122113599742, + "grad_norm": 2.825956890717407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146010 + }, + { + "epoch": 0.7081706117925781, + "grad_norm": 2.9629525499785814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146020 + }, + { + "epoch": 0.7082191099854142, + "grad_norm": 2.5502355782691666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146030 + }, + { + "epoch": 0.7082676081782503, + "grad_norm": 2.6880062620193712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146040 + }, + { + "epoch": 0.7083161063710863, + "grad_norm": 3.4373158541711746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146050 + }, + { + "epoch": 0.7083646045639225, + "grad_norm": 2.7707448779779043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146060 + }, + { + "epoch": 0.7084131027567585, + "grad_norm": 2.5719012697322796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146070 + }, + { + "epoch": 0.7084616009495946, + "grad_norm": 2.988839398199161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146080 + }, + { + "epoch": 0.7085100991424307, + "grad_norm": 2.7343398656398676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146090 + }, + { + "epoch": 0.7085585973352668, + "grad_norm": 2.6833559374495053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146100 + }, + { + "epoch": 0.7086070955281029, + "grad_norm": 2.786934771847882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146110 + }, + { + "epoch": 0.708655593720939, + "grad_norm": 2.4793678221612936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146120 + }, + { + "epoch": 0.708704091913775, + "grad_norm": 2.859979808533808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146130 + }, + { + "epoch": 0.7087525901066112, + "grad_norm": 3.0368710213224404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146140 + }, + { + "epoch": 0.7088010882994472, + "grad_norm": 3.058407926914697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146150 + }, + { + "epoch": 0.7088495864922834, + "grad_norm": 3.2006838068809884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146160 + }, + { + "epoch": 0.7088980846851194, + "grad_norm": 2.9104056054052307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146170 + }, + { + "epoch": 0.7089465828779555, + "grad_norm": 3.562701067494345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146180 + }, + { + "epoch": 0.7089950810707916, + "grad_norm": 2.8103086080477624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146190 + }, + { + "epoch": 0.7090435792636277, + "grad_norm": 2.8288633657780338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146200 + }, + { + "epoch": 0.7090920774564637, + "grad_norm": 2.842161350713468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146210 + }, + { + "epoch": 0.7091405756492999, + "grad_norm": 2.7963276139075788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146220 + }, + { + "epoch": 0.7091890738421359, + "grad_norm": 3.964617789620206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146230 + }, + { + "epoch": 0.7092375720349721, + "grad_norm": 2.9855776517706545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146240 + }, + { + "epoch": 0.7092860702278081, + "grad_norm": 3.2681125361477825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146250 + }, + { + "epoch": 0.7093345684206442, + "grad_norm": 2.6535026620422286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146260 + }, + { + "epoch": 0.7093830666134803, + "grad_norm": 2.8212559399776183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146270 + }, + { + "epoch": 0.7094315648063164, + "grad_norm": 2.5765540812017207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146280 + }, + { + "epoch": 0.7094800629991524, + "grad_norm": 3.733089570800985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146290 + }, + { + "epoch": 0.7095285611919886, + "grad_norm": 3.0555128205378423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146300 + }, + { + "epoch": 0.7095770593848246, + "grad_norm": 2.7610468578131986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146310 + }, + { + "epoch": 0.7096255575776608, + "grad_norm": 2.49658693718402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146320 + }, + { + "epoch": 0.7096740557704968, + "grad_norm": 2.865065518165011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146330 + }, + { + "epoch": 0.709722553963333, + "grad_norm": 2.908790897038216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146340 + }, + { + "epoch": 0.709771052156169, + "grad_norm": 3.108492663272955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146350 + }, + { + "epoch": 0.7098195503490051, + "grad_norm": 2.6239474593126033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146360 + }, + { + "epoch": 0.7098680485418412, + "grad_norm": 2.5716325069424784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146370 + }, + { + "epoch": 0.7099165467346773, + "grad_norm": 2.70408708757941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146380 + }, + { + "epoch": 0.7099650449275133, + "grad_norm": 3.1810994727266007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146390 + }, + { + "epoch": 0.7100135431203495, + "grad_norm": 2.6653470541759816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146400 + }, + { + "epoch": 0.7100620413131856, + "grad_norm": 3.5206351611805076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146410 + }, + { + "epoch": 0.7101105395060217, + "grad_norm": 2.9033088821961428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146420 + }, + { + "epoch": 0.7101590376988578, + "grad_norm": 2.5860597219207193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146430 + }, + { + "epoch": 0.7102075358916938, + "grad_norm": 2.8565535714619728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146440 + }, + { + "epoch": 0.71025603408453, + "grad_norm": 3.149010652236939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146450 + }, + { + "epoch": 0.710304532277366, + "grad_norm": 2.4735133052899982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146460 + }, + { + "epoch": 0.7103530304702022, + "grad_norm": 2.7497712551394216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146470 + }, + { + "epoch": 0.7104015286630382, + "grad_norm": 2.58103458605774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146480 + }, + { + "epoch": 0.7104500268558743, + "grad_norm": 3.01127585089489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146490 + }, + { + "epoch": 0.7104985250487104, + "grad_norm": 3.679277682522297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146500 + }, + { + "epoch": 0.7105470232415465, + "grad_norm": 2.5651610613408593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146510 + }, + { + "epoch": 0.7105955214343825, + "grad_norm": 2.9198185202972127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146520 + }, + { + "epoch": 0.7106440196272187, + "grad_norm": 2.5775740652989043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146530 + }, + { + "epoch": 0.7106925178200547, + "grad_norm": 3.2080787804034117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146540 + }, + { + "epoch": 0.7107410160128909, + "grad_norm": 3.160064210305791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146550 + }, + { + "epoch": 0.7107895142057269, + "grad_norm": 2.586431335771522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146560 + }, + { + "epoch": 0.710838012398563, + "grad_norm": 2.4063536940843733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146570 + }, + { + "epoch": 0.7108865105913991, + "grad_norm": 3.034259066225786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146580 + }, + { + "epoch": 0.7109350087842352, + "grad_norm": 2.9536987966594097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146590 + }, + { + "epoch": 0.7109835069770712, + "grad_norm": 2.6258129892653415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146600 + }, + { + "epoch": 0.7110320051699074, + "grad_norm": 2.649281150013394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146610 + }, + { + "epoch": 0.7110805033627434, + "grad_norm": 2.703645662904819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146620 + }, + { + "epoch": 0.7111290015555796, + "grad_norm": 2.653975705868561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146630 + }, + { + "epoch": 0.7111774997484156, + "grad_norm": 2.764652862197181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146640 + }, + { + "epoch": 0.7112259979412517, + "grad_norm": 3.038714879721738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146650 + }, + { + "epoch": 0.7112744961340878, + "grad_norm": 3.053363428762168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146660 + }, + { + "epoch": 0.7113229943269239, + "grad_norm": 2.7880922459644353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146670 + }, + { + "epoch": 0.71137149251976, + "grad_norm": 2.627209383376794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146680 + }, + { + "epoch": 0.7114199907125961, + "grad_norm": 2.7340442798617914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146690 + }, + { + "epoch": 0.7114684889054321, + "grad_norm": 3.160223371878601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146700 + }, + { + "epoch": 0.7115169870982683, + "grad_norm": 5.169293615381321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146710 + }, + { + "epoch": 0.7115654852911043, + "grad_norm": 2.556670430919894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146720 + }, + { + "epoch": 0.7116139834839404, + "grad_norm": 2.652953234871802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146730 + }, + { + "epoch": 0.7116624816767765, + "grad_norm": 3.259744474348736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146740 + }, + { + "epoch": 0.7117109798696126, + "grad_norm": 2.6345675863126417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146750 + }, + { + "epoch": 0.7117594780624487, + "grad_norm": 2.556779499229833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146760 + }, + { + "epoch": 0.7118079762552848, + "grad_norm": 2.420162914518187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146770 + }, + { + "epoch": 0.7118564744481208, + "grad_norm": 2.408745025661574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146780 + }, + { + "epoch": 0.711904972640957, + "grad_norm": 2.710821611628944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146790 + }, + { + "epoch": 0.711953470833793, + "grad_norm": 2.616594230175906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146800 + }, + { + "epoch": 0.7120019690266292, + "grad_norm": 2.7279249081857415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146810 + }, + { + "epoch": 0.7120504672194652, + "grad_norm": 3.2895680845967945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146820 + }, + { + "epoch": 0.7120989654123013, + "grad_norm": 3.335957288186364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146830 + }, + { + "epoch": 0.7121474636051374, + "grad_norm": 2.8588861056277892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146840 + }, + { + "epoch": 0.7121959617979735, + "grad_norm": 2.6845851763823703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146850 + }, + { + "epoch": 0.7122444599908095, + "grad_norm": 3.075138721442272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146860 + }, + { + "epoch": 0.7122929581836457, + "grad_norm": 2.5857215035784975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146870 + }, + { + "epoch": 0.7123414563764817, + "grad_norm": 2.5082044885493815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146880 + }, + { + "epoch": 0.7123899545693179, + "grad_norm": 3.2602876842702244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146890 + }, + { + "epoch": 0.7124384527621539, + "grad_norm": 2.540460286581947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146900 + }, + { + "epoch": 0.71248695095499, + "grad_norm": 2.346430250099729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146910 + }, + { + "epoch": 0.7125354491478262, + "grad_norm": 2.4586061186937513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146920 + }, + { + "epoch": 0.7125839473406622, + "grad_norm": 2.6071299785712654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146930 + }, + { + "epoch": 0.7126324455334984, + "grad_norm": 2.9987162974975945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146940 + }, + { + "epoch": 0.7126809437263344, + "grad_norm": 2.9358867337236916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146950 + }, + { + "epoch": 0.7127294419191705, + "grad_norm": 2.4432210921077058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146960 + }, + { + "epoch": 0.7127779401120066, + "grad_norm": 2.790554809450896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146970 + }, + { + "epoch": 0.7128264383048427, + "grad_norm": 2.5850351192957532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146980 + }, + { + "epoch": 0.7128749364976787, + "grad_norm": 2.8793017747830163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 146990 + }, + { + "epoch": 0.7129234346905149, + "grad_norm": 2.8463389867283695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147000 + }, + { + "epoch": 0.7129719328833509, + "grad_norm": 3.122762137763857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147010 + }, + { + "epoch": 0.7130204310761871, + "grad_norm": 2.5076950294078415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147020 + }, + { + "epoch": 0.7130689292690231, + "grad_norm": 2.2331780868967144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147030 + }, + { + "epoch": 0.7131174274618592, + "grad_norm": 2.7618179743171822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147040 + }, + { + "epoch": 0.7131659256546953, + "grad_norm": 2.897499662424252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147050 + }, + { + "epoch": 0.7132144238475314, + "grad_norm": 2.4942048426623842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147060 + }, + { + "epoch": 0.7132629220403675, + "grad_norm": 2.481887406702299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147070 + }, + { + "epoch": 0.7133114202332036, + "grad_norm": 3.882997771142982e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147080 + }, + { + "epoch": 0.7133599184260396, + "grad_norm": 3.0741212242446636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147090 + }, + { + "epoch": 0.7134084166188758, + "grad_norm": 3.538448467566013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147100 + }, + { + "epoch": 0.7134569148117118, + "grad_norm": 3.813556403997609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147110 + }, + { + "epoch": 0.713505413004548, + "grad_norm": 3.5041672674651636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147120 + }, + { + "epoch": 0.713553911197384, + "grad_norm": 2.888037542447819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147130 + }, + { + "epoch": 0.7136024093902201, + "grad_norm": 3.0294273756226175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147140 + }, + { + "epoch": 0.7136509075830562, + "grad_norm": 2.766317308555699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147150 + }, + { + "epoch": 0.7136994057758923, + "grad_norm": 2.984694091878737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147160 + }, + { + "epoch": 0.7137479039687283, + "grad_norm": 2.806820909029284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147170 + }, + { + "epoch": 0.7137964021615645, + "grad_norm": 2.4982998780842536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147180 + }, + { + "epoch": 0.7138449003544005, + "grad_norm": 2.994918091303589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147190 + }, + { + "epoch": 0.7138933985472367, + "grad_norm": 2.765281337246961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147200 + }, + { + "epoch": 0.7139418967400727, + "grad_norm": 2.792950937191563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147210 + }, + { + "epoch": 0.7139903949329088, + "grad_norm": 2.663986897744053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147220 + }, + { + "epoch": 0.7140388931257449, + "grad_norm": 2.6430894806139804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147230 + }, + { + "epoch": 0.714087391318581, + "grad_norm": 2.6351376192224052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147240 + }, + { + "epoch": 0.714135889511417, + "grad_norm": 2.6557067656085565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147250 + }, + { + "epoch": 0.7141843877042532, + "grad_norm": 2.4553896693646493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147260 + }, + { + "epoch": 0.7142328858970892, + "grad_norm": 2.578374846962106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147270 + }, + { + "epoch": 0.7142813840899254, + "grad_norm": 2.727787062895004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147280 + }, + { + "epoch": 0.7143298822827614, + "grad_norm": 2.81349645803175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147290 + }, + { + "epoch": 0.7143783804755975, + "grad_norm": 2.751472649720199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147300 + }, + { + "epoch": 0.7144268786684336, + "grad_norm": 2.437077206707272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147310 + }, + { + "epoch": 0.7144753768612697, + "grad_norm": 2.4644380758331863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147320 + }, + { + "epoch": 0.7145238750541058, + "grad_norm": 2.7613412001414872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147330 + }, + { + "epoch": 0.7145723732469419, + "grad_norm": 2.8676996777221575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147340 + }, + { + "epoch": 0.7146208714397779, + "grad_norm": 2.8007836050392143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147350 + }, + { + "epoch": 0.7146693696326141, + "grad_norm": 2.2828803736274494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147360 + }, + { + "epoch": 0.7147178678254501, + "grad_norm": 2.789553299464842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147370 + }, + { + "epoch": 0.7147663660182862, + "grad_norm": 2.71336872970096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147380 + }, + { + "epoch": 0.7148148642111223, + "grad_norm": 3.399818382376907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147390 + }, + { + "epoch": 0.7148633624039584, + "grad_norm": 2.591457359812921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147400 + }, + { + "epoch": 0.7149118605967945, + "grad_norm": 2.7920245670998156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147410 + }, + { + "epoch": 0.7149603587896306, + "grad_norm": 2.9334646711731693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147420 + }, + { + "epoch": 0.7150088569824667, + "grad_norm": 2.4560344868973516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147430 + }, + { + "epoch": 0.7150573551753028, + "grad_norm": 2.4197545300808088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147440 + }, + { + "epoch": 0.7151058533681389, + "grad_norm": 3.030877593346304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147450 + }, + { + "epoch": 0.715154351560975, + "grad_norm": 2.434835089104581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147460 + }, + { + "epoch": 0.7152028497538111, + "grad_norm": 2.960157452491785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147470 + }, + { + "epoch": 0.7152513479466471, + "grad_norm": 2.3634505907921266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147480 + }, + { + "epoch": 0.7152998461394833, + "grad_norm": 2.6272827469142612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147490 + }, + { + "epoch": 0.7153483443323193, + "grad_norm": 2.4634504214304798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147500 + }, + { + "epoch": 0.7153968425251555, + "grad_norm": 2.4330484293955124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147510 + }, + { + "epoch": 0.7154453407179915, + "grad_norm": 3.0605743717160294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147520 + }, + { + "epoch": 0.7154938389108276, + "grad_norm": 3.1538334610559104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147530 + }, + { + "epoch": 0.7155423371036637, + "grad_norm": 2.6693982135839178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147540 + }, + { + "epoch": 0.7155908352964998, + "grad_norm": 2.5477001841522906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147550 + }, + { + "epoch": 0.7156393334893358, + "grad_norm": 2.4264160458642436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147560 + }, + { + "epoch": 0.715687831682172, + "grad_norm": 2.8199874435586025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147570 + }, + { + "epoch": 0.715736329875008, + "grad_norm": 2.8220998871120173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147580 + }, + { + "epoch": 0.7157848280678442, + "grad_norm": 2.957971823036587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147590 + }, + { + "epoch": 0.7158333262606802, + "grad_norm": 2.5539495851489846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147600 + }, + { + "epoch": 0.7158818244535163, + "grad_norm": 2.3386682812542858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147610 + }, + { + "epoch": 0.7159303226463524, + "grad_norm": 2.3223964973340117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147620 + }, + { + "epoch": 0.7159788208391885, + "grad_norm": 2.268084209333665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147630 + }, + { + "epoch": 0.7160273190320245, + "grad_norm": 3.1663326183206664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147640 + }, + { + "epoch": 0.7160758172248607, + "grad_norm": 3.143645344039214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147650 + }, + { + "epoch": 0.7161243154176967, + "grad_norm": 2.4479998472770603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147660 + }, + { + "epoch": 0.7161728136105329, + "grad_norm": 2.441188229340696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147670 + }, + { + "epoch": 0.7162213118033689, + "grad_norm": 2.4007157151118008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147680 + }, + { + "epoch": 0.716269809996205, + "grad_norm": 2.7109591016483137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147690 + }, + { + "epoch": 0.7163183081890411, + "grad_norm": 2.308638258341489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147700 + }, + { + "epoch": 0.7163668063818772, + "grad_norm": 2.3494147072256055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147710 + }, + { + "epoch": 0.7164153045747133, + "grad_norm": 2.6579758838352063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147720 + }, + { + "epoch": 0.7164638027675494, + "grad_norm": 3.7888089110538203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147730 + }, + { + "epoch": 0.7165123009603854, + "grad_norm": 2.484762617882552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147740 + }, + { + "epoch": 0.7165607991532216, + "grad_norm": 2.9106972831982603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147750 + }, + { + "epoch": 0.7166092973460576, + "grad_norm": 2.6441504985541542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147760 + }, + { + "epoch": 0.7166577955388937, + "grad_norm": 2.4391871633611117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147770 + }, + { + "epoch": 0.7167062937317298, + "grad_norm": 2.1714024356356276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147780 + }, + { + "epoch": 0.7167547919245659, + "grad_norm": 2.556898870409441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147790 + }, + { + "epoch": 0.716803290117402, + "grad_norm": 2.464015480541093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147800 + }, + { + "epoch": 0.7168517883102381, + "grad_norm": 2.482876482190477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147810 + }, + { + "epoch": 0.7169002865030741, + "grad_norm": 2.354193640030644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147820 + }, + { + "epoch": 0.7169487846959103, + "grad_norm": 2.4611182425360312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147830 + }, + { + "epoch": 0.7169972828887463, + "grad_norm": 2.5425698879644187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147840 + }, + { + "epoch": 0.7170457810815825, + "grad_norm": 2.8215067615633416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147850 + }, + { + "epoch": 0.7170942792744185, + "grad_norm": 2.6717428269762422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147860 + }, + { + "epoch": 0.7171427774672546, + "grad_norm": 2.345678318249611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147870 + }, + { + "epoch": 0.7171912756600907, + "grad_norm": 2.1369944036564448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147880 + }, + { + "epoch": 0.7172397738529268, + "grad_norm": 2.8138854801795787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147890 + }, + { + "epoch": 0.7172882720457628, + "grad_norm": 2.4973886070256412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147900 + }, + { + "epoch": 0.717336770238599, + "grad_norm": 3.954488647650578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147910 + }, + { + "epoch": 0.717385268431435, + "grad_norm": 2.5461606156795824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147920 + }, + { + "epoch": 0.7174337666242712, + "grad_norm": 2.4023371736348054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147930 + }, + { + "epoch": 0.7174822648171073, + "grad_norm": 2.894996953273221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147940 + }, + { + "epoch": 0.7175307630099433, + "grad_norm": 2.9874438922661284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147950 + }, + { + "epoch": 0.7175792612027795, + "grad_norm": 2.7417485171099543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147960 + }, + { + "epoch": 0.7176277593956155, + "grad_norm": 2.614994976113394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147970 + }, + { + "epoch": 0.7176762575884517, + "grad_norm": 2.8357620251995286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147980 + }, + { + "epoch": 0.7177247557812877, + "grad_norm": 2.586208935895229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 147990 + }, + { + "epoch": 0.7177732539741238, + "grad_norm": 2.7993888096489172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148000 + }, + { + "epoch": 0.7178217521669599, + "grad_norm": 2.3847638530583026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148010 + }, + { + "epoch": 0.717870250359796, + "grad_norm": 3.103428269923825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148020 + }, + { + "epoch": 0.717918748552632, + "grad_norm": 2.751624350594284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148030 + }, + { + "epoch": 0.7179672467454682, + "grad_norm": 2.4815946630951657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148040 + }, + { + "epoch": 0.7180157449383042, + "grad_norm": 2.5335259223879802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148050 + }, + { + "epoch": 0.7180642431311404, + "grad_norm": 4.411885257127324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148060 + }, + { + "epoch": 0.7181127413239764, + "grad_norm": 2.5442238538175843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148070 + }, + { + "epoch": 0.7181612395168125, + "grad_norm": 2.1577035269615408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148080 + }, + { + "epoch": 0.7182097377096486, + "grad_norm": 2.5526993852054147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148090 + }, + { + "epoch": 0.7182582359024847, + "grad_norm": 2.661700371220377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148100 + }, + { + "epoch": 0.7183067340953208, + "grad_norm": 2.6219201032517958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148110 + }, + { + "epoch": 0.7183552322881569, + "grad_norm": 2.5696529348806507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148120 + }, + { + "epoch": 0.7184037304809929, + "grad_norm": 2.0784302279253097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148130 + }, + { + "epoch": 0.7184522286738291, + "grad_norm": 2.5260741054466962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148140 + }, + { + "epoch": 0.7185007268666651, + "grad_norm": 2.542338428668245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148150 + }, + { + "epoch": 0.7185492250595013, + "grad_norm": 2.3674425975173108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148160 + }, + { + "epoch": 0.7185977232523373, + "grad_norm": 2.1314658482651794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148170 + }, + { + "epoch": 0.7186462214451734, + "grad_norm": 2.3252438197118863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148180 + }, + { + "epoch": 0.7186947196380095, + "grad_norm": 2.2566590374140105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148190 + }, + { + "epoch": 0.7187432178308456, + "grad_norm": 2.803853149657698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148200 + }, + { + "epoch": 0.7187917160236816, + "grad_norm": 0.0001521449303254485, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 148210 + }, + { + "epoch": 0.7188402142165178, + "grad_norm": 0.00018073069804813713, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 148220 + }, + { + "epoch": 0.7188887124093538, + "grad_norm": 0.0001910828286781907, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 148230 + }, + { + "epoch": 0.71893721060219, + "grad_norm": 0.00014015134365763515, + "learning_rate": 0.0002, + "loss": 0.0024, + "step": 148240 + }, + { + "epoch": 0.718985708795026, + "grad_norm": 0.0005457496736198664, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 148250 + }, + { + "epoch": 0.7190342069878621, + "grad_norm": 0.006285892333835363, + "learning_rate": 0.0002, + "loss": 0.0323, + "step": 148260 + }, + { + "epoch": 0.7190827051806982, + "grad_norm": 0.0006454074755311012, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 148270 + }, + { + "epoch": 0.7191312033735343, + "grad_norm": 9.537675214232877e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148280 + }, + { + "epoch": 0.7191797015663703, + "grad_norm": 0.00011866368731716648, + "learning_rate": 0.0002, + "loss": 0.0025, + "step": 148290 + }, + { + "epoch": 0.7192281997592065, + "grad_norm": 0.09117765724658966, + "learning_rate": 0.0002, + "loss": 0.0078, + "step": 148300 + }, + { + "epoch": 0.7192766979520425, + "grad_norm": 0.011916808784008026, + "learning_rate": 0.0002, + "loss": 0.0319, + "step": 148310 + }, + { + "epoch": 0.7193251961448787, + "grad_norm": 0.000555830541998148, + "learning_rate": 0.0002, + "loss": 0.0107, + "step": 148320 + }, + { + "epoch": 0.7193736943377147, + "grad_norm": 0.0002213216503150761, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 148330 + }, + { + "epoch": 0.7194221925305508, + "grad_norm": 0.00045079539995640516, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 148340 + }, + { + "epoch": 0.7194706907233869, + "grad_norm": 0.00010114758333656937, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148350 + }, + { + "epoch": 0.719519188916223, + "grad_norm": 7.535074837505817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148360 + }, + { + "epoch": 0.719567687109059, + "grad_norm": 6.247402779990807e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148370 + }, + { + "epoch": 0.7196161853018952, + "grad_norm": 0.0002466959413141012, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148380 + }, + { + "epoch": 0.7196646834947312, + "grad_norm": 4.330013689468615e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148390 + }, + { + "epoch": 0.7197131816875674, + "grad_norm": 0.00021266352268867195, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148400 + }, + { + "epoch": 0.7197616798804034, + "grad_norm": 3.403921073186211e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148410 + }, + { + "epoch": 0.7198101780732395, + "grad_norm": 3.207211193512194e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148420 + }, + { + "epoch": 0.7198586762660756, + "grad_norm": 5.989171768305823e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 148430 + }, + { + "epoch": 0.7199071744589117, + "grad_norm": 4.0454477129969746e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148440 + }, + { + "epoch": 0.7199556726517479, + "grad_norm": 6.767734157619998e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148450 + }, + { + "epoch": 0.7200041708445839, + "grad_norm": 4.585882925312035e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 148460 + }, + { + "epoch": 0.72005266903742, + "grad_norm": 4.253726001479663e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148470 + }, + { + "epoch": 0.7201011672302561, + "grad_norm": 3.87316285923589e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148480 + }, + { + "epoch": 0.7201496654230922, + "grad_norm": 2.459555798850488e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148490 + }, + { + "epoch": 0.7201981636159283, + "grad_norm": 2.8188022042741068e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148500 + }, + { + "epoch": 0.7202466618087644, + "grad_norm": 7.637072849320248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148510 + }, + { + "epoch": 0.7202951600016004, + "grad_norm": 2.5944340450223535e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148520 + }, + { + "epoch": 0.7203436581944366, + "grad_norm": 2.7656387828756124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148530 + }, + { + "epoch": 0.7203921563872726, + "grad_norm": 4.534927575150505e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148540 + }, + { + "epoch": 0.7204406545801088, + "grad_norm": 2.005580608965829e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148550 + }, + { + "epoch": 0.7204891527729448, + "grad_norm": 2.2969976271269843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148560 + }, + { + "epoch": 0.7205376509657809, + "grad_norm": 1.9745986719499342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148570 + }, + { + "epoch": 0.720586149158617, + "grad_norm": 1.5534653357462958e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148580 + }, + { + "epoch": 0.7206346473514531, + "grad_norm": 3.25580986100249e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148590 + }, + { + "epoch": 0.7206831455442891, + "grad_norm": 1.2178681572549976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148600 + }, + { + "epoch": 0.7207316437371253, + "grad_norm": 1.4136750905890949e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148610 + }, + { + "epoch": 0.7207801419299613, + "grad_norm": 1.3355589544516988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148620 + }, + { + "epoch": 0.7208286401227975, + "grad_norm": 1.2426860848790966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148630 + }, + { + "epoch": 0.7208771383156335, + "grad_norm": 1.0361151908000465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148640 + }, + { + "epoch": 0.7209256365084696, + "grad_norm": 1.2205219718453009e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148650 + }, + { + "epoch": 0.7209741347013057, + "grad_norm": 1.2711981071333867e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148660 + }, + { + "epoch": 0.7210226328941418, + "grad_norm": 1.1414638720452785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148670 + }, + { + "epoch": 0.7210711310869778, + "grad_norm": 1.0730919711932074e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148680 + }, + { + "epoch": 0.721119629279814, + "grad_norm": 1.1851997442136053e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148690 + }, + { + "epoch": 0.72116812747265, + "grad_norm": 9.709050573292188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148700 + }, + { + "epoch": 0.7212166256654862, + "grad_norm": 1.0310928701073863e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148710 + }, + { + "epoch": 0.7212651238583222, + "grad_norm": 1.1173692655574996e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148720 + }, + { + "epoch": 0.7213136220511583, + "grad_norm": 1.103966042137472e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148730 + }, + { + "epoch": 0.7213621202439944, + "grad_norm": 8.816476110951044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148740 + }, + { + "epoch": 0.7214106184368305, + "grad_norm": 8.804173376120161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148750 + }, + { + "epoch": 0.7214591166296666, + "grad_norm": 8.61239095684141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148760 + }, + { + "epoch": 0.7215076148225027, + "grad_norm": 1.0545632903813384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148770 + }, + { + "epoch": 0.7215561130153387, + "grad_norm": 1.4849424587737303e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148780 + }, + { + "epoch": 0.7216046112081749, + "grad_norm": 8.506377525918651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148790 + }, + { + "epoch": 0.7216531094010109, + "grad_norm": 8.17462114355294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148800 + }, + { + "epoch": 0.721701607593847, + "grad_norm": 7.940251634863671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148810 + }, + { + "epoch": 0.7217501057866831, + "grad_norm": 7.529526556027122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148820 + }, + { + "epoch": 0.7217986039795192, + "grad_norm": 7.194481895567151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148830 + }, + { + "epoch": 0.7218471021723553, + "grad_norm": 1.6748414054745808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148840 + }, + { + "epoch": 0.7218956003651914, + "grad_norm": 1.3749365280091297e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148850 + }, + { + "epoch": 0.7219440985580274, + "grad_norm": 1.0120617844222579e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148860 + }, + { + "epoch": 0.7219925967508636, + "grad_norm": 6.4260666476911865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148870 + }, + { + "epoch": 0.7220410949436996, + "grad_norm": 6.05889044891228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148880 + }, + { + "epoch": 0.7220895931365358, + "grad_norm": 1.1265738066867925e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148890 + }, + { + "epoch": 0.7221380913293718, + "grad_norm": 5.64529727853369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148900 + }, + { + "epoch": 0.7221865895222079, + "grad_norm": 5.7755910347623285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148910 + }, + { + "epoch": 0.722235087715044, + "grad_norm": 5.506331490323646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148920 + }, + { + "epoch": 0.7222835859078801, + "grad_norm": 5.811112259834772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148930 + }, + { + "epoch": 0.7223320841007161, + "grad_norm": 1.119360422308091e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148940 + }, + { + "epoch": 0.7223805822935523, + "grad_norm": 5.343080374586862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148950 + }, + { + "epoch": 0.7224290804863884, + "grad_norm": 5.004038030165248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148960 + }, + { + "epoch": 0.7224775786792245, + "grad_norm": 4.920504579786211e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148970 + }, + { + "epoch": 0.7225260768720606, + "grad_norm": 5.402137048804434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148980 + }, + { + "epoch": 0.7225745750648966, + "grad_norm": 5.100479029351845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 148990 + }, + { + "epoch": 0.7226230732577328, + "grad_norm": 1.2353333659120835e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149000 + }, + { + "epoch": 0.7226715714505688, + "grad_norm": 4.703933427663287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149010 + }, + { + "epoch": 0.722720069643405, + "grad_norm": 5.542746293940581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149020 + }, + { + "epoch": 0.722768567836241, + "grad_norm": 7.157752406783402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149030 + }, + { + "epoch": 0.7228170660290771, + "grad_norm": 4.959937086823629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149040 + }, + { + "epoch": 0.7228655642219132, + "grad_norm": 4.931262992613483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149050 + }, + { + "epoch": 0.7229140624147493, + "grad_norm": 5.878258889424615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149060 + }, + { + "epoch": 0.7229625606075853, + "grad_norm": 4.386112323118141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149070 + }, + { + "epoch": 0.7230110588004215, + "grad_norm": 1.5595245713484474e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149080 + }, + { + "epoch": 0.7230595569932575, + "grad_norm": 3.7690583667426836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149090 + }, + { + "epoch": 0.7231080551860937, + "grad_norm": 4.16778311773669e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149100 + }, + { + "epoch": 0.7231565533789297, + "grad_norm": 3.7797738059452968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149110 + }, + { + "epoch": 0.7232050515717658, + "grad_norm": 3.964436018577544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149120 + }, + { + "epoch": 0.7232535497646019, + "grad_norm": 3.618936716520693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149130 + }, + { + "epoch": 0.723302047957438, + "grad_norm": 4.128901764488546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149140 + }, + { + "epoch": 0.723350546150274, + "grad_norm": 7.090820417943178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149150 + }, + { + "epoch": 0.7233990443431102, + "grad_norm": 4.011750661447877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149160 + }, + { + "epoch": 0.7234475425359462, + "grad_norm": 3.5841535463987384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149170 + }, + { + "epoch": 0.7234960407287824, + "grad_norm": 3.666422117021284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149180 + }, + { + "epoch": 0.7235445389216184, + "grad_norm": 4.3188765630475245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149190 + }, + { + "epoch": 0.7235930371144546, + "grad_norm": 3.21030006489309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149200 + }, + { + "epoch": 0.7236415353072906, + "grad_norm": 3.56718464900041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149210 + }, + { + "epoch": 0.7236900335001267, + "grad_norm": 3.5806940559268696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149220 + }, + { + "epoch": 0.7237385316929628, + "grad_norm": 3.295213218734716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149230 + }, + { + "epoch": 0.7237870298857989, + "grad_norm": 3.6887706755805993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149240 + }, + { + "epoch": 0.7238355280786349, + "grad_norm": 7.791964890202507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149250 + }, + { + "epoch": 0.7238840262714711, + "grad_norm": 4.950005404680269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149260 + }, + { + "epoch": 0.7239325244643071, + "grad_norm": 3.308606892460375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149270 + }, + { + "epoch": 0.7239810226571433, + "grad_norm": 3.0406088171730516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149280 + }, + { + "epoch": 0.7240295208499793, + "grad_norm": 3.057021103813895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149290 + }, + { + "epoch": 0.7240780190428154, + "grad_norm": 3.944820491597056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149300 + }, + { + "epoch": 0.7241265172356515, + "grad_norm": 3.2088983061839826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149310 + }, + { + "epoch": 0.7241750154284876, + "grad_norm": 3.524761496009887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149320 + }, + { + "epoch": 0.7242235136213236, + "grad_norm": 2.9265370358189102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149330 + }, + { + "epoch": 0.7242720118141598, + "grad_norm": 2.9357104267546674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149340 + }, + { + "epoch": 0.7243205100069958, + "grad_norm": 9.805882655200548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149350 + }, + { + "epoch": 0.724369008199832, + "grad_norm": 5.055540896137245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149360 + }, + { + "epoch": 0.724417506392668, + "grad_norm": 2.848136546163005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149370 + }, + { + "epoch": 0.7244660045855041, + "grad_norm": 2.4125240543071413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149380 + }, + { + "epoch": 0.7245145027783402, + "grad_norm": 2.3348013655777322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149390 + }, + { + "epoch": 0.7245630009711763, + "grad_norm": 2.3712761958449846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149400 + }, + { + "epoch": 0.7246114991640124, + "grad_norm": 2.4080195544229355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149410 + }, + { + "epoch": 0.7246599973568485, + "grad_norm": 3.895245299645467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149420 + }, + { + "epoch": 0.7247084955496845, + "grad_norm": 2.703421614569379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149430 + }, + { + "epoch": 0.7247569937425207, + "grad_norm": 2.4058249437075574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149440 + }, + { + "epoch": 0.7248054919353567, + "grad_norm": 2.1509140424313955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149450 + }, + { + "epoch": 0.7248539901281928, + "grad_norm": 2.232648057542974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149460 + }, + { + "epoch": 0.724902488321029, + "grad_norm": 2.6679117581807077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149470 + }, + { + "epoch": 0.724950986513865, + "grad_norm": 2.2893568711879198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149480 + }, + { + "epoch": 0.7249994847067012, + "grad_norm": 2.4817095436446834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149490 + }, + { + "epoch": 0.7250479828995372, + "grad_norm": 2.3122267975850264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149500 + }, + { + "epoch": 0.7250964810923733, + "grad_norm": 2.4637729438836686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149510 + }, + { + "epoch": 0.7251449792852094, + "grad_norm": 2.213573907283717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149520 + }, + { + "epoch": 0.7251934774780455, + "grad_norm": 7.425534931826405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149530 + }, + { + "epoch": 0.7252419756708816, + "grad_norm": 6.19962429482257e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 149540 + }, + { + "epoch": 0.7252904738637177, + "grad_norm": 1.8752876712824218e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149550 + }, + { + "epoch": 0.7253389720565537, + "grad_norm": 0.006972038187086582, + "learning_rate": 0.0002, + "loss": 0.0163, + "step": 149560 + }, + { + "epoch": 0.7253874702493899, + "grad_norm": 0.015604502521455288, + "learning_rate": 0.0002, + "loss": 0.0029, + "step": 149570 + }, + { + "epoch": 0.7254359684422259, + "grad_norm": 0.0002159166324418038, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 149580 + }, + { + "epoch": 0.725484466635062, + "grad_norm": 7.393150008283556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149590 + }, + { + "epoch": 0.7255329648278981, + "grad_norm": 5.363257878343575e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 149600 + }, + { + "epoch": 0.7255814630207342, + "grad_norm": 0.0011937104864045978, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 149610 + }, + { + "epoch": 0.7256299612135703, + "grad_norm": 0.000439577444922179, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 149620 + }, + { + "epoch": 0.7256784594064064, + "grad_norm": 0.00010391407704446465, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 149630 + }, + { + "epoch": 0.7257269575992424, + "grad_norm": 0.0007426846423186362, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149640 + }, + { + "epoch": 0.7257754557920786, + "grad_norm": 0.0002556298568379134, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149650 + }, + { + "epoch": 0.7258239539849146, + "grad_norm": 6.929483060957864e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149660 + }, + { + "epoch": 0.7258724521777508, + "grad_norm": 6.152877176646143e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149670 + }, + { + "epoch": 0.7259209503705868, + "grad_norm": 6.388551992131397e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149680 + }, + { + "epoch": 0.7259694485634229, + "grad_norm": 4.9958536692429334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149690 + }, + { + "epoch": 0.726017946756259, + "grad_norm": 4.557603824650869e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149700 + }, + { + "epoch": 0.7260664449490951, + "grad_norm": 4.782436371897347e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149710 + }, + { + "epoch": 0.7261149431419311, + "grad_norm": 4.7990241000661626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149720 + }, + { + "epoch": 0.7261634413347673, + "grad_norm": 4.169118619756773e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149730 + }, + { + "epoch": 0.7262119395276033, + "grad_norm": 3.662965900730342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149740 + }, + { + "epoch": 0.7262604377204395, + "grad_norm": 3.376854510861449e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149750 + }, + { + "epoch": 0.7263089359132755, + "grad_norm": 3.648785059340298e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149760 + }, + { + "epoch": 0.7263574341061116, + "grad_norm": 3.4577722544781864e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149770 + }, + { + "epoch": 0.7264059322989477, + "grad_norm": 3.5184893931727856e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149780 + }, + { + "epoch": 0.7264544304917838, + "grad_norm": 2.788615893223323e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149790 + }, + { + "epoch": 0.7265029286846199, + "grad_norm": 2.7278607376501895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149800 + }, + { + "epoch": 0.726551426877456, + "grad_norm": 2.8974933229619637e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149810 + }, + { + "epoch": 0.726599925070292, + "grad_norm": 3.550813198671676e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149820 + }, + { + "epoch": 0.7266484232631282, + "grad_norm": 2.7670859708450735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149830 + }, + { + "epoch": 0.7266969214559642, + "grad_norm": 2.360181497351732e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149840 + }, + { + "epoch": 0.7267454196488004, + "grad_norm": 2.2778871425543912e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149850 + }, + { + "epoch": 0.7267939178416364, + "grad_norm": 2.404343285888899e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149860 + }, + { + "epoch": 0.7268424160344725, + "grad_norm": 2.381739795964677e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149870 + }, + { + "epoch": 0.7268909142273086, + "grad_norm": 2.7035530365537852e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149880 + }, + { + "epoch": 0.7269394124201447, + "grad_norm": 1.9215813154005446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149890 + }, + { + "epoch": 0.7269879106129807, + "grad_norm": 1.903821794257965e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149900 + }, + { + "epoch": 0.7270364088058169, + "grad_norm": 2.6788708055391908e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149910 + }, + { + "epoch": 0.7270849069986529, + "grad_norm": 1.9992563466075808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149920 + }, + { + "epoch": 0.7271334051914891, + "grad_norm": 6.068043512641452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149930 + }, + { + "epoch": 0.7271819033843251, + "grad_norm": 1.746754060150124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149940 + }, + { + "epoch": 0.7272304015771612, + "grad_norm": 1.647465251153335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149950 + }, + { + "epoch": 0.7272788997699973, + "grad_norm": 1.7067271983250976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149960 + }, + { + "epoch": 0.7273273979628334, + "grad_norm": 1.723696186672896e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149970 + }, + { + "epoch": 0.7273758961556696, + "grad_norm": 1.7208636563736945e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149980 + }, + { + "epoch": 0.7274243943485056, + "grad_norm": 1.4683444533147849e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 149990 + }, + { + "epoch": 0.7274728925413417, + "grad_norm": 1.467393940401962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150000 + }, + { + "epoch": 0.7275213907341778, + "grad_norm": 1.972599420696497e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150010 + }, + { + "epoch": 0.7275698889270139, + "grad_norm": 1.607071862963494e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150020 + }, + { + "epoch": 0.7276183871198499, + "grad_norm": 1.4441920939134434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150030 + }, + { + "epoch": 0.7276668853126861, + "grad_norm": 1.282164976146305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150040 + }, + { + "epoch": 0.7277153835055221, + "grad_norm": 1.2711887393379584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150050 + }, + { + "epoch": 0.7277638816983583, + "grad_norm": 1.4380858374352101e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150060 + }, + { + "epoch": 0.7278123798911943, + "grad_norm": 1.5289648217731155e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150070 + }, + { + "epoch": 0.7278608780840304, + "grad_norm": 1.7207528799190186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150080 + }, + { + "epoch": 0.7279093762768665, + "grad_norm": 1.1095641639258247e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150090 + }, + { + "epoch": 0.7279578744697026, + "grad_norm": 1.143044119089609e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150100 + }, + { + "epoch": 0.7280063726625386, + "grad_norm": 1.257761050510453e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150110 + }, + { + "epoch": 0.7280548708553748, + "grad_norm": 1.2363777386781294e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150120 + }, + { + "epoch": 0.7281033690482108, + "grad_norm": 1.249438264494529e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150130 + }, + { + "epoch": 0.728151867241047, + "grad_norm": 1.0394485798315145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150140 + }, + { + "epoch": 0.728200365433883, + "grad_norm": 1.0492475666978862e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150150 + }, + { + "epoch": 0.7282488636267191, + "grad_norm": 1.1721930604835507e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150160 + }, + { + "epoch": 0.7282973618195552, + "grad_norm": 1.2088752555428073e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150170 + }, + { + "epoch": 0.7283458600123913, + "grad_norm": 1.0714094969443977e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150180 + }, + { + "epoch": 0.7283943582052274, + "grad_norm": 9.255770237359684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150190 + }, + { + "epoch": 0.7284428563980635, + "grad_norm": 9.026581210491713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150200 + }, + { + "epoch": 0.7284913545908995, + "grad_norm": 1.048366630129749e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150210 + }, + { + "epoch": 0.7285398527837357, + "grad_norm": 1.0696402569010388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150220 + }, + { + "epoch": 0.7285883509765717, + "grad_norm": 1.0367322829551995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150230 + }, + { + "epoch": 0.7286368491694079, + "grad_norm": 8.414800504397135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150240 + }, + { + "epoch": 0.7286853473622439, + "grad_norm": 8.118395271594636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150250 + }, + { + "epoch": 0.72873384555508, + "grad_norm": 1.0029710210801568e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150260 + }, + { + "epoch": 0.7287823437479161, + "grad_norm": 9.307182153861504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150270 + }, + { + "epoch": 0.7288308419407522, + "grad_norm": 9.03613181435503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150280 + }, + { + "epoch": 0.7288793401335882, + "grad_norm": 7.69499547459418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150290 + }, + { + "epoch": 0.7289278383264244, + "grad_norm": 7.566890417365357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150300 + }, + { + "epoch": 0.7289763365192604, + "grad_norm": 8.975446689873934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150310 + }, + { + "epoch": 0.7290248347120966, + "grad_norm": 1.5027054359961767e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150320 + }, + { + "epoch": 0.7290733329049326, + "grad_norm": 8.1995831351378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150330 + }, + { + "epoch": 0.7291218310977687, + "grad_norm": 7.034687769191805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150340 + }, + { + "epoch": 0.7291703292906048, + "grad_norm": 7.07525759935379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150350 + }, + { + "epoch": 0.7292188274834409, + "grad_norm": 8.050433280004654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150360 + }, + { + "epoch": 0.729267325676277, + "grad_norm": 7.763929716020357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150370 + }, + { + "epoch": 0.7293158238691131, + "grad_norm": 7.699542038608342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150380 + }, + { + "epoch": 0.7293643220619491, + "grad_norm": 6.635899808316026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150390 + }, + { + "epoch": 0.7294128202547853, + "grad_norm": 6.615692200284684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150400 + }, + { + "epoch": 0.7294613184476213, + "grad_norm": 7.506876045226818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150410 + }, + { + "epoch": 0.7295098166404574, + "grad_norm": 7.206662303360645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150420 + }, + { + "epoch": 0.7295583148332935, + "grad_norm": 7.403782547044102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150430 + }, + { + "epoch": 0.7296068130261296, + "grad_norm": 5.861865247425158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150440 + }, + { + "epoch": 0.7296553112189657, + "grad_norm": 6.0990646488789935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150450 + }, + { + "epoch": 0.7297038094118018, + "grad_norm": 7.092365194694139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150460 + }, + { + "epoch": 0.7297523076046378, + "grad_norm": 7.261925929924473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150470 + }, + { + "epoch": 0.729800805797474, + "grad_norm": 6.751190994691569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150480 + }, + { + "epoch": 0.7298493039903101, + "grad_norm": 6.092068360885605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150490 + }, + { + "epoch": 0.7298978021831461, + "grad_norm": 5.5456735026382376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150500 + }, + { + "epoch": 0.7299463003759823, + "grad_norm": 6.482011940533994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150510 + }, + { + "epoch": 0.7299947985688183, + "grad_norm": 6.615468009840697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150520 + }, + { + "epoch": 0.7300432967616545, + "grad_norm": 6.464083980972646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150530 + }, + { + "epoch": 0.7300917949544905, + "grad_norm": 5.31926662006299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150540 + }, + { + "epoch": 0.7301402931473266, + "grad_norm": 5.369097834773129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150550 + }, + { + "epoch": 0.7301887913401627, + "grad_norm": 6.043125267751748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150560 + }, + { + "epoch": 0.7302372895329988, + "grad_norm": 5.759131909144344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150570 + }, + { + "epoch": 0.7302857877258349, + "grad_norm": 5.817572855448816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150580 + }, + { + "epoch": 0.730334285918671, + "grad_norm": 5.104460342408856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150590 + }, + { + "epoch": 0.730382784111507, + "grad_norm": 5.0532921704871114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150600 + }, + { + "epoch": 0.7304312823043432, + "grad_norm": 1.4287425074144267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150610 + }, + { + "epoch": 0.7304797804971792, + "grad_norm": 6.585249593626941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150620 + }, + { + "epoch": 0.7305282786900154, + "grad_norm": 5.537925517273834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150630 + }, + { + "epoch": 0.7305767768828514, + "grad_norm": 4.776049991050968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150640 + }, + { + "epoch": 0.7306252750756875, + "grad_norm": 4.887125669483794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150650 + }, + { + "epoch": 0.7306737732685236, + "grad_norm": 5.208451511862222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150660 + }, + { + "epoch": 0.7307222714613597, + "grad_norm": 5.61215301786433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150670 + }, + { + "epoch": 0.7307707696541957, + "grad_norm": 5.680030881194398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150680 + }, + { + "epoch": 0.7308192678470319, + "grad_norm": 4.588873252941994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150690 + }, + { + "epoch": 0.7308677660398679, + "grad_norm": 4.373618139652535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150700 + }, + { + "epoch": 0.7309162642327041, + "grad_norm": 7.516045570810093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150710 + }, + { + "epoch": 0.7309647624255401, + "grad_norm": 4.799072939931648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150720 + }, + { + "epoch": 0.7310132606183762, + "grad_norm": 4.735207312478451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150730 + }, + { + "epoch": 0.7310617588112123, + "grad_norm": 4.277549123798963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150740 + }, + { + "epoch": 0.7311102570040484, + "grad_norm": 4.143738351558568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150750 + }, + { + "epoch": 0.7311587551968844, + "grad_norm": 4.668393557949457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150760 + }, + { + "epoch": 0.7312072533897206, + "grad_norm": 5.1747811085078865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150770 + }, + { + "epoch": 0.7312557515825566, + "grad_norm": 5.114573923492571e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150780 + }, + { + "epoch": 0.7313042497753928, + "grad_norm": 4.322294444136787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150790 + }, + { + "epoch": 0.7313527479682288, + "grad_norm": 4.027375325676985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150800 + }, + { + "epoch": 0.7314012461610649, + "grad_norm": 4.9640816541796084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150810 + }, + { + "epoch": 0.731449744353901, + "grad_norm": 4.869041731581092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150820 + }, + { + "epoch": 0.7314982425467371, + "grad_norm": 4.169525254837936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150830 + }, + { + "epoch": 0.7315467407395732, + "grad_norm": 2.2168667783262208e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 150840 + }, + { + "epoch": 0.7315952389324093, + "grad_norm": 0.003443922149017453, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150850 + }, + { + "epoch": 0.7316437371252453, + "grad_norm": 9.548756679578219e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150860 + }, + { + "epoch": 0.7316922353180815, + "grad_norm": 6.1703390201728325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150870 + }, + { + "epoch": 0.7317407335109175, + "grad_norm": 5.98448650634964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150880 + }, + { + "epoch": 0.7317892317037537, + "grad_norm": 7.28796885596239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150890 + }, + { + "epoch": 0.7318377298965897, + "grad_norm": 6.356870017043548e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 150900 + }, + { + "epoch": 0.7318862280894258, + "grad_norm": 1.062523642758606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150910 + }, + { + "epoch": 0.7319347262822619, + "grad_norm": 1.2702549611276481e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150920 + }, + { + "epoch": 0.731983224475098, + "grad_norm": 8.453986083623022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150930 + }, + { + "epoch": 0.732031722667934, + "grad_norm": 5.278679509501671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150940 + }, + { + "epoch": 0.7320802208607702, + "grad_norm": 5.081489689473528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150950 + }, + { + "epoch": 0.7321287190536062, + "grad_norm": 3.778531436182675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150960 + }, + { + "epoch": 0.7321772172464424, + "grad_norm": 4.041662123199785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150970 + }, + { + "epoch": 0.7322257154392784, + "grad_norm": 4.18709714722354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150980 + }, + { + "epoch": 0.7322742136321145, + "grad_norm": 4.235863343637902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 150990 + }, + { + "epoch": 0.7323227118249506, + "grad_norm": 4.219107267999789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151000 + }, + { + "epoch": 0.7323712100177867, + "grad_norm": 3.720324457390234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151010 + }, + { + "epoch": 0.7324197082106229, + "grad_norm": 3.479468887235271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151020 + }, + { + "epoch": 0.7324682064034589, + "grad_norm": 3.7616237023030408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151030 + }, + { + "epoch": 0.732516704596295, + "grad_norm": 4.084416559635429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151040 + }, + { + "epoch": 0.7325652027891311, + "grad_norm": 4.1843381950457115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151050 + }, + { + "epoch": 0.7326137009819672, + "grad_norm": 3.3909952890098793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151060 + }, + { + "epoch": 0.7326621991748032, + "grad_norm": 3.6382566577231046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151070 + }, + { + "epoch": 0.7327106973676394, + "grad_norm": 3.7006839193054475e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 151080 + }, + { + "epoch": 0.7327591955604754, + "grad_norm": 1.6568814316997305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151090 + }, + { + "epoch": 0.7328076937533116, + "grad_norm": 2.4911238142522052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151100 + }, + { + "epoch": 0.7328561919461476, + "grad_norm": 1.3022225175518543e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151110 + }, + { + "epoch": 0.7329046901389837, + "grad_norm": 1.1796430953836534e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151120 + }, + { + "epoch": 0.7329531883318198, + "grad_norm": 1.0250710147374775e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151130 + }, + { + "epoch": 0.7330016865246559, + "grad_norm": 1.7780748748918995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151140 + }, + { + "epoch": 0.733050184717492, + "grad_norm": 1.9240133042330854e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151150 + }, + { + "epoch": 0.7330986829103281, + "grad_norm": 7.09820369593217e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151160 + }, + { + "epoch": 0.7331471811031641, + "grad_norm": 6.770097570552025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151170 + }, + { + "epoch": 0.7331956792960003, + "grad_norm": 1.2770951798302121e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151180 + }, + { + "epoch": 0.7332441774888363, + "grad_norm": 1.0236392881779466e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151190 + }, + { + "epoch": 0.7332926756816724, + "grad_norm": 8.453365808236413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151200 + }, + { + "epoch": 0.7333411738745085, + "grad_norm": 6.275701707636472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151210 + }, + { + "epoch": 0.7333896720673446, + "grad_norm": 5.054089342593215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151220 + }, + { + "epoch": 0.7334381702601807, + "grad_norm": 5.1272700147819705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151230 + }, + { + "epoch": 0.7334866684530168, + "grad_norm": 6.656689492956502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151240 + }, + { + "epoch": 0.7335351666458528, + "grad_norm": 6.860357643745374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151250 + }, + { + "epoch": 0.733583664838689, + "grad_norm": 4.440003976924345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151260 + }, + { + "epoch": 0.733632163031525, + "grad_norm": 5.043268629378872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151270 + }, + { + "epoch": 0.7336806612243612, + "grad_norm": 4.513064141065115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151280 + }, + { + "epoch": 0.7337291594171972, + "grad_norm": 6.793411557737272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151290 + }, + { + "epoch": 0.7337776576100333, + "grad_norm": 5.1989572966704145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151300 + }, + { + "epoch": 0.7338261558028694, + "grad_norm": 1.3311351722222753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151310 + }, + { + "epoch": 0.7338746539957055, + "grad_norm": 4.1041034819500055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151320 + }, + { + "epoch": 0.7339231521885415, + "grad_norm": 4.316444574214984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151330 + }, + { + "epoch": 0.7339716503813777, + "grad_norm": 5.104054253024515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151340 + }, + { + "epoch": 0.7340201485742137, + "grad_norm": 4.53578195447335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151350 + }, + { + "epoch": 0.7340686467670499, + "grad_norm": 3.711463705258211e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151360 + }, + { + "epoch": 0.7341171449598859, + "grad_norm": 3.451459633652121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151370 + }, + { + "epoch": 0.734165643152722, + "grad_norm": 3.3239407457585912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151380 + }, + { + "epoch": 0.7342141413455581, + "grad_norm": 3.93033542422927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151390 + }, + { + "epoch": 0.7342626395383942, + "grad_norm": 4.168148279859452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151400 + }, + { + "epoch": 0.7343111377312302, + "grad_norm": 3.118769200227689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151410 + }, + { + "epoch": 0.7343596359240664, + "grad_norm": 3.2137593279912835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151420 + }, + { + "epoch": 0.7344081341169024, + "grad_norm": 2.7557750854612095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151430 + }, + { + "epoch": 0.7344566323097386, + "grad_norm": 4.1195798985427245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151440 + }, + { + "epoch": 0.7345051305025746, + "grad_norm": 3.789399215747835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151450 + }, + { + "epoch": 0.7345536286954107, + "grad_norm": 2.8902072699565906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151460 + }, + { + "epoch": 0.7346021268882468, + "grad_norm": 3.0950775453675305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151470 + }, + { + "epoch": 0.7346506250810829, + "grad_norm": 3.3087831070588436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151480 + }, + { + "epoch": 0.734699123273919, + "grad_norm": 3.256753871028195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151490 + }, + { + "epoch": 0.7347476214667551, + "grad_norm": 3.435760618231143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151500 + }, + { + "epoch": 0.7347961196595911, + "grad_norm": 2.8507458864623914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151510 + }, + { + "epoch": 0.7348446178524273, + "grad_norm": 2.7571138616622193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151520 + }, + { + "epoch": 0.7348931160452634, + "grad_norm": 2.5311119316029362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151530 + }, + { + "epoch": 0.7349416142380994, + "grad_norm": 3.1564120490656933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151540 + }, + { + "epoch": 0.7349901124309356, + "grad_norm": 3.2248256047751056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151550 + }, + { + "epoch": 0.7350386106237716, + "grad_norm": 2.422516899969196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151560 + }, + { + "epoch": 0.7350871088166078, + "grad_norm": 2.742644255704363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151570 + }, + { + "epoch": 0.7351356070094438, + "grad_norm": 2.3361219518847065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151580 + }, + { + "epoch": 0.73518410520228, + "grad_norm": 3.227130719096749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151590 + }, + { + "epoch": 0.735232603395116, + "grad_norm": 2.586967184470268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151600 + }, + { + "epoch": 0.7352811015879521, + "grad_norm": 2.390868075963226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151610 + }, + { + "epoch": 0.7353295997807882, + "grad_norm": 2.527632204873953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151620 + }, + { + "epoch": 0.7353780979736243, + "grad_norm": 2.133831685569021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151630 + }, + { + "epoch": 0.7354265961664603, + "grad_norm": 2.61619584307482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151640 + }, + { + "epoch": 0.7354750943592965, + "grad_norm": 2.457628625052166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151650 + }, + { + "epoch": 0.7355235925521325, + "grad_norm": 2.0843544916715473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151660 + }, + { + "epoch": 0.7355720907449687, + "grad_norm": 2.1087344066472724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151670 + }, + { + "epoch": 0.7356205889378047, + "grad_norm": 2.1594300960714463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151680 + }, + { + "epoch": 0.7356690871306408, + "grad_norm": 2.325765535715618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151690 + }, + { + "epoch": 0.7357175853234769, + "grad_norm": 2.531539621486445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151700 + }, + { + "epoch": 0.735766083516313, + "grad_norm": 1.987582891160855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151710 + }, + { + "epoch": 0.735814581709149, + "grad_norm": 1.866380216597463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151720 + }, + { + "epoch": 0.7358630799019852, + "grad_norm": 1.951530521182576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151730 + }, + { + "epoch": 0.7359115780948212, + "grad_norm": 2.2066772089601727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151740 + }, + { + "epoch": 0.7359600762876574, + "grad_norm": 1.884063522084034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151750 + }, + { + "epoch": 0.7360085744804934, + "grad_norm": 2.1827754608239047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151760 + }, + { + "epoch": 0.7360570726733295, + "grad_norm": 1.8299604107596679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151770 + }, + { + "epoch": 0.7361055708661656, + "grad_norm": 1.9575377336877864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151780 + }, + { + "epoch": 0.7361540690590017, + "grad_norm": 1.927806124513154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151790 + }, + { + "epoch": 0.7362025672518377, + "grad_norm": 1.9875831185345305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151800 + }, + { + "epoch": 0.7362510654446739, + "grad_norm": 1.9370725112821674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151810 + }, + { + "epoch": 0.7362995636375099, + "grad_norm": 1.8243907788928482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151820 + }, + { + "epoch": 0.7363480618303461, + "grad_norm": 1.8110653172698221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151830 + }, + { + "epoch": 0.7363965600231821, + "grad_norm": 1.9123613128613215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151840 + }, + { + "epoch": 0.7364450582160182, + "grad_norm": 1.787080691428855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151850 + }, + { + "epoch": 0.7364935564088543, + "grad_norm": 1.5974354710124317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151860 + }, + { + "epoch": 0.7365420546016904, + "grad_norm": 1.6919077552302042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151870 + }, + { + "epoch": 0.7365905527945265, + "grad_norm": 1.5349678506026976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151880 + }, + { + "epoch": 0.7366390509873626, + "grad_norm": 1.5507167745454353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151890 + }, + { + "epoch": 0.7366875491801986, + "grad_norm": 1.8683174403122393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151900 + }, + { + "epoch": 0.7367360473730348, + "grad_norm": 1.4656810662927455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151910 + }, + { + "epoch": 0.7367845455658708, + "grad_norm": 1.6766476846896694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151920 + }, + { + "epoch": 0.736833043758707, + "grad_norm": 1.45367937420815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151930 + }, + { + "epoch": 0.736881541951543, + "grad_norm": 2.359617610636633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151940 + }, + { + "epoch": 0.7369300401443791, + "grad_norm": 1.5545081168966135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151950 + }, + { + "epoch": 0.7369785383372152, + "grad_norm": 1.6081893363661948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151960 + }, + { + "epoch": 0.7370270365300513, + "grad_norm": 1.481885874454747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151970 + }, + { + "epoch": 0.7370755347228873, + "grad_norm": 1.4867287063680124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151980 + }, + { + "epoch": 0.7371240329157235, + "grad_norm": 1.5030333315735334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 151990 + }, + { + "epoch": 0.7371725311085595, + "grad_norm": 1.4926308722351678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152000 + }, + { + "epoch": 0.7372210293013957, + "grad_norm": 1.3202605941842194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152010 + }, + { + "epoch": 0.7372695274942317, + "grad_norm": 1.3887198520023958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152020 + }, + { + "epoch": 0.7373180256870678, + "grad_norm": 1.3435088703772635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152030 + }, + { + "epoch": 0.737366523879904, + "grad_norm": 1.6170266690096469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152040 + }, + { + "epoch": 0.73741502207274, + "grad_norm": 1.4440130371440318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152050 + }, + { + "epoch": 0.7374635202655762, + "grad_norm": 1.5436247622346855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152060 + }, + { + "epoch": 0.7375120184584122, + "grad_norm": 1.256819473383075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152070 + }, + { + "epoch": 0.7375605166512483, + "grad_norm": 1.4779435559830745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152080 + }, + { + "epoch": 0.7376090148440844, + "grad_norm": 1.4138961432763608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152090 + }, + { + "epoch": 0.7376575130369205, + "grad_norm": 1.3026278793404344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152100 + }, + { + "epoch": 0.7377060112297565, + "grad_norm": 1.2757554941345006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152110 + }, + { + "epoch": 0.7377545094225927, + "grad_norm": 1.3286330613482278e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152120 + }, + { + "epoch": 0.7378030076154287, + "grad_norm": 1.3262715583550744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152130 + }, + { + "epoch": 0.7378515058082649, + "grad_norm": 1.284078962271451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152140 + }, + { + "epoch": 0.7379000040011009, + "grad_norm": 1.2081337672498194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152150 + }, + { + "epoch": 0.737948502193937, + "grad_norm": 1.2236121165187797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152160 + }, + { + "epoch": 0.7379970003867731, + "grad_norm": 1.6276552514682407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152170 + }, + { + "epoch": 0.7380454985796092, + "grad_norm": 1.1904719485755777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152180 + }, + { + "epoch": 0.7380939967724452, + "grad_norm": 1.2412243677317747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152190 + }, + { + "epoch": 0.7381424949652814, + "grad_norm": 1.1611597301453003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152200 + }, + { + "epoch": 0.7381909931581174, + "grad_norm": 1.4479259107247344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152210 + }, + { + "epoch": 0.7382394913509536, + "grad_norm": 6.11719733569771e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152220 + }, + { + "epoch": 0.7382879895437896, + "grad_norm": 1.1822941132777487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152230 + }, + { + "epoch": 0.7383364877366257, + "grad_norm": 1.153860466729384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152240 + }, + { + "epoch": 0.7383849859294618, + "grad_norm": 1.107510001929768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152250 + }, + { + "epoch": 0.7384334841222979, + "grad_norm": 1.2075173572156928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152260 + }, + { + "epoch": 0.738481982315134, + "grad_norm": 1.0901679843300371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152270 + }, + { + "epoch": 0.7385304805079701, + "grad_norm": 1.046213128574891e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152280 + }, + { + "epoch": 0.7385789787008061, + "grad_norm": 1.1553430567801115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152290 + }, + { + "epoch": 0.7386274768936423, + "grad_norm": 1.2942221019329736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152300 + }, + { + "epoch": 0.7386759750864783, + "grad_norm": 1.566604169056518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152310 + }, + { + "epoch": 0.7387244732793145, + "grad_norm": 1.5954651644278783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152320 + }, + { + "epoch": 0.7387729714721505, + "grad_norm": 1.0760954864963423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152330 + }, + { + "epoch": 0.7388214696649866, + "grad_norm": 9.996806511480827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152340 + }, + { + "epoch": 0.7388699678578227, + "grad_norm": 1.116113708121702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152350 + }, + { + "epoch": 0.7389184660506588, + "grad_norm": 1.0841723678822746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152360 + }, + { + "epoch": 0.7389669642434948, + "grad_norm": 9.765103641257156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152370 + }, + { + "epoch": 0.739015462436331, + "grad_norm": 1.0332067859053495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152380 + }, + { + "epoch": 0.739063960629167, + "grad_norm": 1.0438816389068961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152390 + }, + { + "epoch": 0.7391124588220032, + "grad_norm": 1.0850999387912452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152400 + }, + { + "epoch": 0.7391609570148392, + "grad_norm": 1.5017502619230072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152410 + }, + { + "epoch": 0.7392094552076753, + "grad_norm": 1.075679506357119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152420 + }, + { + "epoch": 0.7392579534005114, + "grad_norm": 9.353842074233398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152430 + }, + { + "epoch": 0.7393064515933475, + "grad_norm": 9.68335598372505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152440 + }, + { + "epoch": 0.7393549497861835, + "grad_norm": 9.707944172987482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152450 + }, + { + "epoch": 0.7394034479790197, + "grad_norm": 8.731338994039106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152460 + }, + { + "epoch": 0.7394519461718557, + "grad_norm": 8.949807011049415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152470 + }, + { + "epoch": 0.7395004443646919, + "grad_norm": 9.380024152960686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152480 + }, + { + "epoch": 0.7395489425575279, + "grad_norm": 9.348383400720195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152490 + }, + { + "epoch": 0.739597440750364, + "grad_norm": 9.251962751477549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152500 + }, + { + "epoch": 0.7396459389432001, + "grad_norm": 1.0210944765276508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152510 + }, + { + "epoch": 0.7396944371360362, + "grad_norm": 1.4039529787623906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152520 + }, + { + "epoch": 0.7397429353288723, + "grad_norm": 1.150913590208802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152530 + }, + { + "epoch": 0.7397914335217084, + "grad_norm": 9.253029702449567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152540 + }, + { + "epoch": 0.7398399317145445, + "grad_norm": 1.0103132126459968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152550 + }, + { + "epoch": 0.7398884299073806, + "grad_norm": 9.945405281541753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152560 + }, + { + "epoch": 0.7399369281002167, + "grad_norm": 8.223697705034283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152570 + }, + { + "epoch": 0.7399854262930528, + "grad_norm": 8.885373858902312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152580 + }, + { + "epoch": 0.7400339244858889, + "grad_norm": 9.043176874001801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152590 + }, + { + "epoch": 0.7400824226787249, + "grad_norm": 8.22850950044085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152600 + }, + { + "epoch": 0.7401309208715611, + "grad_norm": 9.508946163805376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152610 + }, + { + "epoch": 0.7401794190643971, + "grad_norm": 8.126257853291463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152620 + }, + { + "epoch": 0.7402279172572332, + "grad_norm": 8.923686891648686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152630 + }, + { + "epoch": 0.7402764154500693, + "grad_norm": 9.101025284508069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152640 + }, + { + "epoch": 0.7403249136429054, + "grad_norm": 8.289381980830512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152650 + }, + { + "epoch": 0.7403734118357415, + "grad_norm": 8.578618349019962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152660 + }, + { + "epoch": 0.7404219100285776, + "grad_norm": 1.0453775303176371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152670 + }, + { + "epoch": 0.7404704082214136, + "grad_norm": 8.020160748856142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152680 + }, + { + "epoch": 0.7405189064142498, + "grad_norm": 7.523109957219276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152690 + }, + { + "epoch": 0.7405674046070858, + "grad_norm": 2.4162750378309283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152700 + }, + { + "epoch": 0.740615902799922, + "grad_norm": 9.664147455623606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152710 + }, + { + "epoch": 0.740664400992758, + "grad_norm": 8.926570558287494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152720 + }, + { + "epoch": 0.7407128991855941, + "grad_norm": 8.783146654423035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152730 + }, + { + "epoch": 0.7407613973784302, + "grad_norm": 8.449842994195933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152740 + }, + { + "epoch": 0.7408098955712663, + "grad_norm": 7.675025699427351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152750 + }, + { + "epoch": 0.7408583937641023, + "grad_norm": 8.900666443878436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152760 + }, + { + "epoch": 0.7409068919569385, + "grad_norm": 7.177138741099043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152770 + }, + { + "epoch": 0.7409553901497745, + "grad_norm": 7.246164841490099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152780 + }, + { + "epoch": 0.7410038883426107, + "grad_norm": 7.599499554089562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152790 + }, + { + "epoch": 0.7410523865354467, + "grad_norm": 9.409164363205491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152800 + }, + { + "epoch": 0.7411008847282828, + "grad_norm": 9.61309410740796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152810 + }, + { + "epoch": 0.7411493829211189, + "grad_norm": 8.07993217222247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152820 + }, + { + "epoch": 0.741197881113955, + "grad_norm": 7.417431220346771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152830 + }, + { + "epoch": 0.741246379306791, + "grad_norm": 7.473251457668084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152840 + }, + { + "epoch": 0.7412948774996272, + "grad_norm": 7.057797120069154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152850 + }, + { + "epoch": 0.7413433756924632, + "grad_norm": 7.153434466999897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152860 + }, + { + "epoch": 0.7413918738852994, + "grad_norm": 7.281505531864241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152870 + }, + { + "epoch": 0.7414403720781354, + "grad_norm": 6.382557558026747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152880 + }, + { + "epoch": 0.7414888702709715, + "grad_norm": 6.934075145181851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152890 + }, + { + "epoch": 0.7415373684638076, + "grad_norm": 6.838567969680298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152900 + }, + { + "epoch": 0.7415858666566437, + "grad_norm": 6.66593621190259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152910 + }, + { + "epoch": 0.7416343648494798, + "grad_norm": 6.959169240872143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152920 + }, + { + "epoch": 0.7416828630423159, + "grad_norm": 6.607702971450635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152930 + }, + { + "epoch": 0.7417313612351519, + "grad_norm": 6.373664973580162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152940 + }, + { + "epoch": 0.7417798594279881, + "grad_norm": 6.06928267643525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152950 + }, + { + "epoch": 0.7418283576208241, + "grad_norm": 6.896001423228881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152960 + }, + { + "epoch": 0.7418768558136603, + "grad_norm": 6.593429020540498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152970 + }, + { + "epoch": 0.7419253540064963, + "grad_norm": 6.540646495523106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152980 + }, + { + "epoch": 0.7419738521993324, + "grad_norm": 8.215299089897599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 152990 + }, + { + "epoch": 0.7420223503921685, + "grad_norm": 6.406206694009597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153000 + }, + { + "epoch": 0.7420708485850046, + "grad_norm": 6.650944897046429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153010 + }, + { + "epoch": 0.7421193467778406, + "grad_norm": 7.868457601034606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153020 + }, + { + "epoch": 0.7421678449706768, + "grad_norm": 5.966708158666734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153030 + }, + { + "epoch": 0.7422163431635128, + "grad_norm": 6.151524871711445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153040 + }, + { + "epoch": 0.742264841356349, + "grad_norm": 6.966610612835211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153050 + }, + { + "epoch": 0.7423133395491851, + "grad_norm": 5.990739850858517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153060 + }, + { + "epoch": 0.7423618377420211, + "grad_norm": 6.084423489483015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153070 + }, + { + "epoch": 0.7424103359348573, + "grad_norm": 1.091134663511184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153080 + }, + { + "epoch": 0.7424588341276933, + "grad_norm": 5.986285600556585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153090 + }, + { + "epoch": 0.7425073323205295, + "grad_norm": 5.780436254099186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153100 + }, + { + "epoch": 0.7425558305133655, + "grad_norm": 5.878237061551772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153110 + }, + { + "epoch": 0.7426043287062016, + "grad_norm": 5.643805138788593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153120 + }, + { + "epoch": 0.7426528268990377, + "grad_norm": 5.906459250581975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153130 + }, + { + "epoch": 0.7427013250918738, + "grad_norm": 5.891772048016719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153140 + }, + { + "epoch": 0.7427498232847098, + "grad_norm": 7.868140983191552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153150 + }, + { + "epoch": 0.742798321477546, + "grad_norm": 5.549970296669926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153160 + }, + { + "epoch": 0.742846819670382, + "grad_norm": 5.641897473651625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153170 + }, + { + "epoch": 0.7428953178632182, + "grad_norm": 5.598743086920877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153180 + }, + { + "epoch": 0.7429438160560542, + "grad_norm": 5.656392545461131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153190 + }, + { + "epoch": 0.7429923142488903, + "grad_norm": 6.396101639438712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153200 + }, + { + "epoch": 0.7430408124417264, + "grad_norm": 5.533852345251944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153210 + }, + { + "epoch": 0.7430893106345625, + "grad_norm": 7.01394640145736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153220 + }, + { + "epoch": 0.7431378088273985, + "grad_norm": 7.061407814035192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153230 + }, + { + "epoch": 0.7431863070202347, + "grad_norm": 5.724438665311027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153240 + }, + { + "epoch": 0.7432348052130707, + "grad_norm": 5.479231504068593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153250 + }, + { + "epoch": 0.7432833034059069, + "grad_norm": 5.453060225590889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153260 + }, + { + "epoch": 0.7433318015987429, + "grad_norm": 5.62022648864513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153270 + }, + { + "epoch": 0.743380299791579, + "grad_norm": 5.18295621532161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153280 + }, + { + "epoch": 0.7434287979844151, + "grad_norm": 5.600539338956878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153290 + }, + { + "epoch": 0.7434772961772512, + "grad_norm": 5.478686944115907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153300 + }, + { + "epoch": 0.7435257943700873, + "grad_norm": 5.515283305612684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153310 + }, + { + "epoch": 0.7435742925629234, + "grad_norm": 6.39755626252736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153320 + }, + { + "epoch": 0.7436227907557594, + "grad_norm": 6.130106271484692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153330 + }, + { + "epoch": 0.7436712889485956, + "grad_norm": 4.968405846739188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153340 + }, + { + "epoch": 0.7437197871414316, + "grad_norm": 4.941748557030223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153350 + }, + { + "epoch": 0.7437682853342678, + "grad_norm": 7.923571843093669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153360 + }, + { + "epoch": 0.7438167835271038, + "grad_norm": 5.319556066751829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153370 + }, + { + "epoch": 0.7438652817199399, + "grad_norm": 4.967522500010091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153380 + }, + { + "epoch": 0.743913779912776, + "grad_norm": 4.665294000005815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153390 + }, + { + "epoch": 0.7439622781056121, + "grad_norm": 5.422904223451042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153400 + }, + { + "epoch": 0.7440107762984481, + "grad_norm": 6.313853191386443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153410 + }, + { + "epoch": 0.7440592744912843, + "grad_norm": 5.397886866376211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153420 + }, + { + "epoch": 0.7441077726841203, + "grad_norm": 5.415844270828529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153430 + }, + { + "epoch": 0.7441562708769565, + "grad_norm": 4.605261665346916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153440 + }, + { + "epoch": 0.7442047690697925, + "grad_norm": 4.7159849714262236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153450 + }, + { + "epoch": 0.7442532672626286, + "grad_norm": 5.352563903215923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153460 + }, + { + "epoch": 0.7443017654554647, + "grad_norm": 6.355043069561361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153470 + }, + { + "epoch": 0.7443502636483008, + "grad_norm": 4.8022997134467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153480 + }, + { + "epoch": 0.7443987618411368, + "grad_norm": 6.037993216523319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153490 + }, + { + "epoch": 0.744447260033973, + "grad_norm": 4.545866545413446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153500 + }, + { + "epoch": 0.744495758226809, + "grad_norm": 4.864472202825709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153510 + }, + { + "epoch": 0.7445442564196452, + "grad_norm": 4.5738005383100244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153520 + }, + { + "epoch": 0.7445927546124812, + "grad_norm": 4.4971824308959185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153530 + }, + { + "epoch": 0.7446412528053173, + "grad_norm": 4.6421084221037745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153540 + }, + { + "epoch": 0.7446897509981534, + "grad_norm": 4.432904177065211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153550 + }, + { + "epoch": 0.7447382491909895, + "grad_norm": 5.110151164444687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153560 + }, + { + "epoch": 0.7447867473838257, + "grad_norm": 4.5026379780210846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153570 + }, + { + "epoch": 0.7448352455766617, + "grad_norm": 9.654399946157355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153580 + }, + { + "epoch": 0.7448837437694978, + "grad_norm": 4.244037654643762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153590 + }, + { + "epoch": 0.7449322419623339, + "grad_norm": 4.604540322361572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153600 + }, + { + "epoch": 0.74498074015517, + "grad_norm": 4.201837100481498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153610 + }, + { + "epoch": 0.745029238348006, + "grad_norm": 4.0933340983428934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153620 + }, + { + "epoch": 0.7450777365408422, + "grad_norm": 4.78233403100603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153630 + }, + { + "epoch": 0.7451262347336782, + "grad_norm": 4.093897416623804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153640 + }, + { + "epoch": 0.7451747329265144, + "grad_norm": 5.281427206682565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153650 + }, + { + "epoch": 0.7452232311193504, + "grad_norm": 4.41174648813103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153660 + }, + { + "epoch": 0.7452717293121865, + "grad_norm": 3.9881464886093454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153670 + }, + { + "epoch": 0.7453202275050226, + "grad_norm": 4.2715794279502006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153680 + }, + { + "epoch": 0.7453687256978587, + "grad_norm": 3.7887807025072107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153690 + }, + { + "epoch": 0.7454172238906948, + "grad_norm": 3.899658906902914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153700 + }, + { + "epoch": 0.7454657220835309, + "grad_norm": 3.968987130065216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153710 + }, + { + "epoch": 0.7455142202763669, + "grad_norm": 1.212610186485108e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 153720 + }, + { + "epoch": 0.7455627184692031, + "grad_norm": 0.000767607125453651, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 153730 + }, + { + "epoch": 0.7456112166620391, + "grad_norm": 4.6050547098275274e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 153740 + }, + { + "epoch": 0.7456597148548753, + "grad_norm": 2.4731165467528626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153750 + }, + { + "epoch": 0.7457082130477113, + "grad_norm": 2.267110176035203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153760 + }, + { + "epoch": 0.7457567112405474, + "grad_norm": 3.4515840525273234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153770 + }, + { + "epoch": 0.7458052094333835, + "grad_norm": 1.6668533135089092e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153780 + }, + { + "epoch": 0.7458537076262196, + "grad_norm": 1.6061723727034405e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153790 + }, + { + "epoch": 0.7459022058190556, + "grad_norm": 1.4280375580710825e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153800 + }, + { + "epoch": 0.7459507040118918, + "grad_norm": 1.0813993867486715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153810 + }, + { + "epoch": 0.7459992022047278, + "grad_norm": 8.557907676731702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153820 + }, + { + "epoch": 0.746047700397564, + "grad_norm": 8.465097380394582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153830 + }, + { + "epoch": 0.7460961985904, + "grad_norm": 8.681171493662987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153840 + }, + { + "epoch": 0.7461446967832361, + "grad_norm": 8.960767445387319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153850 + }, + { + "epoch": 0.7461931949760722, + "grad_norm": 6.129895155027043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153860 + }, + { + "epoch": 0.7462416931689083, + "grad_norm": 1.4742793609912042e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153870 + }, + { + "epoch": 0.7462901913617443, + "grad_norm": 5.4526180974789895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153880 + }, + { + "epoch": 0.7463386895545805, + "grad_norm": 5.889583007956389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153890 + }, + { + "epoch": 0.7463871877474165, + "grad_norm": 6.68389202473918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153900 + }, + { + "epoch": 0.7464356859402527, + "grad_norm": 4.571846147882752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153910 + }, + { + "epoch": 0.7464841841330887, + "grad_norm": 3.827310592896538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153920 + }, + { + "epoch": 0.7465326823259248, + "grad_norm": 3.519777237670496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153930 + }, + { + "epoch": 0.7465811805187609, + "grad_norm": 4.649607035389636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153940 + }, + { + "epoch": 0.746629678711597, + "grad_norm": 4.513073690759484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153950 + }, + { + "epoch": 0.746678176904433, + "grad_norm": 3.4192114526376827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153960 + }, + { + "epoch": 0.7467266750972692, + "grad_norm": 5.920758667343762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153970 + }, + { + "epoch": 0.7467751732901052, + "grad_norm": 3.1238541851053014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153980 + }, + { + "epoch": 0.7468236714829414, + "grad_norm": 4.156486284045968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 153990 + }, + { + "epoch": 0.7468721696757774, + "grad_norm": 3.6503020055533852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154000 + }, + { + "epoch": 0.7469206678686136, + "grad_norm": 2.1151949113118462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154010 + }, + { + "epoch": 0.7469691660614496, + "grad_norm": 2.7737642085412517e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154020 + }, + { + "epoch": 0.7470176642542857, + "grad_norm": 3.3872238418553025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154030 + }, + { + "epoch": 0.7470661624471218, + "grad_norm": 3.5617524645203957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154040 + }, + { + "epoch": 0.7471146606399579, + "grad_norm": 2.7719056561181787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154050 + }, + { + "epoch": 0.7471631588327939, + "grad_norm": 1.9443359633442014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154060 + }, + { + "epoch": 0.7472116570256301, + "grad_norm": 2.24126415560022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154070 + }, + { + "epoch": 0.7472601552184662, + "grad_norm": 2.106307192661916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154080 + }, + { + "epoch": 0.7473086534113023, + "grad_norm": 2.518437895560055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154090 + }, + { + "epoch": 0.7473571516041384, + "grad_norm": 2.3706097636022605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154100 + }, + { + "epoch": 0.7474056497969744, + "grad_norm": 1.8429351484883227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154110 + }, + { + "epoch": 0.7474541479898106, + "grad_norm": 1.8221023765363498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154120 + }, + { + "epoch": 0.7475026461826466, + "grad_norm": 1.5852132264626562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154130 + }, + { + "epoch": 0.7475511443754828, + "grad_norm": 4.881695531366859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154140 + }, + { + "epoch": 0.7475996425683188, + "grad_norm": 2.154516550945118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154150 + }, + { + "epoch": 0.7476481407611549, + "grad_norm": 2.0030179257446434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154160 + }, + { + "epoch": 0.747696638953991, + "grad_norm": 2.7114224394608755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154170 + }, + { + "epoch": 0.7477451371468271, + "grad_norm": 1.3559155149778235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154180 + }, + { + "epoch": 0.7477936353396631, + "grad_norm": 4.287652700440958e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154190 + }, + { + "epoch": 0.7478421335324993, + "grad_norm": 1.7243429510926944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154200 + }, + { + "epoch": 0.7478906317253353, + "grad_norm": 1.2726699196718982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154210 + }, + { + "epoch": 0.7479391299181715, + "grad_norm": 1.264150796487229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154220 + }, + { + "epoch": 0.7479876281110075, + "grad_norm": 1.2170768286523526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154230 + }, + { + "epoch": 0.7480361263038436, + "grad_norm": 1.6670327340762015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154240 + }, + { + "epoch": 0.7480846244966797, + "grad_norm": 1.6723062117307563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154250 + }, + { + "epoch": 0.7481331226895158, + "grad_norm": 1.2701106015811092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154260 + }, + { + "epoch": 0.7481816208823518, + "grad_norm": 1.1319316399749368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154270 + }, + { + "epoch": 0.748230119075188, + "grad_norm": 1.1132729014207143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154280 + }, + { + "epoch": 0.748278617268024, + "grad_norm": 1.4464758351095952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154290 + }, + { + "epoch": 0.7483271154608602, + "grad_norm": 1.446920236958249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154300 + }, + { + "epoch": 0.7483756136536962, + "grad_norm": 1.1066800880144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154310 + }, + { + "epoch": 0.7484241118465323, + "grad_norm": 9.85479914561438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154320 + }, + { + "epoch": 0.7484726100393684, + "grad_norm": 1.0593905699352035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154330 + }, + { + "epoch": 0.7485211082322045, + "grad_norm": 1.4123810387900448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154340 + }, + { + "epoch": 0.7485696064250406, + "grad_norm": 2.3634613626200007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154350 + }, + { + "epoch": 0.7486181046178767, + "grad_norm": 1.012565348901262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154360 + }, + { + "epoch": 0.7486666028107127, + "grad_norm": 1.3421505400401657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154370 + }, + { + "epoch": 0.7487151010035489, + "grad_norm": 1.0435138619868667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154380 + }, + { + "epoch": 0.7487635991963849, + "grad_norm": 1.3143126125214621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154390 + }, + { + "epoch": 0.748812097389221, + "grad_norm": 1.6240014701907057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154400 + }, + { + "epoch": 0.7488605955820571, + "grad_norm": 8.946768161877117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154410 + }, + { + "epoch": 0.7489090937748932, + "grad_norm": 7.838874580556876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154420 + }, + { + "epoch": 0.7489575919677293, + "grad_norm": 1.0510433412491693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154430 + }, + { + "epoch": 0.7490060901605654, + "grad_norm": 1.143104896073055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154440 + }, + { + "epoch": 0.7490545883534014, + "grad_norm": 1.0409269179945113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154450 + }, + { + "epoch": 0.7491030865462376, + "grad_norm": 8.660391586090554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154460 + }, + { + "epoch": 0.7491515847390736, + "grad_norm": 8.520553365087835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154470 + }, + { + "epoch": 0.7492000829319098, + "grad_norm": 1.3658307125297142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154480 + }, + { + "epoch": 0.7492485811247458, + "grad_norm": 1.0637519380907179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154490 + }, + { + "epoch": 0.7492970793175819, + "grad_norm": 1.135495494963834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154500 + }, + { + "epoch": 0.749345577510418, + "grad_norm": 7.250288263094262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154510 + }, + { + "epoch": 0.7493940757032541, + "grad_norm": 6.45038028324052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154520 + }, + { + "epoch": 0.7494425738960901, + "grad_norm": 7.353620503636193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154530 + }, + { + "epoch": 0.7494910720889263, + "grad_norm": 8.372425668312644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154540 + }, + { + "epoch": 0.7495395702817623, + "grad_norm": 8.852506994116993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154550 + }, + { + "epoch": 0.7495880684745985, + "grad_norm": 3.035971531062387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154560 + }, + { + "epoch": 0.7496365666674345, + "grad_norm": 6.32577496162412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154570 + }, + { + "epoch": 0.7496850648602706, + "grad_norm": 6.040393145667622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154580 + }, + { + "epoch": 0.7497335630531068, + "grad_norm": 8.653432814753614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154590 + }, + { + "epoch": 0.7497820612459428, + "grad_norm": 7.913257604741375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154600 + }, + { + "epoch": 0.749830559438779, + "grad_norm": 7.115452831385483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154610 + }, + { + "epoch": 0.749879057631615, + "grad_norm": 5.607523121398117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154620 + }, + { + "epoch": 0.7499275558244511, + "grad_norm": 6.755868753316463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154630 + }, + { + "epoch": 0.7499760540172872, + "grad_norm": 7.03972375504236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154640 + }, + { + "epoch": 0.7500245522101233, + "grad_norm": 7.168209208430198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154650 + }, + { + "epoch": 0.7500730504029594, + "grad_norm": 1.3355342161958106e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154660 + }, + { + "epoch": 0.7501215485957955, + "grad_norm": 1.157818019237311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154670 + }, + { + "epoch": 0.7501700467886315, + "grad_norm": 4.840959491048125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154680 + }, + { + "epoch": 0.7502185449814677, + "grad_norm": 6.56206452731567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154690 + }, + { + "epoch": 0.7502670431743037, + "grad_norm": 6.9095028720767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154700 + }, + { + "epoch": 0.7503155413671398, + "grad_norm": 4.3708217845050967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154710 + }, + { + "epoch": 0.7503640395599759, + "grad_norm": 6.836461352577317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154720 + }, + { + "epoch": 0.750412537752812, + "grad_norm": 5.74278203657741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154730 + }, + { + "epoch": 0.7504610359456481, + "grad_norm": 6.92562593940238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154740 + }, + { + "epoch": 0.7505095341384842, + "grad_norm": 6.075057967791508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154750 + }, + { + "epoch": 0.7505580323313202, + "grad_norm": 5.502403723767202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154760 + }, + { + "epoch": 0.7506065305241564, + "grad_norm": 4.549337120351993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154770 + }, + { + "epoch": 0.7506550287169924, + "grad_norm": 5.356865813155309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154780 + }, + { + "epoch": 0.7507035269098286, + "grad_norm": 6.015019948790723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154790 + }, + { + "epoch": 0.7507520251026646, + "grad_norm": 7.111064519449428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154800 + }, + { + "epoch": 0.7508005232955007, + "grad_norm": 4.116928948860732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154810 + }, + { + "epoch": 0.7508490214883368, + "grad_norm": 4.3206571831433394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154820 + }, + { + "epoch": 0.7508975196811729, + "grad_norm": 4.592597235841822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154830 + }, + { + "epoch": 0.7509460178740089, + "grad_norm": 6.656389359704917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154840 + }, + { + "epoch": 0.7509945160668451, + "grad_norm": 6.04464503339841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154850 + }, + { + "epoch": 0.7510430142596811, + "grad_norm": 6.262390570554999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154860 + }, + { + "epoch": 0.7510915124525173, + "grad_norm": 4.6588721147600154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154870 + }, + { + "epoch": 0.7511400106453533, + "grad_norm": 5.216012368691736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154880 + }, + { + "epoch": 0.7511885088381894, + "grad_norm": 5.285202178129111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154890 + }, + { + "epoch": 0.7512370070310255, + "grad_norm": 5.812688073092431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154900 + }, + { + "epoch": 0.7512855052238616, + "grad_norm": 4.1481305856905237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154910 + }, + { + "epoch": 0.7513340034166976, + "grad_norm": 4.726034319446626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154920 + }, + { + "epoch": 0.7513825016095338, + "grad_norm": 1.5322881381507614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154930 + }, + { + "epoch": 0.7514309998023698, + "grad_norm": 4.532394939360529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154940 + }, + { + "epoch": 0.751479497995206, + "grad_norm": 5.502866997630917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154950 + }, + { + "epoch": 0.751527996188042, + "grad_norm": 4.842235057367361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154960 + }, + { + "epoch": 0.7515764943808781, + "grad_norm": 4.490642311338888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154970 + }, + { + "epoch": 0.7516249925737142, + "grad_norm": 4.306367600293015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154980 + }, + { + "epoch": 0.7516734907665503, + "grad_norm": 2.0074558051419444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 154990 + }, + { + "epoch": 0.7517219889593864, + "grad_norm": 4.839392886424321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155000 + }, + { + "epoch": 0.7517704871522225, + "grad_norm": 4.039726491100737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155010 + }, + { + "epoch": 0.7518189853450585, + "grad_norm": 3.6953218796043075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155020 + }, + { + "epoch": 0.7518674835378947, + "grad_norm": 3.7776752037643746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155030 + }, + { + "epoch": 0.7519159817307307, + "grad_norm": 4.212377859857952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155040 + }, + { + "epoch": 0.7519644799235669, + "grad_norm": 3.881926602389285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155050 + }, + { + "epoch": 0.7520129781164029, + "grad_norm": 4.426333362061996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155060 + }, + { + "epoch": 0.752061476309239, + "grad_norm": 4.3515001380001195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155070 + }, + { + "epoch": 0.7521099745020751, + "grad_norm": 4.530813271230727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155080 + }, + { + "epoch": 0.7521584726949112, + "grad_norm": 4.447961998721439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155090 + }, + { + "epoch": 0.7522069708877474, + "grad_norm": 4.525010126599227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155100 + }, + { + "epoch": 0.7522554690805834, + "grad_norm": 3.4529819004092133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155110 + }, + { + "epoch": 0.7523039672734195, + "grad_norm": 3.6381791801431973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155120 + }, + { + "epoch": 0.7523524654662556, + "grad_norm": 3.3901110896294995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155130 + }, + { + "epoch": 0.7524009636590917, + "grad_norm": 7.225917784126068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155140 + }, + { + "epoch": 0.7524494618519277, + "grad_norm": 4.554309498416842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155150 + }, + { + "epoch": 0.7524979600447639, + "grad_norm": 3.450346639510826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155160 + }, + { + "epoch": 0.7525464582375999, + "grad_norm": 3.617010122525244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155170 + }, + { + "epoch": 0.7525949564304361, + "grad_norm": 3.5054972613579594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155180 + }, + { + "epoch": 0.7526434546232721, + "grad_norm": 6.155964911158662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155190 + }, + { + "epoch": 0.7526919528161082, + "grad_norm": 4.6740225911889866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155200 + }, + { + "epoch": 0.7527404510089443, + "grad_norm": 3.650191899851052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155210 + }, + { + "epoch": 0.7527889492017804, + "grad_norm": 4.0641572240929236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155220 + }, + { + "epoch": 0.7528374473946164, + "grad_norm": 3.721644361576182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155230 + }, + { + "epoch": 0.7528859455874526, + "grad_norm": 4.321859137235151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155240 + }, + { + "epoch": 0.7529344437802886, + "grad_norm": 3.812953536908026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155250 + }, + { + "epoch": 0.7529829419731248, + "grad_norm": 3.1586924365001323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155260 + }, + { + "epoch": 0.7530314401659608, + "grad_norm": 3.149141036828951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155270 + }, + { + "epoch": 0.7530799383587969, + "grad_norm": 5.922286732129578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155280 + }, + { + "epoch": 0.753128436551633, + "grad_norm": 3.3743324934221164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155290 + }, + { + "epoch": 0.7531769347444691, + "grad_norm": 4.2843728920161084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155300 + }, + { + "epoch": 0.7532254329373052, + "grad_norm": 2.79348398635193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155310 + }, + { + "epoch": 0.7532739311301413, + "grad_norm": 3.697368526900391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155320 + }, + { + "epoch": 0.7533224293229773, + "grad_norm": 3.4489897871026187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155330 + }, + { + "epoch": 0.7533709275158135, + "grad_norm": 4.4504096763375856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155340 + }, + { + "epoch": 0.7534194257086495, + "grad_norm": 3.8777398003730923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155350 + }, + { + "epoch": 0.7534679239014856, + "grad_norm": 3.131409016532416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155360 + }, + { + "epoch": 0.7535164220943217, + "grad_norm": 3.991427206528897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155370 + }, + { + "epoch": 0.7535649202871578, + "grad_norm": 3.050476777843869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155380 + }, + { + "epoch": 0.7536134184799939, + "grad_norm": 3.818697393853654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155390 + }, + { + "epoch": 0.75366191667283, + "grad_norm": 3.6991067986491544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155400 + }, + { + "epoch": 0.753710414865666, + "grad_norm": 3.4021010719698097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155410 + }, + { + "epoch": 0.7537589130585022, + "grad_norm": 2.80398751328903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155420 + }, + { + "epoch": 0.7538074112513382, + "grad_norm": 2.600927757612226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155430 + }, + { + "epoch": 0.7538559094441744, + "grad_norm": 3.9741047430652543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155440 + }, + { + "epoch": 0.7539044076370104, + "grad_norm": 6.158973633318965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155450 + }, + { + "epoch": 0.7539529058298465, + "grad_norm": 3.359019160598109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155460 + }, + { + "epoch": 0.7540014040226826, + "grad_norm": 4.391707477680029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155470 + }, + { + "epoch": 0.7540499022155187, + "grad_norm": 2.672955474736227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155480 + }, + { + "epoch": 0.7540984004083547, + "grad_norm": 3.9891853020890267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155490 + }, + { + "epoch": 0.7541468986011909, + "grad_norm": 5.132956175657455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155500 + }, + { + "epoch": 0.7541953967940269, + "grad_norm": 2.5514566459605703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155510 + }, + { + "epoch": 0.7542438949868631, + "grad_norm": 2.772858351818286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155520 + }, + { + "epoch": 0.7542923931796991, + "grad_norm": 2.695865362056793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155530 + }, + { + "epoch": 0.7543408913725352, + "grad_norm": 3.1209273743115773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155540 + }, + { + "epoch": 0.7543893895653713, + "grad_norm": 5.8919854382111225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155550 + }, + { + "epoch": 0.7544378877582074, + "grad_norm": 2.671760626071773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155560 + }, + { + "epoch": 0.7544863859510434, + "grad_norm": 3.193994757566543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155570 + }, + { + "epoch": 0.7545348841438796, + "grad_norm": 2.744938001342234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155580 + }, + { + "epoch": 0.7545833823367156, + "grad_norm": 5.318386797625863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155590 + }, + { + "epoch": 0.7546318805295518, + "grad_norm": 2.8170191512799647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155600 + }, + { + "epoch": 0.7546803787223879, + "grad_norm": 3.1846502679400146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155610 + }, + { + "epoch": 0.754728876915224, + "grad_norm": 3.1488642093790986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155620 + }, + { + "epoch": 0.7547773751080601, + "grad_norm": 2.4879287252588256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155630 + }, + { + "epoch": 0.7548258733008961, + "grad_norm": 2.742369815678103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155640 + }, + { + "epoch": 0.7548743714937323, + "grad_norm": 2.7466282404020603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155650 + }, + { + "epoch": 0.7549228696865683, + "grad_norm": 2.742577578374039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155660 + }, + { + "epoch": 0.7549713678794044, + "grad_norm": 2.6534502239883295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155670 + }, + { + "epoch": 0.7550198660722405, + "grad_norm": 2.7405221203480323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155680 + }, + { + "epoch": 0.7550683642650766, + "grad_norm": 2.992833003645501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155690 + }, + { + "epoch": 0.7551168624579127, + "grad_norm": 2.564481178524147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155700 + }, + { + "epoch": 0.7551653606507488, + "grad_norm": 2.28626618081762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155710 + }, + { + "epoch": 0.7552138588435848, + "grad_norm": 2.537437922001118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155720 + }, + { + "epoch": 0.755262357036421, + "grad_norm": 2.423593343792163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155730 + }, + { + "epoch": 0.755310855229257, + "grad_norm": 2.7790716217168665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155740 + }, + { + "epoch": 0.7553593534220931, + "grad_norm": 6.079248464629927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155750 + }, + { + "epoch": 0.7554078516149292, + "grad_norm": 2.626992170462472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155760 + }, + { + "epoch": 0.7554563498077653, + "grad_norm": 2.602921824745863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155770 + }, + { + "epoch": 0.7555048480006014, + "grad_norm": 2.3765581147472403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155780 + }, + { + "epoch": 0.7555533461934375, + "grad_norm": 3.0760779168304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155790 + }, + { + "epoch": 0.7556018443862735, + "grad_norm": 2.8826477205257106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155800 + }, + { + "epoch": 0.7556503425791097, + "grad_norm": 2.3776395607910672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155810 + }, + { + "epoch": 0.7556988407719457, + "grad_norm": 2.4133709075613297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155820 + }, + { + "epoch": 0.7557473389647819, + "grad_norm": 2.3878124011389446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155830 + }, + { + "epoch": 0.7557958371576179, + "grad_norm": 3.2462395438415115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155840 + }, + { + "epoch": 0.755844335350454, + "grad_norm": 9.013368753585382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155850 + }, + { + "epoch": 0.7558928335432901, + "grad_norm": 2.282684050669559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155860 + }, + { + "epoch": 0.7559413317361262, + "grad_norm": 2.3961320039234124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155870 + }, + { + "epoch": 0.7559898299289622, + "grad_norm": 2.3776398450081615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155880 + }, + { + "epoch": 0.7560383281217984, + "grad_norm": 2.3654656899907422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155890 + }, + { + "epoch": 0.7560868263146344, + "grad_norm": 2.6080897441715933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155900 + }, + { + "epoch": 0.7561353245074706, + "grad_norm": 2.329325212713229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155910 + }, + { + "epoch": 0.7561838227003066, + "grad_norm": 2.3492567891025828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155920 + }, + { + "epoch": 0.7562323208931427, + "grad_norm": 2.4579082946729613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155930 + }, + { + "epoch": 0.7562808190859788, + "grad_norm": 3.2438612151963753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155940 + }, + { + "epoch": 0.7563293172788149, + "grad_norm": 2.564735268606455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155950 + }, + { + "epoch": 0.756377815471651, + "grad_norm": 2.023439265030902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155960 + }, + { + "epoch": 0.7564263136644871, + "grad_norm": 2.3234662194226985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155970 + }, + { + "epoch": 0.7564748118573231, + "grad_norm": 2.23107946339951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155980 + }, + { + "epoch": 0.7565233100501593, + "grad_norm": 2.832037750977179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 155990 + }, + { + "epoch": 0.7565718082429953, + "grad_norm": 2.155462368591543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156000 + }, + { + "epoch": 0.7566203064358314, + "grad_norm": 9.533685556561977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156010 + }, + { + "epoch": 0.7566688046286675, + "grad_norm": 2.0714007575861615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156020 + }, + { + "epoch": 0.7567173028215036, + "grad_norm": 2.1881577083604498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156030 + }, + { + "epoch": 0.7567658010143397, + "grad_norm": 2.484503909272462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156040 + }, + { + "epoch": 0.7568142992071758, + "grad_norm": 2.328967951825689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156050 + }, + { + "epoch": 0.7568627974000118, + "grad_norm": 2.1523668181089306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156060 + }, + { + "epoch": 0.756911295592848, + "grad_norm": 2.167694077570559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156070 + }, + { + "epoch": 0.756959793785684, + "grad_norm": 2.1114918524745008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156080 + }, + { + "epoch": 0.7570082919785202, + "grad_norm": 2.610979947803571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156090 + }, + { + "epoch": 0.7570567901713562, + "grad_norm": 1.9379707794087153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156100 + }, + { + "epoch": 0.7571052883641923, + "grad_norm": 2.0511882325990882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156110 + }, + { + "epoch": 0.7571537865570285, + "grad_norm": 2.0348387863577955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156120 + }, + { + "epoch": 0.7572022847498645, + "grad_norm": 1.8878681373735162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156130 + }, + { + "epoch": 0.7572507829427007, + "grad_norm": 2.1674603090104938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156140 + }, + { + "epoch": 0.7572992811355367, + "grad_norm": 2.002966255076899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156150 + }, + { + "epoch": 0.7573477793283728, + "grad_norm": 2.0507015108250926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156160 + }, + { + "epoch": 0.7573962775212089, + "grad_norm": 2.6625355076248525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156170 + }, + { + "epoch": 0.757444775714045, + "grad_norm": 2.355913721885372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156180 + }, + { + "epoch": 0.757493273906881, + "grad_norm": 2.6103333539140294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156190 + }, + { + "epoch": 0.7575417720997172, + "grad_norm": 2.2211091277313244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156200 + }, + { + "epoch": 0.7575902702925532, + "grad_norm": 2.73417072094162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156210 + }, + { + "epoch": 0.7576387684853894, + "grad_norm": 1.9598749645410862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156220 + }, + { + "epoch": 0.7576872666782254, + "grad_norm": 1.8892295372552326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156230 + }, + { + "epoch": 0.7577357648710615, + "grad_norm": 2.434564123632299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156240 + }, + { + "epoch": 0.7577842630638976, + "grad_norm": 2.0469909145504062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156250 + }, + { + "epoch": 0.7578327612567337, + "grad_norm": 1.994603451294097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156260 + }, + { + "epoch": 0.7578812594495697, + "grad_norm": 2.0126351785165753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156270 + }, + { + "epoch": 0.7579297576424059, + "grad_norm": 1.7888783077069093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156280 + }, + { + "epoch": 0.7579782558352419, + "grad_norm": 2.0913542186917766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156290 + }, + { + "epoch": 0.7580267540280781, + "grad_norm": 2.1715177922487783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156300 + }, + { + "epoch": 0.7580752522209141, + "grad_norm": 2.030569987709896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156310 + }, + { + "epoch": 0.7581237504137502, + "grad_norm": 2.0310154980052175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156320 + }, + { + "epoch": 0.7581722486065863, + "grad_norm": 1.8680560742723173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156330 + }, + { + "epoch": 0.7582207467994224, + "grad_norm": 1.8479568097973242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156340 + }, + { + "epoch": 0.7582692449922585, + "grad_norm": 1.8910230892288382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156350 + }, + { + "epoch": 0.7583177431850946, + "grad_norm": 1.8660985290352983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156360 + }, + { + "epoch": 0.7583662413779306, + "grad_norm": 1.887362230945655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156370 + }, + { + "epoch": 0.7584147395707668, + "grad_norm": 1.936055156193106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156380 + }, + { + "epoch": 0.7584632377636028, + "grad_norm": 2.3690533623721421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156390 + }, + { + "epoch": 0.758511735956439, + "grad_norm": 1.7811115071708628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156400 + }, + { + "epoch": 0.758560234149275, + "grad_norm": 1.7248770234346011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156410 + }, + { + "epoch": 0.7586087323421111, + "grad_norm": 1.763860808523532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156420 + }, + { + "epoch": 0.7586572305349472, + "grad_norm": 1.6809764247227577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156430 + }, + { + "epoch": 0.7587057287277833, + "grad_norm": 1.585603541798264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156440 + }, + { + "epoch": 0.7587542269206193, + "grad_norm": 2.0595551575297577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156450 + }, + { + "epoch": 0.7588027251134555, + "grad_norm": 1.7080347447517852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156460 + }, + { + "epoch": 0.7588512233062915, + "grad_norm": 2.0251225407719176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156470 + }, + { + "epoch": 0.7588997214991277, + "grad_norm": 1.777577267603192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156480 + }, + { + "epoch": 0.7589482196919637, + "grad_norm": 1.838824488231694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156490 + }, + { + "epoch": 0.7589967178847998, + "grad_norm": 2.797597744574887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156500 + }, + { + "epoch": 0.7590452160776359, + "grad_norm": 1.731669243554279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156510 + }, + { + "epoch": 0.759093714270472, + "grad_norm": 1.7063997859168012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156520 + }, + { + "epoch": 0.759142212463308, + "grad_norm": 1.6412914760621788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156530 + }, + { + "epoch": 0.7591907106561442, + "grad_norm": 1.373061593312741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156540 + }, + { + "epoch": 0.7592392088489802, + "grad_norm": 1.7264785867610044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156550 + }, + { + "epoch": 0.7592877070418164, + "grad_norm": 1.698613658618342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156560 + }, + { + "epoch": 0.7593362052346524, + "grad_norm": 1.669099134460339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156570 + }, + { + "epoch": 0.7593847034274885, + "grad_norm": 1.7692076426101266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156580 + }, + { + "epoch": 0.7594332016203246, + "grad_norm": 1.6127501112350728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156590 + }, + { + "epoch": 0.7594816998131607, + "grad_norm": 1.8656322708920925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156600 + }, + { + "epoch": 0.7595301980059967, + "grad_norm": 1.4897786115852796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156610 + }, + { + "epoch": 0.7595786961988329, + "grad_norm": 1.1722533599822782e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156620 + }, + { + "epoch": 0.7596271943916689, + "grad_norm": 1.5769235517382185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156630 + }, + { + "epoch": 0.7596756925845051, + "grad_norm": 1.691894340183353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156640 + }, + { + "epoch": 0.7597241907773412, + "grad_norm": 1.9908694071091304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156650 + }, + { + "epoch": 0.7597726889701772, + "grad_norm": 1.64476631425714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156660 + }, + { + "epoch": 0.7598211871630134, + "grad_norm": 1.6844900585510914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156670 + }, + { + "epoch": 0.7598696853558494, + "grad_norm": 1.5320850366151717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156680 + }, + { + "epoch": 0.7599181835486856, + "grad_norm": 2.3818387262508622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156690 + }, + { + "epoch": 0.7599666817415216, + "grad_norm": 1.6098866240099596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156700 + }, + { + "epoch": 0.7600151799343577, + "grad_norm": 1.6286564630263456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156710 + }, + { + "epoch": 0.7600636781271938, + "grad_norm": 1.3915928320784587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156720 + }, + { + "epoch": 0.7601121763200299, + "grad_norm": 1.4207220999651327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156730 + }, + { + "epoch": 0.760160674512866, + "grad_norm": 1.3865533787793538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156740 + }, + { + "epoch": 0.7602091727057021, + "grad_norm": 1.411437011711314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156750 + }, + { + "epoch": 0.7602576708985381, + "grad_norm": 1.897250569982134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156760 + }, + { + "epoch": 0.7603061690913743, + "grad_norm": 1.4861974761970487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156770 + }, + { + "epoch": 0.7603546672842103, + "grad_norm": 1.5419693966123305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156780 + }, + { + "epoch": 0.7604031654770464, + "grad_norm": 1.401300977477149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156790 + }, + { + "epoch": 0.7604516636698825, + "grad_norm": 1.4592893649023608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156800 + }, + { + "epoch": 0.7605001618627186, + "grad_norm": 1.5832608823984629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156810 + }, + { + "epoch": 0.7605486600555547, + "grad_norm": 1.2896515499960515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156820 + }, + { + "epoch": 0.7605971582483908, + "grad_norm": 1.4097106770805112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156830 + }, + { + "epoch": 0.7606456564412268, + "grad_norm": 1.3885488669984625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156840 + }, + { + "epoch": 0.760694154634063, + "grad_norm": 1.382553449502666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156850 + }, + { + "epoch": 0.760742652826899, + "grad_norm": 1.2289832795886468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156860 + }, + { + "epoch": 0.7607911510197352, + "grad_norm": 1.7520247297397873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156870 + }, + { + "epoch": 0.7608396492125712, + "grad_norm": 1.3119020536578319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156880 + }, + { + "epoch": 0.7608881474054073, + "grad_norm": 1.3478964433488727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156890 + }, + { + "epoch": 0.7609366455982434, + "grad_norm": 1.275991792226705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156900 + }, + { + "epoch": 0.7609851437910795, + "grad_norm": 1.412220456131763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156910 + }, + { + "epoch": 0.7610336419839155, + "grad_norm": 1.3316766001025826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156920 + }, + { + "epoch": 0.7610821401767517, + "grad_norm": 1.363466708426131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156930 + }, + { + "epoch": 0.7611306383695877, + "grad_norm": 1.3227428041773237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156940 + }, + { + "epoch": 0.7611791365624239, + "grad_norm": 1.194399601445184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156950 + }, + { + "epoch": 0.7612276347552599, + "grad_norm": 1.3637392726195685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156960 + }, + { + "epoch": 0.761276132948096, + "grad_norm": 1.317525999411373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156970 + }, + { + "epoch": 0.7613246311409321, + "grad_norm": 1.358933872097623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156980 + }, + { + "epoch": 0.7613731293337682, + "grad_norm": 1.277268779631413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 156990 + }, + { + "epoch": 0.7614216275266042, + "grad_norm": 1.2624454370779858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157000 + }, + { + "epoch": 0.7614701257194404, + "grad_norm": 1.2883580779998738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157010 + }, + { + "epoch": 0.7615186239122764, + "grad_norm": 1.1982082469330635e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157020 + }, + { + "epoch": 0.7615671221051126, + "grad_norm": 1.1822292123042644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157030 + }, + { + "epoch": 0.7616156202979486, + "grad_norm": 1.1370632080343057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157040 + }, + { + "epoch": 0.7616641184907847, + "grad_norm": 1.1563235347011869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157050 + }, + { + "epoch": 0.7617126166836208, + "grad_norm": 1.1806605471065268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157060 + }, + { + "epoch": 0.7617611148764569, + "grad_norm": 1.2056136711180443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157070 + }, + { + "epoch": 0.761809613069293, + "grad_norm": 1.1719020420741799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157080 + }, + { + "epoch": 0.7618581112621291, + "grad_norm": 1.1001679922628682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157090 + }, + { + "epoch": 0.7619066094549651, + "grad_norm": 1.0875318423586577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157100 + }, + { + "epoch": 0.7619551076478013, + "grad_norm": 1.201917143589526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157110 + }, + { + "epoch": 0.7620036058406373, + "grad_norm": 1.3130103582170705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157120 + }, + { + "epoch": 0.7620521040334735, + "grad_norm": 1.2285680384138686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157130 + }, + { + "epoch": 0.7621006022263095, + "grad_norm": 1.0992275889520897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157140 + }, + { + "epoch": 0.7621491004191456, + "grad_norm": 1.1184344828052417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157150 + }, + { + "epoch": 0.7621975986119818, + "grad_norm": 1.1824017320805069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157160 + }, + { + "epoch": 0.7622460968048178, + "grad_norm": 1.171523464904567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157170 + }, + { + "epoch": 0.762294594997654, + "grad_norm": 1.493581862632709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157180 + }, + { + "epoch": 0.76234309319049, + "grad_norm": 1.0057387811457374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157190 + }, + { + "epoch": 0.7623915913833261, + "grad_norm": 1.0304201936151003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157200 + }, + { + "epoch": 0.7624400895761622, + "grad_norm": 1.0863515598202866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157210 + }, + { + "epoch": 0.7624885877689983, + "grad_norm": 2.1981213649269193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157220 + }, + { + "epoch": 0.7625370859618343, + "grad_norm": 1.1146327238975573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157230 + }, + { + "epoch": 0.7625855841546705, + "grad_norm": 1.044413906470254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157240 + }, + { + "epoch": 0.7626340823475065, + "grad_norm": 9.599836658935601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157250 + }, + { + "epoch": 0.7626825805403427, + "grad_norm": 1.5487862015106657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157260 + }, + { + "epoch": 0.7627310787331787, + "grad_norm": 1.0430240138248337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157270 + }, + { + "epoch": 0.7627795769260148, + "grad_norm": 1.0875816514044345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157280 + }, + { + "epoch": 0.7628280751188509, + "grad_norm": 1.0394981586614449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157290 + }, + { + "epoch": 0.762876573311687, + "grad_norm": 1.1823426149248917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157300 + }, + { + "epoch": 0.762925071504523, + "grad_norm": 1.0208086820284734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157310 + }, + { + "epoch": 0.7629735696973592, + "grad_norm": 1.09707073647769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157320 + }, + { + "epoch": 0.7630220678901952, + "grad_norm": 1.104251907690923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157330 + }, + { + "epoch": 0.7630705660830314, + "grad_norm": 1.1895554763441396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157340 + }, + { + "epoch": 0.7631190642758674, + "grad_norm": 1.1339408700905551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157350 + }, + { + "epoch": 0.7631675624687035, + "grad_norm": 1.0378329307059175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157360 + }, + { + "epoch": 0.7632160606615396, + "grad_norm": 9.949982171519878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157370 + }, + { + "epoch": 0.7632645588543757, + "grad_norm": 1.04047366278337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157380 + }, + { + "epoch": 0.7633130570472118, + "grad_norm": 9.902728947963624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157390 + }, + { + "epoch": 0.7633615552400479, + "grad_norm": 1.1387480469693401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157400 + }, + { + "epoch": 0.7634100534328839, + "grad_norm": 1.1575367864224972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157410 + }, + { + "epoch": 0.7634585516257201, + "grad_norm": 1.1486509521319022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157420 + }, + { + "epoch": 0.7635070498185561, + "grad_norm": 1.0207163825270982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157430 + }, + { + "epoch": 0.7635555480113922, + "grad_norm": 9.565255965071628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157440 + }, + { + "epoch": 0.7636040462042283, + "grad_norm": 9.688666580132121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157450 + }, + { + "epoch": 0.7636525443970644, + "grad_norm": 9.810789691755417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157460 + }, + { + "epoch": 0.7637010425899005, + "grad_norm": 1.1698461577225316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157470 + }, + { + "epoch": 0.7637495407827366, + "grad_norm": 9.456586269607214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157480 + }, + { + "epoch": 0.7637980389755726, + "grad_norm": 9.89651383065393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157490 + }, + { + "epoch": 0.7638465371684088, + "grad_norm": 1.0956781437698737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157500 + }, + { + "epoch": 0.7638950353612448, + "grad_norm": 9.213107432515244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157510 + }, + { + "epoch": 0.763943533554081, + "grad_norm": 9.733038552894868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157520 + }, + { + "epoch": 0.763992031746917, + "grad_norm": 9.282958046696876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157530 + }, + { + "epoch": 0.7640405299397531, + "grad_norm": 1.0562141028458427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157540 + }, + { + "epoch": 0.7640890281325892, + "grad_norm": 9.197241013225721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157550 + }, + { + "epoch": 0.7641375263254253, + "grad_norm": 9.452411120491888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157560 + }, + { + "epoch": 0.7641860245182613, + "grad_norm": 9.348483587245937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157570 + }, + { + "epoch": 0.7642345227110975, + "grad_norm": 9.508342913022716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157580 + }, + { + "epoch": 0.7642830209039335, + "grad_norm": 8.85579396481262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157590 + }, + { + "epoch": 0.7643315190967697, + "grad_norm": 8.954128105642667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157600 + }, + { + "epoch": 0.7643800172896057, + "grad_norm": 9.697749447923343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157610 + }, + { + "epoch": 0.7644285154824418, + "grad_norm": 1.0765860736228205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157620 + }, + { + "epoch": 0.7644770136752779, + "grad_norm": 1.0231852343167702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157630 + }, + { + "epoch": 0.764525511868114, + "grad_norm": 1.0169039654783774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157640 + }, + { + "epoch": 0.76457401006095, + "grad_norm": 8.371056736677929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157650 + }, + { + "epoch": 0.7646225082537862, + "grad_norm": 9.169655612595307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157660 + }, + { + "epoch": 0.7646710064466223, + "grad_norm": 8.750308211347146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157670 + }, + { + "epoch": 0.7647195046394584, + "grad_norm": 1.0007659057009732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157680 + }, + { + "epoch": 0.7647680028322945, + "grad_norm": 9.508118381518216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157690 + }, + { + "epoch": 0.7648165010251305, + "grad_norm": 8.576495247325511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157700 + }, + { + "epoch": 0.7648649992179667, + "grad_norm": 1.0086686330623706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157710 + }, + { + "epoch": 0.7649134974108027, + "grad_norm": 9.196062222827095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157720 + }, + { + "epoch": 0.7649619956036389, + "grad_norm": 9.192866201601646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157730 + }, + { + "epoch": 0.7650104937964749, + "grad_norm": 8.800567030675666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157740 + }, + { + "epoch": 0.765058991989311, + "grad_norm": 8.615803182010495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157750 + }, + { + "epoch": 0.7651074901821471, + "grad_norm": 9.349075469344825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157760 + }, + { + "epoch": 0.7651559883749832, + "grad_norm": 8.729514178185127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157770 + }, + { + "epoch": 0.7652044865678193, + "grad_norm": 9.343796136818128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157780 + }, + { + "epoch": 0.7652529847606554, + "grad_norm": 9.229822950374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157790 + }, + { + "epoch": 0.7653014829534914, + "grad_norm": 7.956546710374823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157800 + }, + { + "epoch": 0.7653499811463276, + "grad_norm": 9.006808454614657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157810 + }, + { + "epoch": 0.7653984793391636, + "grad_norm": 9.138342704773095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157820 + }, + { + "epoch": 0.7654469775319998, + "grad_norm": 1.11238193767349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157830 + }, + { + "epoch": 0.7654954757248358, + "grad_norm": 9.366009834366196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157840 + }, + { + "epoch": 0.7655439739176719, + "grad_norm": 9.200914519169601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157850 + }, + { + "epoch": 0.765592472110508, + "grad_norm": 8.896915204559264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157860 + }, + { + "epoch": 0.7656409703033441, + "grad_norm": 8.666965811698901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157870 + }, + { + "epoch": 0.7656894684961801, + "grad_norm": 8.814604512963342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157880 + }, + { + "epoch": 0.7657379666890163, + "grad_norm": 9.832880465410199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157890 + }, + { + "epoch": 0.7657864648818523, + "grad_norm": 7.970840698590109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157900 + }, + { + "epoch": 0.7658349630746885, + "grad_norm": 9.139823475834419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157910 + }, + { + "epoch": 0.7658834612675245, + "grad_norm": 9.04769024145935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157920 + }, + { + "epoch": 0.7659319594603606, + "grad_norm": 8.637704240754829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157930 + }, + { + "epoch": 0.7659804576531967, + "grad_norm": 8.278647811721385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157940 + }, + { + "epoch": 0.7660289558460328, + "grad_norm": 8.575560883627986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157950 + }, + { + "epoch": 0.7660774540388688, + "grad_norm": 8.843608156894334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157960 + }, + { + "epoch": 0.766125952231705, + "grad_norm": 9.894417729583438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157970 + }, + { + "epoch": 0.766174450424541, + "grad_norm": 8.868579470799887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157980 + }, + { + "epoch": 0.7662229486173772, + "grad_norm": 8.648608229577803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 157990 + }, + { + "epoch": 0.7662714468102132, + "grad_norm": 8.670060935855872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158000 + }, + { + "epoch": 0.7663199450030493, + "grad_norm": 8.64393641109018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158010 + }, + { + "epoch": 0.7663684431958854, + "grad_norm": 1.0570516195684831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158020 + }, + { + "epoch": 0.7664169413887215, + "grad_norm": 8.891072411643108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158030 + }, + { + "epoch": 0.7664654395815576, + "grad_norm": 9.585315297044872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158040 + }, + { + "epoch": 0.7665139377743937, + "grad_norm": 7.662949741416014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158050 + }, + { + "epoch": 0.7665624359672297, + "grad_norm": 9.11120991986536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158060 + }, + { + "epoch": 0.7666109341600659, + "grad_norm": 1.062637011273182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158070 + }, + { + "epoch": 0.7666594323529019, + "grad_norm": 8.480969881929923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158080 + }, + { + "epoch": 0.766707930545738, + "grad_norm": 8.241180182722019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158090 + }, + { + "epoch": 0.7667564287385741, + "grad_norm": 8.053841327182454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158100 + }, + { + "epoch": 0.7668049269314102, + "grad_norm": 8.650653171571321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158110 + }, + { + "epoch": 0.7668534251242463, + "grad_norm": 9.266202738444917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158120 + }, + { + "epoch": 0.7669019233170824, + "grad_norm": 8.646849636306797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158130 + }, + { + "epoch": 0.7669504215099184, + "grad_norm": 3.5536518794287986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158140 + }, + { + "epoch": 0.7669989197027546, + "grad_norm": 7.989554262621823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158150 + }, + { + "epoch": 0.7670474178955906, + "grad_norm": 8.186320599179453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158160 + }, + { + "epoch": 0.7670959160884268, + "grad_norm": 8.356747116522456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158170 + }, + { + "epoch": 0.7671444142812629, + "grad_norm": 9.806289114067113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158180 + }, + { + "epoch": 0.7671929124740989, + "grad_norm": 7.833989457139978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158190 + }, + { + "epoch": 0.7672414106669351, + "grad_norm": 7.664235113225004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158200 + }, + { + "epoch": 0.7672899088597711, + "grad_norm": 8.919053584577341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158210 + }, + { + "epoch": 0.7673384070526073, + "grad_norm": 8.660310157893036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158220 + }, + { + "epoch": 0.7673869052454433, + "grad_norm": 8.324178679686156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158230 + }, + { + "epoch": 0.7674354034382794, + "grad_norm": 7.737021689990797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158240 + }, + { + "epoch": 0.7674839016311155, + "grad_norm": 8.972054388323158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158250 + }, + { + "epoch": 0.7675323998239516, + "grad_norm": 8.73911787380166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158260 + }, + { + "epoch": 0.7675808980167876, + "grad_norm": 8.115686966903013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158270 + }, + { + "epoch": 0.7676293962096238, + "grad_norm": 8.807717932768355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158280 + }, + { + "epoch": 0.7676778944024598, + "grad_norm": 7.542488589251661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158290 + }, + { + "epoch": 0.767726392595296, + "grad_norm": 8.646166804737732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158300 + }, + { + "epoch": 0.767774890788132, + "grad_norm": 8.203061696576697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158310 + }, + { + "epoch": 0.7678233889809681, + "grad_norm": 8.069837775792621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158320 + }, + { + "epoch": 0.7678718871738042, + "grad_norm": 5.708611183763423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158330 + }, + { + "epoch": 0.7679203853666403, + "grad_norm": 7.409518332224252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158340 + }, + { + "epoch": 0.7679688835594763, + "grad_norm": 7.953878622402044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158350 + }, + { + "epoch": 0.7680173817523125, + "grad_norm": 8.336983370327289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158360 + }, + { + "epoch": 0.7680658799451485, + "grad_norm": 8.387142713672802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158370 + }, + { + "epoch": 0.7681143781379847, + "grad_norm": 8.628442316194196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158380 + }, + { + "epoch": 0.7681628763308207, + "grad_norm": 7.991332040546695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158390 + }, + { + "epoch": 0.7682113745236568, + "grad_norm": 7.290994830100317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158400 + }, + { + "epoch": 0.7682598727164929, + "grad_norm": 7.81148088435657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158410 + }, + { + "epoch": 0.768308370909329, + "grad_norm": 7.649055788760961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158420 + }, + { + "epoch": 0.768356869102165, + "grad_norm": 8.10249005667174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158430 + }, + { + "epoch": 0.7684053672950012, + "grad_norm": 8.74060788191855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158440 + }, + { + "epoch": 0.7684538654878372, + "grad_norm": 7.664843337806815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158450 + }, + { + "epoch": 0.7685023636806734, + "grad_norm": 8.112811400451392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158460 + }, + { + "epoch": 0.7685508618735094, + "grad_norm": 1.443861492589349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158470 + }, + { + "epoch": 0.7685993600663455, + "grad_norm": 9.090814501178102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158480 + }, + { + "epoch": 0.7686478582591816, + "grad_norm": 7.421375158855881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158490 + }, + { + "epoch": 0.7686963564520177, + "grad_norm": 8.992282829467513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158500 + }, + { + "epoch": 0.7687448546448538, + "grad_norm": 7.964099779655953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158510 + }, + { + "epoch": 0.7687933528376899, + "grad_norm": 1.3543539978400077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158520 + }, + { + "epoch": 0.7688418510305259, + "grad_norm": 7.815570057800869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158530 + }, + { + "epoch": 0.7688903492233621, + "grad_norm": 8.05616124921471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158540 + }, + { + "epoch": 0.7689388474161981, + "grad_norm": 7.707681959345791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158550 + }, + { + "epoch": 0.7689873456090343, + "grad_norm": 7.983002348055379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158560 + }, + { + "epoch": 0.7690358438018703, + "grad_norm": 7.782598743233393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158570 + }, + { + "epoch": 0.7690843419947064, + "grad_norm": 8.264669304480776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158580 + }, + { + "epoch": 0.7691328401875425, + "grad_norm": 7.191975726072997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158590 + }, + { + "epoch": 0.7691813383803786, + "grad_norm": 7.525146372699965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158600 + }, + { + "epoch": 0.7692298365732146, + "grad_norm": 7.956221281801845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158610 + }, + { + "epoch": 0.7692783347660508, + "grad_norm": 8.080341018512627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158620 + }, + { + "epoch": 0.7693268329588868, + "grad_norm": 7.752361597113122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158630 + }, + { + "epoch": 0.769375331151723, + "grad_norm": 9.269687950563821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158640 + }, + { + "epoch": 0.769423829344559, + "grad_norm": 7.049115424706542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158650 + }, + { + "epoch": 0.7694723275373951, + "grad_norm": 7.627445341995553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158660 + }, + { + "epoch": 0.7695208257302312, + "grad_norm": 8.534431827911249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158670 + }, + { + "epoch": 0.7695693239230673, + "grad_norm": 7.846357163998618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158680 + }, + { + "epoch": 0.7696178221159035, + "grad_norm": 7.089886366884457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158690 + }, + { + "epoch": 0.7696663203087395, + "grad_norm": 7.408164748312629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158700 + }, + { + "epoch": 0.7697148185015756, + "grad_norm": 8.017538277726999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158710 + }, + { + "epoch": 0.7697633166944117, + "grad_norm": 7.569153837039266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158720 + }, + { + "epoch": 0.7698118148872478, + "grad_norm": 8.028837328311056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158730 + }, + { + "epoch": 0.7698603130800838, + "grad_norm": 7.806993806980245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158740 + }, + { + "epoch": 0.76990881127292, + "grad_norm": 8.425324438121606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158750 + }, + { + "epoch": 0.769957309465756, + "grad_norm": 2.2591817128159164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158760 + }, + { + "epoch": 0.7700058076585922, + "grad_norm": 7.834666604367158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158770 + }, + { + "epoch": 0.7700543058514282, + "grad_norm": 7.55487334913596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158780 + }, + { + "epoch": 0.7701028040442643, + "grad_norm": 7.038637761525024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158790 + }, + { + "epoch": 0.7701513022371004, + "grad_norm": 7.443042449040149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158800 + }, + { + "epoch": 0.7701998004299365, + "grad_norm": 7.396852907959328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158810 + }, + { + "epoch": 0.7702482986227726, + "grad_norm": 7.386751832427763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158820 + }, + { + "epoch": 0.7702967968156087, + "grad_norm": 7.604835872143667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158830 + }, + { + "epoch": 0.7703452950084447, + "grad_norm": 7.269242985330493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158840 + }, + { + "epoch": 0.7703937932012809, + "grad_norm": 8.459350908651686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158850 + }, + { + "epoch": 0.7704422913941169, + "grad_norm": 8.096938586277247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158860 + }, + { + "epoch": 0.770490789586953, + "grad_norm": 7.435229321117731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158870 + }, + { + "epoch": 0.7705392877797891, + "grad_norm": 7.65744019304293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158880 + }, + { + "epoch": 0.7705877859726252, + "grad_norm": 6.955577447342876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158890 + }, + { + "epoch": 0.7706362841654613, + "grad_norm": 8.949834295890469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158900 + }, + { + "epoch": 0.7706847823582974, + "grad_norm": 7.485897413062048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158910 + }, + { + "epoch": 0.7707332805511334, + "grad_norm": 8.374636450980688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158920 + }, + { + "epoch": 0.7707817787439696, + "grad_norm": 7.350655550908414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158930 + }, + { + "epoch": 0.7708302769368056, + "grad_norm": 8.903983683694605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158940 + }, + { + "epoch": 0.7708787751296418, + "grad_norm": 8.079783953007791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158950 + }, + { + "epoch": 0.7709272733224778, + "grad_norm": 7.379674116236856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158960 + }, + { + "epoch": 0.7709757715153139, + "grad_norm": 7.855729222683294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158970 + }, + { + "epoch": 0.77102426970815, + "grad_norm": 7.29814004785112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158980 + }, + { + "epoch": 0.7710727679009861, + "grad_norm": 6.685637998771199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 158990 + }, + { + "epoch": 0.7711212660938221, + "grad_norm": 1.0271579412801657e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159000 + }, + { + "epoch": 0.7711697642866583, + "grad_norm": 7.349871111728135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159010 + }, + { + "epoch": 0.7712182624794943, + "grad_norm": 7.595605211463408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159020 + }, + { + "epoch": 0.7712667606723305, + "grad_norm": 7.200664242645871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159030 + }, + { + "epoch": 0.7713152588651665, + "grad_norm": 7.003639268532424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159040 + }, + { + "epoch": 0.7713637570580026, + "grad_norm": 7.035887961137632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159050 + }, + { + "epoch": 0.7714122552508387, + "grad_norm": 7.277593283561146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159060 + }, + { + "epoch": 0.7714607534436748, + "grad_norm": 6.9261488988559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159070 + }, + { + "epoch": 0.7715092516365109, + "grad_norm": 7.418061187536296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159080 + }, + { + "epoch": 0.771557749829347, + "grad_norm": 6.737035107562406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159090 + }, + { + "epoch": 0.771606248022183, + "grad_norm": 6.916377515153727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159100 + }, + { + "epoch": 0.7716547462150192, + "grad_norm": 7.638540466814447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159110 + }, + { + "epoch": 0.7717032444078552, + "grad_norm": 7.309432703550556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159120 + }, + { + "epoch": 0.7717517426006913, + "grad_norm": 7.686922742777824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159130 + }, + { + "epoch": 0.7718002407935274, + "grad_norm": 6.888136994120941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159140 + }, + { + "epoch": 0.7718487389863635, + "grad_norm": 7.12077508069342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159150 + }, + { + "epoch": 0.7718972371791996, + "grad_norm": 7.829432746575549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159160 + }, + { + "epoch": 0.7719457353720357, + "grad_norm": 7.300368309870464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159170 + }, + { + "epoch": 0.7719942335648717, + "grad_norm": 7.650155708915918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159180 + }, + { + "epoch": 0.7720427317577079, + "grad_norm": 7.379575350796586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159190 + }, + { + "epoch": 0.772091229950544, + "grad_norm": 6.68923760827056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159200 + }, + { + "epoch": 0.77213972814338, + "grad_norm": 7.147677649754769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159210 + }, + { + "epoch": 0.7721882263362162, + "grad_norm": 7.562955772755231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159220 + }, + { + "epoch": 0.7722367245290522, + "grad_norm": 7.60632019591867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159230 + }, + { + "epoch": 0.7722852227218884, + "grad_norm": 7.84269005293936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159240 + }, + { + "epoch": 0.7723337209147244, + "grad_norm": 6.960618748053093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159250 + }, + { + "epoch": 0.7723822191075606, + "grad_norm": 1.052178788540914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159260 + }, + { + "epoch": 0.7724307173003966, + "grad_norm": 9.45794269568978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159270 + }, + { + "epoch": 0.7724792154932327, + "grad_norm": 7.00469087178135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159280 + }, + { + "epoch": 0.7725277136860688, + "grad_norm": 6.878728697756742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159290 + }, + { + "epoch": 0.7725762118789049, + "grad_norm": 8.721670496925071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159300 + }, + { + "epoch": 0.7726247100717409, + "grad_norm": 7.651009781284301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159310 + }, + { + "epoch": 0.7726732082645771, + "grad_norm": 7.087316333809213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159320 + }, + { + "epoch": 0.7727217064574131, + "grad_norm": 6.739932700838835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159330 + }, + { + "epoch": 0.7727702046502493, + "grad_norm": 6.605955604754854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159340 + }, + { + "epoch": 0.7728187028430853, + "grad_norm": 6.54892389206907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159350 + }, + { + "epoch": 0.7728672010359214, + "grad_norm": 7.295673754015297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159360 + }, + { + "epoch": 0.7729156992287575, + "grad_norm": 7.321781936298066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159370 + }, + { + "epoch": 0.7729641974215936, + "grad_norm": 7.269244406415964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159380 + }, + { + "epoch": 0.7730126956144296, + "grad_norm": 6.611249148136267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159390 + }, + { + "epoch": 0.7730611938072658, + "grad_norm": 6.935649565775748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159400 + }, + { + "epoch": 0.7731096920001018, + "grad_norm": 7.252093325860187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159410 + }, + { + "epoch": 0.773158190192938, + "grad_norm": 7.027676929283189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159420 + }, + { + "epoch": 0.773206688385774, + "grad_norm": 7.035394844479015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159430 + }, + { + "epoch": 0.7732551865786101, + "grad_norm": 6.872552660297515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159440 + }, + { + "epoch": 0.7733036847714462, + "grad_norm": 6.786435591266127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159450 + }, + { + "epoch": 0.7733521829642823, + "grad_norm": 7.44400097119069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159460 + }, + { + "epoch": 0.7734006811571184, + "grad_norm": 7.227270515386408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159470 + }, + { + "epoch": 0.7734491793499545, + "grad_norm": 7.203768603858407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159480 + }, + { + "epoch": 0.7734976775427905, + "grad_norm": 6.343341851788864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159490 + }, + { + "epoch": 0.7735461757356267, + "grad_norm": 6.908344118983223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159500 + }, + { + "epoch": 0.7735946739284627, + "grad_norm": 6.826942211546339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159510 + }, + { + "epoch": 0.7736431721212988, + "grad_norm": 6.918919837062276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159520 + }, + { + "epoch": 0.7736916703141349, + "grad_norm": 7.026405768328914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159530 + }, + { + "epoch": 0.773740168506971, + "grad_norm": 6.580306433079386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159540 + }, + { + "epoch": 0.7737886666998071, + "grad_norm": 6.481262460056314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159550 + }, + { + "epoch": 0.7738371648926432, + "grad_norm": 7.435409088429878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159560 + }, + { + "epoch": 0.7738856630854792, + "grad_norm": 6.914458339224439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159570 + }, + { + "epoch": 0.7739341612783154, + "grad_norm": 6.824593157261916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159580 + }, + { + "epoch": 0.7739826594711514, + "grad_norm": 1.4649269530764286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159590 + }, + { + "epoch": 0.7740311576639876, + "grad_norm": 6.744917158130193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159600 + }, + { + "epoch": 0.7740796558568236, + "grad_norm": 6.802969920727264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159610 + }, + { + "epoch": 0.7741281540496597, + "grad_norm": 7.022379833188097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159620 + }, + { + "epoch": 0.7741766522424958, + "grad_norm": 7.24546822539196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159630 + }, + { + "epoch": 0.7742251504353319, + "grad_norm": 6.484694381470035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159640 + }, + { + "epoch": 0.7742736486281679, + "grad_norm": 6.927211160245861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159650 + }, + { + "epoch": 0.7743221468210041, + "grad_norm": 6.901560567484921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159660 + }, + { + "epoch": 0.7743706450138401, + "grad_norm": 6.924068429725594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159670 + }, + { + "epoch": 0.7744191432066763, + "grad_norm": 7.16089658681085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159680 + }, + { + "epoch": 0.7744676413995123, + "grad_norm": 6.50915765731952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159690 + }, + { + "epoch": 0.7745161395923484, + "grad_norm": 6.575774591510708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159700 + }, + { + "epoch": 0.7745646377851846, + "grad_norm": 7.322641693008336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159710 + }, + { + "epoch": 0.7746131359780206, + "grad_norm": 7.216257102982127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159720 + }, + { + "epoch": 0.7746616341708568, + "grad_norm": 6.858520151808989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159730 + }, + { + "epoch": 0.7747101323636928, + "grad_norm": 6.582705225355312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159740 + }, + { + "epoch": 0.7747586305565289, + "grad_norm": 6.621483805702155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159750 + }, + { + "epoch": 0.774807128749365, + "grad_norm": 7.181940020473121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159760 + }, + { + "epoch": 0.7748556269422011, + "grad_norm": 6.526663298700441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159770 + }, + { + "epoch": 0.7749041251350371, + "grad_norm": 6.653367989883918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159780 + }, + { + "epoch": 0.7749526233278733, + "grad_norm": 6.905298022275019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159790 + }, + { + "epoch": 0.7750011215207093, + "grad_norm": 6.253326745309096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159800 + }, + { + "epoch": 0.7750496197135455, + "grad_norm": 7.017820280452725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159810 + }, + { + "epoch": 0.7750981179063815, + "grad_norm": 6.92529908974393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159820 + }, + { + "epoch": 0.7751466160992176, + "grad_norm": 6.947392705569655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159830 + }, + { + "epoch": 0.7751951142920537, + "grad_norm": 9.108683229896997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159840 + }, + { + "epoch": 0.7752436124848898, + "grad_norm": 6.383694284295416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159850 + }, + { + "epoch": 0.7752921106777259, + "grad_norm": 6.847134415011169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159860 + }, + { + "epoch": 0.775340608870562, + "grad_norm": 6.45925766207256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159870 + }, + { + "epoch": 0.775389107063398, + "grad_norm": 1.0264064798093386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159880 + }, + { + "epoch": 0.7754376052562342, + "grad_norm": 6.606989444435385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159890 + }, + { + "epoch": 0.7754861034490702, + "grad_norm": 6.27108320827574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159900 + }, + { + "epoch": 0.7755346016419064, + "grad_norm": 7.038207883169889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159910 + }, + { + "epoch": 0.7755830998347424, + "grad_norm": 7.271479773862666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159920 + }, + { + "epoch": 0.7756315980275785, + "grad_norm": 6.8449573120688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159930 + }, + { + "epoch": 0.7756800962204146, + "grad_norm": 6.324086854192501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159940 + }, + { + "epoch": 0.7757285944132507, + "grad_norm": 6.768943450197185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159950 + }, + { + "epoch": 0.7757770926060867, + "grad_norm": 6.535237417892859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159960 + }, + { + "epoch": 0.7758255907989229, + "grad_norm": 7.113366962130385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159970 + }, + { + "epoch": 0.7758740889917589, + "grad_norm": 7.225627740581331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159980 + }, + { + "epoch": 0.7759225871845951, + "grad_norm": 6.042772326964041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 159990 + }, + { + "epoch": 0.7759710853774311, + "grad_norm": 6.020729159672555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160000 + }, + { + "epoch": 0.7760195835702672, + "grad_norm": 6.344476588537873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160010 + }, + { + "epoch": 0.7760680817631033, + "grad_norm": 6.559639587067068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160020 + }, + { + "epoch": 0.7761165799559394, + "grad_norm": 6.38782964301754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160030 + }, + { + "epoch": 0.7761650781487754, + "grad_norm": 6.213224423845531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160040 + }, + { + "epoch": 0.7762135763416116, + "grad_norm": 6.000956886964559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160050 + }, + { + "epoch": 0.7762620745344476, + "grad_norm": 7.106756783059609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160060 + }, + { + "epoch": 0.7763105727272838, + "grad_norm": 6.655421458390265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160070 + }, + { + "epoch": 0.7763590709201198, + "grad_norm": 6.731741564180993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160080 + }, + { + "epoch": 0.7764075691129559, + "grad_norm": 6.305604927092645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160090 + }, + { + "epoch": 0.776456067305792, + "grad_norm": 6.16308000189747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160100 + }, + { + "epoch": 0.7765045654986281, + "grad_norm": 6.678872210841291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160110 + }, + { + "epoch": 0.7765530636914642, + "grad_norm": 7.375796684527813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160120 + }, + { + "epoch": 0.7766015618843003, + "grad_norm": 6.458956391952597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160130 + }, + { + "epoch": 0.7766500600771363, + "grad_norm": 6.48140385806073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160140 + }, + { + "epoch": 0.7766985582699725, + "grad_norm": 6.063438462433623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160150 + }, + { + "epoch": 0.7767470564628085, + "grad_norm": 6.39743049646313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160160 + }, + { + "epoch": 0.7767955546556446, + "grad_norm": 6.483185899242017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160170 + }, + { + "epoch": 0.7768440528484807, + "grad_norm": 6.247798012282146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160180 + }, + { + "epoch": 0.7768925510413168, + "grad_norm": 5.938382940939846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160190 + }, + { + "epoch": 0.7769410492341529, + "grad_norm": 6.153012321874485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160200 + }, + { + "epoch": 0.776989547426989, + "grad_norm": 7.136453916700702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160210 + }, + { + "epoch": 0.7770380456198251, + "grad_norm": 6.906965666075848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160220 + }, + { + "epoch": 0.7770865438126612, + "grad_norm": 6.682462583285087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160230 + }, + { + "epoch": 0.7771350420054973, + "grad_norm": 6.141236497114733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160240 + }, + { + "epoch": 0.7771835401983334, + "grad_norm": 6.079363856770215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160250 + }, + { + "epoch": 0.7772320383911695, + "grad_norm": 6.109274863774772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160260 + }, + { + "epoch": 0.7772805365840055, + "grad_norm": 6.549535669364559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160270 + }, + { + "epoch": 0.7773290347768417, + "grad_norm": 6.183405787396623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160280 + }, + { + "epoch": 0.7773775329696777, + "grad_norm": 6.240479422103817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160290 + }, + { + "epoch": 0.7774260311625139, + "grad_norm": 7.372604926558779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160300 + }, + { + "epoch": 0.7774745293553499, + "grad_norm": 6.2490826735484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160310 + }, + { + "epoch": 0.777523027548186, + "grad_norm": 6.136775709819631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160320 + }, + { + "epoch": 0.7775715257410221, + "grad_norm": 6.240754402142557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160330 + }, + { + "epoch": 0.7776200239338582, + "grad_norm": 6.447050537872201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160340 + }, + { + "epoch": 0.7776685221266942, + "grad_norm": 6.079165615346938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160350 + }, + { + "epoch": 0.7777170203195304, + "grad_norm": 6.614807546156953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160360 + }, + { + "epoch": 0.7777655185123664, + "grad_norm": 6.240511396526927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160370 + }, + { + "epoch": 0.7778140167052026, + "grad_norm": 6.791437812125878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160380 + }, + { + "epoch": 0.7778625148980386, + "grad_norm": 6.181193157317466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160390 + }, + { + "epoch": 0.7779110130908747, + "grad_norm": 5.900797006574976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160400 + }, + { + "epoch": 0.7779595112837108, + "grad_norm": 6.15140933746261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160410 + }, + { + "epoch": 0.7780080094765469, + "grad_norm": 6.392833284962762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160420 + }, + { + "epoch": 0.778056507669383, + "grad_norm": 6.348990666538157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160430 + }, + { + "epoch": 0.7781050058622191, + "grad_norm": 5.969862115762226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160440 + }, + { + "epoch": 0.7781535040550551, + "grad_norm": 6.380948036621703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160450 + }, + { + "epoch": 0.7782020022478913, + "grad_norm": 7.167611215663783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160460 + }, + { + "epoch": 0.7782505004407273, + "grad_norm": 7.569929749706716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160470 + }, + { + "epoch": 0.7782989986335634, + "grad_norm": 6.218927239842742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160480 + }, + { + "epoch": 0.7783474968263995, + "grad_norm": 5.850721507272283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160490 + }, + { + "epoch": 0.7783959950192356, + "grad_norm": 5.6728172381781405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160500 + }, + { + "epoch": 0.7784444932120717, + "grad_norm": 6.104423988517738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160510 + }, + { + "epoch": 0.7784929914049078, + "grad_norm": 6.351188375219863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160520 + }, + { + "epoch": 0.7785414895977438, + "grad_norm": 5.987874374113744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160530 + }, + { + "epoch": 0.77858998779058, + "grad_norm": 6.209493363940055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160540 + }, + { + "epoch": 0.778638485983416, + "grad_norm": 5.849140194413849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160550 + }, + { + "epoch": 0.7786869841762522, + "grad_norm": 5.8318288864711576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160560 + }, + { + "epoch": 0.7787354823690882, + "grad_norm": 6.002422736628432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160570 + }, + { + "epoch": 0.7787839805619243, + "grad_norm": 5.901313215872506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160580 + }, + { + "epoch": 0.7788324787547604, + "grad_norm": 6.152182407959117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160590 + }, + { + "epoch": 0.7788809769475965, + "grad_norm": 5.7652950857800533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160600 + }, + { + "epoch": 0.7789294751404325, + "grad_norm": 6.296863830357324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160610 + }, + { + "epoch": 0.7789779733332687, + "grad_norm": 5.982611384069969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160620 + }, + { + "epoch": 0.7790264715261047, + "grad_norm": 5.683057580085915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160630 + }, + { + "epoch": 0.7790749697189409, + "grad_norm": 5.804166747225281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160640 + }, + { + "epoch": 0.7791234679117769, + "grad_norm": 5.9003092189868767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160650 + }, + { + "epoch": 0.779171966104613, + "grad_norm": 5.9165376598002695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160660 + }, + { + "epoch": 0.7792204642974491, + "grad_norm": 6.007763175830405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160670 + }, + { + "epoch": 0.7792689624902852, + "grad_norm": 5.935104141485681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160680 + }, + { + "epoch": 0.7793174606831212, + "grad_norm": 5.3612627226584664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160690 + }, + { + "epoch": 0.7793659588759574, + "grad_norm": 5.8038338579535775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160700 + }, + { + "epoch": 0.7794144570687934, + "grad_norm": 6.052597001371396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160710 + }, + { + "epoch": 0.7794629552616296, + "grad_norm": 5.797873470214654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160720 + }, + { + "epoch": 0.7795114534544657, + "grad_norm": 6.055014267758452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160730 + }, + { + "epoch": 0.7795599516473017, + "grad_norm": 5.920229639855279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160740 + }, + { + "epoch": 0.7796084498401379, + "grad_norm": 5.69747236056628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160750 + }, + { + "epoch": 0.7796569480329739, + "grad_norm": 5.9065740742880735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160760 + }, + { + "epoch": 0.7797054462258101, + "grad_norm": 5.871516606248406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160770 + }, + { + "epoch": 0.7797539444186461, + "grad_norm": 5.790385415593846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160780 + }, + { + "epoch": 0.7798024426114822, + "grad_norm": 5.9158953291671423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160790 + }, + { + "epoch": 0.7798509408043183, + "grad_norm": 6.778219585612533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160800 + }, + { + "epoch": 0.7798994389971544, + "grad_norm": 5.864093211016552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160810 + }, + { + "epoch": 0.7799479371899904, + "grad_norm": 5.8628568666563297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160820 + }, + { + "epoch": 0.7799964353828266, + "grad_norm": 5.6732524456037936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160830 + }, + { + "epoch": 0.7800449335756626, + "grad_norm": 6.686960318802448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160840 + }, + { + "epoch": 0.7800934317684988, + "grad_norm": 5.988503204434892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160850 + }, + { + "epoch": 0.7801419299613348, + "grad_norm": 5.607302355770116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160860 + }, + { + "epoch": 0.780190428154171, + "grad_norm": 5.79595358374263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160870 + }, + { + "epoch": 0.780238926347007, + "grad_norm": 5.7550582965859576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160880 + }, + { + "epoch": 0.7802874245398431, + "grad_norm": 5.461657792693586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160890 + }, + { + "epoch": 0.7803359227326792, + "grad_norm": 5.757606302836393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160900 + }, + { + "epoch": 0.7803844209255153, + "grad_norm": 5.566903382714372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160910 + }, + { + "epoch": 0.7804329191183513, + "grad_norm": 5.654452550629685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160920 + }, + { + "epoch": 0.7804814173111875, + "grad_norm": 6.128509255631798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160930 + }, + { + "epoch": 0.7805299155040235, + "grad_norm": 5.6604189779818626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160940 + }, + { + "epoch": 0.7805784136968597, + "grad_norm": 5.249813384011759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160950 + }, + { + "epoch": 0.7806269118896957, + "grad_norm": 5.813665993059658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160960 + }, + { + "epoch": 0.7806754100825318, + "grad_norm": 5.7622141724777975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160970 + }, + { + "epoch": 0.7807239082753679, + "grad_norm": 6.346299841197833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160980 + }, + { + "epoch": 0.780772406468204, + "grad_norm": 5.20761034294992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 160990 + }, + { + "epoch": 0.78082090466104, + "grad_norm": 5.453964035950776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161000 + }, + { + "epoch": 0.7808694028538762, + "grad_norm": 5.5977789514827236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161010 + }, + { + "epoch": 0.7809179010467122, + "grad_norm": 5.374648637257451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161020 + }, + { + "epoch": 0.7809663992395484, + "grad_norm": 5.69596920740878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161030 + }, + { + "epoch": 0.7810148974323844, + "grad_norm": 5.3411781664181035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161040 + }, + { + "epoch": 0.7810633956252205, + "grad_norm": 6.240390604261847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161050 + }, + { + "epoch": 0.7811118938180566, + "grad_norm": 7.40990913072892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161060 + }, + { + "epoch": 0.7811603920108927, + "grad_norm": 5.396752555952844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161070 + }, + { + "epoch": 0.7812088902037287, + "grad_norm": 5.516339385280844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161080 + }, + { + "epoch": 0.7812573883965649, + "grad_norm": 7.803144086437896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161090 + }, + { + "epoch": 0.7813058865894009, + "grad_norm": 5.3600121674435286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161100 + }, + { + "epoch": 0.7813543847822371, + "grad_norm": 5.262058166977113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161110 + }, + { + "epoch": 0.7814028829750731, + "grad_norm": 7.138642388326844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161120 + }, + { + "epoch": 0.7814513811679092, + "grad_norm": 5.4263875171045584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161130 + }, + { + "epoch": 0.7814998793607453, + "grad_norm": 5.369263433863125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161140 + }, + { + "epoch": 0.7815483775535814, + "grad_norm": 6.675420394230969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161150 + }, + { + "epoch": 0.7815968757464175, + "grad_norm": 5.3205642558395994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161160 + }, + { + "epoch": 0.7816453739392536, + "grad_norm": 6.566645538441662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161170 + }, + { + "epoch": 0.7816938721320896, + "grad_norm": 5.2462127086982946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161180 + }, + { + "epoch": 0.7817423703249258, + "grad_norm": 5.8749535014612775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161190 + }, + { + "epoch": 0.7817908685177618, + "grad_norm": 5.2637563641155793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161200 + }, + { + "epoch": 0.781839366710598, + "grad_norm": 6.038102640104626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161210 + }, + { + "epoch": 0.781887864903434, + "grad_norm": 6.613824155010661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161220 + }, + { + "epoch": 0.7819363630962701, + "grad_norm": 5.221367516128339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161230 + }, + { + "epoch": 0.7819848612891063, + "grad_norm": 5.1887056429222866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161240 + }, + { + "epoch": 0.7820333594819423, + "grad_norm": 5.2948905704397475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161250 + }, + { + "epoch": 0.7820818576747784, + "grad_norm": 5.0695778242015876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161260 + }, + { + "epoch": 0.7821303558676145, + "grad_norm": 5.9338113089779654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161270 + }, + { + "epoch": 0.7821788540604506, + "grad_norm": 5.383260770486231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161280 + }, + { + "epoch": 0.7822273522532867, + "grad_norm": 5.2702983310837226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161290 + }, + { + "epoch": 0.7822758504461228, + "grad_norm": 5.4920064940233715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161300 + }, + { + "epoch": 0.7823243486389588, + "grad_norm": 5.34397912588247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161310 + }, + { + "epoch": 0.782372846831795, + "grad_norm": 5.3107040542954564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161320 + }, + { + "epoch": 0.782421345024631, + "grad_norm": 5.994719742830057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161330 + }, + { + "epoch": 0.7824698432174672, + "grad_norm": 6.117605266808823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161340 + }, + { + "epoch": 0.7825183414103032, + "grad_norm": 5.4390337567156166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161350 + }, + { + "epoch": 0.7825668396031393, + "grad_norm": 5.3008523792641427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161360 + }, + { + "epoch": 0.7826153377959754, + "grad_norm": 5.160929461567321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161370 + }, + { + "epoch": 0.7826638359888115, + "grad_norm": 5.410907988334657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161380 + }, + { + "epoch": 0.7827123341816475, + "grad_norm": 5.02423063153401e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161390 + }, + { + "epoch": 0.7827608323744837, + "grad_norm": 5.200321595566493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161400 + }, + { + "epoch": 0.7828093305673197, + "grad_norm": 5.519364165706975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161410 + }, + { + "epoch": 0.7828578287601559, + "grad_norm": 5.268141123337955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161420 + }, + { + "epoch": 0.7829063269529919, + "grad_norm": 5.2070102896095705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161430 + }, + { + "epoch": 0.782954825145828, + "grad_norm": 5.571793693093241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161440 + }, + { + "epoch": 0.7830033233386641, + "grad_norm": 5.120272206227128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161450 + }, + { + "epoch": 0.7830518215315002, + "grad_norm": 5.5069193649615045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161460 + }, + { + "epoch": 0.7831003197243362, + "grad_norm": 5.564409377711854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161470 + }, + { + "epoch": 0.7831488179171724, + "grad_norm": 5.1678522794418313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161480 + }, + { + "epoch": 0.7831973161100084, + "grad_norm": 4.831155209217286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161490 + }, + { + "epoch": 0.7832458143028446, + "grad_norm": 5.701006955405319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161500 + }, + { + "epoch": 0.7832943124956806, + "grad_norm": 5.271351710689487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161510 + }, + { + "epoch": 0.7833428106885167, + "grad_norm": 5.3596036053704665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161520 + }, + { + "epoch": 0.7833913088813528, + "grad_norm": 5.1363766573331304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161530 + }, + { + "epoch": 0.7834398070741889, + "grad_norm": 4.774016204578402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161540 + }, + { + "epoch": 0.783488305267025, + "grad_norm": 5.1944969214900993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161550 + }, + { + "epoch": 0.7835368034598611, + "grad_norm": 5.059529684103836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161560 + }, + { + "epoch": 0.7835853016526971, + "grad_norm": 4.795129626700145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161570 + }, + { + "epoch": 0.7836337998455333, + "grad_norm": 6.694527598938294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161580 + }, + { + "epoch": 0.7836822980383693, + "grad_norm": 5.069704656079921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161590 + }, + { + "epoch": 0.7837307962312055, + "grad_norm": 4.9716273764488506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161600 + }, + { + "epoch": 0.7837792944240415, + "grad_norm": 4.9295071136157276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161610 + }, + { + "epoch": 0.7838277926168776, + "grad_norm": 7.304699778387658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161620 + }, + { + "epoch": 0.7838762908097137, + "grad_norm": 4.987974477899115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161630 + }, + { + "epoch": 0.7839247890025498, + "grad_norm": 6.051757139857727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161640 + }, + { + "epoch": 0.7839732871953858, + "grad_norm": 5.840677985702314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161650 + }, + { + "epoch": 0.784021785388222, + "grad_norm": 4.8896531268383114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161660 + }, + { + "epoch": 0.784070283581058, + "grad_norm": 5.223123267228402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161670 + }, + { + "epoch": 0.7841187817738942, + "grad_norm": 4.941247411238692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161680 + }, + { + "epoch": 0.7841672799667302, + "grad_norm": 5.3494826346423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161690 + }, + { + "epoch": 0.7842157781595663, + "grad_norm": 5.317453144471074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161700 + }, + { + "epoch": 0.7842642763524024, + "grad_norm": 5.182816664728307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161710 + }, + { + "epoch": 0.7843127745452385, + "grad_norm": 5.41418536670335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161720 + }, + { + "epoch": 0.7843612727380745, + "grad_norm": 5.375794032147496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161730 + }, + { + "epoch": 0.7844097709309107, + "grad_norm": 5.209851750009875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161740 + }, + { + "epoch": 0.7844582691237467, + "grad_norm": 5.620044518650502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161750 + }, + { + "epoch": 0.7845067673165829, + "grad_norm": 5.508666234277371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161760 + }, + { + "epoch": 0.784555265509419, + "grad_norm": 5.085615129019061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161770 + }, + { + "epoch": 0.784603763702255, + "grad_norm": 5.3472763994477646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161780 + }, + { + "epoch": 0.7846522618950912, + "grad_norm": 5.2393886562640546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161790 + }, + { + "epoch": 0.7847007600879272, + "grad_norm": 4.9920615197152074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161800 + }, + { + "epoch": 0.7847492582807634, + "grad_norm": 4.8476422875864955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161810 + }, + { + "epoch": 0.7847977564735994, + "grad_norm": 4.898021188637358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161820 + }, + { + "epoch": 0.7848462546664355, + "grad_norm": 5.3652811971005576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161830 + }, + { + "epoch": 0.7848947528592716, + "grad_norm": 4.8115765594047843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161840 + }, + { + "epoch": 0.7849432510521077, + "grad_norm": 4.794395636054105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161850 + }, + { + "epoch": 0.7849917492449437, + "grad_norm": 4.9175177707638795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161860 + }, + { + "epoch": 0.7850402474377799, + "grad_norm": 4.8850353095986065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161870 + }, + { + "epoch": 0.7850887456306159, + "grad_norm": 5.056635998812453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161880 + }, + { + "epoch": 0.7851372438234521, + "grad_norm": 4.999659708460058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161890 + }, + { + "epoch": 0.7851857420162881, + "grad_norm": 5.004205760883451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161900 + }, + { + "epoch": 0.7852342402091242, + "grad_norm": 5.584029949545766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161910 + }, + { + "epoch": 0.7852827384019603, + "grad_norm": 4.9609237606773604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161920 + }, + { + "epoch": 0.7853312365947964, + "grad_norm": 5.070313946475835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161930 + }, + { + "epoch": 0.7853797347876325, + "grad_norm": 4.880893555991861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161940 + }, + { + "epoch": 0.7854282329804686, + "grad_norm": 4.9553612768704625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161950 + }, + { + "epoch": 0.7854767311733046, + "grad_norm": 4.8251589390702065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161960 + }, + { + "epoch": 0.7855252293661408, + "grad_norm": 4.880608628354821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161970 + }, + { + "epoch": 0.7855737275589768, + "grad_norm": 4.711959533665322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161980 + }, + { + "epoch": 0.785622225751813, + "grad_norm": 4.938785025387915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 161990 + }, + { + "epoch": 0.785670723944649, + "grad_norm": 4.95766769859074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162000 + }, + { + "epoch": 0.7857192221374851, + "grad_norm": 4.782756946042355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162010 + }, + { + "epoch": 0.7857677203303212, + "grad_norm": 5.3486061801777396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162020 + }, + { + "epoch": 0.7858162185231573, + "grad_norm": 4.532729391826251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162030 + }, + { + "epoch": 0.7858647167159933, + "grad_norm": 4.707537115677951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162040 + }, + { + "epoch": 0.7859132149088295, + "grad_norm": 4.810425480172853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162050 + }, + { + "epoch": 0.7859617131016655, + "grad_norm": 4.565300670833494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162060 + }, + { + "epoch": 0.7860102112945017, + "grad_norm": 5.127236946123048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162070 + }, + { + "epoch": 0.7860587094873377, + "grad_norm": 4.5738033804809675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162080 + }, + { + "epoch": 0.7861072076801738, + "grad_norm": 5.048797291351548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162090 + }, + { + "epoch": 0.7861557058730099, + "grad_norm": 4.879430193227563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162100 + }, + { + "epoch": 0.786204204065846, + "grad_norm": 4.489555749387364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162110 + }, + { + "epoch": 0.786252702258682, + "grad_norm": 4.537275799521012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162120 + }, + { + "epoch": 0.7863012004515182, + "grad_norm": 4.8649511086296116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162130 + }, + { + "epoch": 0.7863496986443542, + "grad_norm": 5.587372697846149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162140 + }, + { + "epoch": 0.7863981968371904, + "grad_norm": 4.5410541105184166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162150 + }, + { + "epoch": 0.7864466950300264, + "grad_norm": 5.1261672240343614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162160 + }, + { + "epoch": 0.7864951932228625, + "grad_norm": 4.7237218581130946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162170 + }, + { + "epoch": 0.7865436914156986, + "grad_norm": 4.346709303604257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162180 + }, + { + "epoch": 0.7865921896085347, + "grad_norm": 4.996739377816084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162190 + }, + { + "epoch": 0.7866406878013708, + "grad_norm": 4.722983959482008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162200 + }, + { + "epoch": 0.7866891859942069, + "grad_norm": 4.718733137565323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162210 + }, + { + "epoch": 0.7867376841870429, + "grad_norm": 4.4731162773814503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162220 + }, + { + "epoch": 0.7867861823798791, + "grad_norm": 4.6612598225692636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162230 + }, + { + "epoch": 0.7868346805727151, + "grad_norm": 4.571524314656017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162240 + }, + { + "epoch": 0.7868831787655512, + "grad_norm": 4.5435214701683435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162250 + }, + { + "epoch": 0.7869316769583873, + "grad_norm": 4.4174839786137454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162260 + }, + { + "epoch": 0.7869801751512234, + "grad_norm": 4.6628006344917594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162270 + }, + { + "epoch": 0.7870286733440596, + "grad_norm": 4.350714633005737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162280 + }, + { + "epoch": 0.7870771715368956, + "grad_norm": 4.7480867237936764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162290 + }, + { + "epoch": 0.7871256697297317, + "grad_norm": 4.8559737564346506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162300 + }, + { + "epoch": 0.7871741679225678, + "grad_norm": 4.785936980056249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162310 + }, + { + "epoch": 0.7872226661154039, + "grad_norm": 4.766376449083509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162320 + }, + { + "epoch": 0.78727116430824, + "grad_norm": 4.632147110328333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162330 + }, + { + "epoch": 0.7873196625010761, + "grad_norm": 4.56380817581703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162340 + }, + { + "epoch": 0.7873681606939121, + "grad_norm": 4.3961584594853775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162350 + }, + { + "epoch": 0.7874166588867483, + "grad_norm": 4.6370622897029534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162360 + }, + { + "epoch": 0.7874651570795843, + "grad_norm": 4.414787824202904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162370 + }, + { + "epoch": 0.7875136552724205, + "grad_norm": 4.674349085576068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162380 + }, + { + "epoch": 0.7875621534652565, + "grad_norm": 4.5743330900904766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162390 + }, + { + "epoch": 0.7876106516580926, + "grad_norm": 4.839586864591183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162400 + }, + { + "epoch": 0.7876591498509287, + "grad_norm": 4.5006398607938536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162410 + }, + { + "epoch": 0.7877076480437648, + "grad_norm": 4.409529097415543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162420 + }, + { + "epoch": 0.7877561462366008, + "grad_norm": 4.3555015594165525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162430 + }, + { + "epoch": 0.787804644429437, + "grad_norm": 4.6289940769383975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162440 + }, + { + "epoch": 0.787853142622273, + "grad_norm": 4.258823693703562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162450 + }, + { + "epoch": 0.7879016408151092, + "grad_norm": 4.068121839395644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162460 + }, + { + "epoch": 0.7879501390079452, + "grad_norm": 4.469233516601889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162470 + }, + { + "epoch": 0.7879986372007813, + "grad_norm": 4.157059763088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162480 + }, + { + "epoch": 0.7880471353936174, + "grad_norm": 4.38780389799831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162490 + }, + { + "epoch": 0.7880956335864535, + "grad_norm": 4.563698041692987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162500 + }, + { + "epoch": 0.7881441317792895, + "grad_norm": 4.4312354674502785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162510 + }, + { + "epoch": 0.7881926299721257, + "grad_norm": 4.621335847332375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162520 + }, + { + "epoch": 0.7882411281649617, + "grad_norm": 4.055460678387135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162530 + }, + { + "epoch": 0.7882896263577979, + "grad_norm": 4.6194450931125175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162540 + }, + { + "epoch": 0.7883381245506339, + "grad_norm": 4.343256776451199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162550 + }, + { + "epoch": 0.78838662274347, + "grad_norm": 4.120496654991257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162560 + }, + { + "epoch": 0.7884351209363061, + "grad_norm": 4.899570527072683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162570 + }, + { + "epoch": 0.7884836191291422, + "grad_norm": 4.118941276942678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162580 + }, + { + "epoch": 0.7885321173219783, + "grad_norm": 4.718771151601686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162590 + }, + { + "epoch": 0.7885806155148144, + "grad_norm": 4.517480078902736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162600 + }, + { + "epoch": 0.7886291137076504, + "grad_norm": 4.52109247817134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162610 + }, + { + "epoch": 0.7886776119004866, + "grad_norm": 4.074427195632779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162620 + }, + { + "epoch": 0.7887261100933226, + "grad_norm": 4.092549232836973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162630 + }, + { + "epoch": 0.7887746082861588, + "grad_norm": 4.3612679689886136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162640 + }, + { + "epoch": 0.7888231064789948, + "grad_norm": 4.304580514258305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162650 + }, + { + "epoch": 0.7888716046718309, + "grad_norm": 3.9811705931924735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162660 + }, + { + "epoch": 0.788920102864667, + "grad_norm": 4.181632462518792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162670 + }, + { + "epoch": 0.7889686010575031, + "grad_norm": 3.9538118556947666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162680 + }, + { + "epoch": 0.7890170992503391, + "grad_norm": 5.656745472037983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162690 + }, + { + "epoch": 0.7890655974431753, + "grad_norm": 4.258475527763039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162700 + }, + { + "epoch": 0.7891140956360113, + "grad_norm": 4.0744588147845207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162710 + }, + { + "epoch": 0.7891625938288475, + "grad_norm": 4.27035722339042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162720 + }, + { + "epoch": 0.7892110920216835, + "grad_norm": 4.524311947307069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162730 + }, + { + "epoch": 0.7892595902145196, + "grad_norm": 4.017062948946659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162740 + }, + { + "epoch": 0.7893080884073557, + "grad_norm": 4.3229455570781283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162750 + }, + { + "epoch": 0.7893565866001918, + "grad_norm": 3.807944182199208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162760 + }, + { + "epoch": 0.7894050847930278, + "grad_norm": 3.838523809918115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162770 + }, + { + "epoch": 0.789453582985864, + "grad_norm": 4.009102738677939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162780 + }, + { + "epoch": 0.7895020811787001, + "grad_norm": 3.8631153387314043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162790 + }, + { + "epoch": 0.7895505793715362, + "grad_norm": 3.993194397367006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162800 + }, + { + "epoch": 0.7895990775643723, + "grad_norm": 3.8365623566960494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162810 + }, + { + "epoch": 0.7896475757572083, + "grad_norm": 4.741391990137345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162820 + }, + { + "epoch": 0.7896960739500445, + "grad_norm": 4.044413515202905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162830 + }, + { + "epoch": 0.7897445721428805, + "grad_norm": 4.397874420192238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162840 + }, + { + "epoch": 0.7897930703357167, + "grad_norm": 5.698009530874515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162850 + }, + { + "epoch": 0.7898415685285527, + "grad_norm": 4.126579966623467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162860 + }, + { + "epoch": 0.7898900667213888, + "grad_norm": 4.1330739719569465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162870 + }, + { + "epoch": 0.7899385649142249, + "grad_norm": 3.92313488362106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162880 + }, + { + "epoch": 0.789987063107061, + "grad_norm": 4.081791615817565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162890 + }, + { + "epoch": 0.790035561299897, + "grad_norm": 4.501813677393329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162900 + }, + { + "epoch": 0.7900840594927332, + "grad_norm": 4.115531737625133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162910 + }, + { + "epoch": 0.7901325576855692, + "grad_norm": 4.265004704961939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162920 + }, + { + "epoch": 0.7901810558784054, + "grad_norm": 3.8953540837383116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162930 + }, + { + "epoch": 0.7902295540712414, + "grad_norm": 4.8047368750303576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162940 + }, + { + "epoch": 0.7902780522640775, + "grad_norm": 3.7849876122209025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162950 + }, + { + "epoch": 0.7903265504569136, + "grad_norm": 3.7017102272329794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162960 + }, + { + "epoch": 0.7903750486497497, + "grad_norm": 3.838113116216846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162970 + }, + { + "epoch": 0.7904235468425858, + "grad_norm": 3.954737337608094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162980 + }, + { + "epoch": 0.7904720450354219, + "grad_norm": 4.843835554879661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 162990 + }, + { + "epoch": 0.7905205432282579, + "grad_norm": 4.4115989084048124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163000 + }, + { + "epoch": 0.7905690414210941, + "grad_norm": 3.98776727195127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163010 + }, + { + "epoch": 0.7906175396139301, + "grad_norm": 3.88521215199944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163020 + }, + { + "epoch": 0.7906660378067663, + "grad_norm": 8.918414096115157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163030 + }, + { + "epoch": 0.7907145359996023, + "grad_norm": 3.938422921123674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163040 + }, + { + "epoch": 0.7907630341924384, + "grad_norm": 4.0335208950637025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163050 + }, + { + "epoch": 0.7908115323852745, + "grad_norm": 4.399593223070042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163060 + }, + { + "epoch": 0.7908600305781106, + "grad_norm": 3.635467038520801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163070 + }, + { + "epoch": 0.7909085287709466, + "grad_norm": 4.706370404505833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163080 + }, + { + "epoch": 0.7909570269637828, + "grad_norm": 3.837682172047607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163090 + }, + { + "epoch": 0.7910055251566188, + "grad_norm": 4.011691245864313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163100 + }, + { + "epoch": 0.791054023349455, + "grad_norm": 4.034300715716199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163110 + }, + { + "epoch": 0.791102521542291, + "grad_norm": 4.610825143913644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163120 + }, + { + "epoch": 0.7911510197351271, + "grad_norm": 3.737415354976292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163130 + }, + { + "epoch": 0.7911995179279632, + "grad_norm": 3.8942886249060393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163140 + }, + { + "epoch": 0.7912480161207993, + "grad_norm": 4.018733079647063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163150 + }, + { + "epoch": 0.7912965143136353, + "grad_norm": 3.7558674392812463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163160 + }, + { + "epoch": 0.7913450125064715, + "grad_norm": 3.953574179149655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163170 + }, + { + "epoch": 0.7913935106993075, + "grad_norm": 3.659034319980492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163180 + }, + { + "epoch": 0.7914420088921437, + "grad_norm": 3.796126080146678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163190 + }, + { + "epoch": 0.7914905070849797, + "grad_norm": 3.695154049410121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163200 + }, + { + "epoch": 0.7915390052778158, + "grad_norm": 3.662858816255721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163210 + }, + { + "epoch": 0.7915875034706519, + "grad_norm": 4.2628055751947613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163220 + }, + { + "epoch": 0.791636001663488, + "grad_norm": 3.631251388469536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163230 + }, + { + "epoch": 0.791684499856324, + "grad_norm": 3.601873999059535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163240 + }, + { + "epoch": 0.7917329980491602, + "grad_norm": 3.8014135839148366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163250 + }, + { + "epoch": 0.7917814962419962, + "grad_norm": 3.4846308949454396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163260 + }, + { + "epoch": 0.7918299944348324, + "grad_norm": 3.5419546406956215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163270 + }, + { + "epoch": 0.7918784926276684, + "grad_norm": 4.617656657046609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163280 + }, + { + "epoch": 0.7919269908205046, + "grad_norm": 3.904033718526989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163290 + }, + { + "epoch": 0.7919754890133407, + "grad_norm": 3.683461002879085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163300 + }, + { + "epoch": 0.7920239872061767, + "grad_norm": 3.486967159460619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163310 + }, + { + "epoch": 0.7920724853990129, + "grad_norm": 3.6133261716031484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163320 + }, + { + "epoch": 0.7921209835918489, + "grad_norm": 3.585734376088112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163330 + }, + { + "epoch": 0.792169481784685, + "grad_norm": 3.701592277138843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163340 + }, + { + "epoch": 0.7922179799775211, + "grad_norm": 4.6652854024387125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163350 + }, + { + "epoch": 0.7922664781703572, + "grad_norm": 3.75965925059063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163360 + }, + { + "epoch": 0.7923149763631933, + "grad_norm": 3.777640600333143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163370 + }, + { + "epoch": 0.7923634745560294, + "grad_norm": 3.614974630750112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163380 + }, + { + "epoch": 0.7924119727488654, + "grad_norm": 4.0254231947756125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163390 + }, + { + "epoch": 0.7924604709417016, + "grad_norm": 3.562647066246427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163400 + }, + { + "epoch": 0.7925089691345376, + "grad_norm": 3.4516041580445744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163410 + }, + { + "epoch": 0.7925574673273738, + "grad_norm": 3.896998990171596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163420 + }, + { + "epoch": 0.7926059655202098, + "grad_norm": 3.524406722021922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163430 + }, + { + "epoch": 0.7926544637130459, + "grad_norm": 4.2281513401576376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163440 + }, + { + "epoch": 0.792702961905882, + "grad_norm": 4.813059462094316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163450 + }, + { + "epoch": 0.7927514600987181, + "grad_norm": 3.860909458808237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163460 + }, + { + "epoch": 0.7927999582915541, + "grad_norm": 3.371417633957208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163470 + }, + { + "epoch": 0.7928484564843903, + "grad_norm": 3.5124550379350694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163480 + }, + { + "epoch": 0.7928969546772263, + "grad_norm": 4.617730908762496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163490 + }, + { + "epoch": 0.7929454528700625, + "grad_norm": 4.520396146290295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163500 + }, + { + "epoch": 0.7929939510628985, + "grad_norm": 3.462534436948772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163510 + }, + { + "epoch": 0.7930424492557346, + "grad_norm": 3.1958645507756955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163520 + }, + { + "epoch": 0.7930909474485707, + "grad_norm": 3.7673022035278336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163530 + }, + { + "epoch": 0.7931394456414068, + "grad_norm": 3.789681102261966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163540 + }, + { + "epoch": 0.7931879438342428, + "grad_norm": 4.410139453625561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163550 + }, + { + "epoch": 0.793236442027079, + "grad_norm": 4.099026185144794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163560 + }, + { + "epoch": 0.793284940219915, + "grad_norm": 3.332987574822255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163570 + }, + { + "epoch": 0.7933334384127512, + "grad_norm": 3.738067988479088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163580 + }, + { + "epoch": 0.7933819366055872, + "grad_norm": 3.392967329318708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163590 + }, + { + "epoch": 0.7934304347984233, + "grad_norm": 3.50803404103317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163600 + }, + { + "epoch": 0.7934789329912594, + "grad_norm": 3.203519227668039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163610 + }, + { + "epoch": 0.7935274311840955, + "grad_norm": 4.153455890332225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163620 + }, + { + "epoch": 0.7935759293769316, + "grad_norm": 3.772123591261334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163630 + }, + { + "epoch": 0.7936244275697677, + "grad_norm": 3.6861866448134606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163640 + }, + { + "epoch": 0.7936729257626037, + "grad_norm": 3.425180494787128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163650 + }, + { + "epoch": 0.7937214239554399, + "grad_norm": 3.623208755243468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163660 + }, + { + "epoch": 0.7937699221482759, + "grad_norm": 3.257922642774247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163670 + }, + { + "epoch": 0.793818420341112, + "grad_norm": 3.0202372158782964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163680 + }, + { + "epoch": 0.7938669185339481, + "grad_norm": 3.452482033594606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163690 + }, + { + "epoch": 0.7939154167267842, + "grad_norm": 3.8561104531709134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163700 + }, + { + "epoch": 0.7939639149196203, + "grad_norm": 3.832044370710719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163710 + }, + { + "epoch": 0.7940124131124564, + "grad_norm": 3.2279917405730885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163720 + }, + { + "epoch": 0.7940609113052924, + "grad_norm": 3.518259106272126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163730 + }, + { + "epoch": 0.7941094094981286, + "grad_norm": 3.597886433226449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163740 + }, + { + "epoch": 0.7941579076909646, + "grad_norm": 3.527141245740495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163750 + }, + { + "epoch": 0.7942064058838008, + "grad_norm": 3.189065722608575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163760 + }, + { + "epoch": 0.7942549040766368, + "grad_norm": 3.5705774337202456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163770 + }, + { + "epoch": 0.7943034022694729, + "grad_norm": 3.700554174201898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163780 + }, + { + "epoch": 0.794351900462309, + "grad_norm": 3.930261982532102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163790 + }, + { + "epoch": 0.7944003986551451, + "grad_norm": 3.858049169025435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163800 + }, + { + "epoch": 0.7944488968479813, + "grad_norm": 3.854766816857591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163810 + }, + { + "epoch": 0.7944973950408173, + "grad_norm": 3.4837668749787554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163820 + }, + { + "epoch": 0.7945458932336534, + "grad_norm": 3.2791568571610696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163830 + }, + { + "epoch": 0.7945943914264895, + "grad_norm": 3.8535240776127466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163840 + }, + { + "epoch": 0.7946428896193256, + "grad_norm": 3.880783694398815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163850 + }, + { + "epoch": 0.7946913878121616, + "grad_norm": 3.001108339617531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163860 + }, + { + "epoch": 0.7947398860049978, + "grad_norm": 3.562573880344644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163870 + }, + { + "epoch": 0.7947883841978338, + "grad_norm": 3.9366451431988025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163880 + }, + { + "epoch": 0.79483688239067, + "grad_norm": 4.8327500223876996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163890 + }, + { + "epoch": 0.794885380583506, + "grad_norm": 3.265004977492936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163900 + }, + { + "epoch": 0.7949338787763421, + "grad_norm": 3.357854083674283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163910 + }, + { + "epoch": 0.7949823769691782, + "grad_norm": 3.812243676293292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163920 + }, + { + "epoch": 0.7950308751620143, + "grad_norm": 3.0189401201141663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163930 + }, + { + "epoch": 0.7950793733548503, + "grad_norm": 3.565698136753781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163940 + }, + { + "epoch": 0.7951278715476865, + "grad_norm": 3.367258116782068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163950 + }, + { + "epoch": 0.7951763697405225, + "grad_norm": 3.258974246023172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163960 + }, + { + "epoch": 0.7952248679333587, + "grad_norm": 3.434074358210637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163970 + }, + { + "epoch": 0.7952733661261947, + "grad_norm": 3.764514744375447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163980 + }, + { + "epoch": 0.7953218643190308, + "grad_norm": 3.737215337196176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 163990 + }, + { + "epoch": 0.7953703625118669, + "grad_norm": 3.2783518122414534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164000 + }, + { + "epoch": 0.795418860704703, + "grad_norm": 2.9470944795662035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164010 + }, + { + "epoch": 0.7954673588975391, + "grad_norm": 3.175417262468727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164020 + }, + { + "epoch": 0.7955158570903752, + "grad_norm": 3.4222459532884386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164030 + }, + { + "epoch": 0.7955643552832112, + "grad_norm": 3.4588726549600324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164040 + }, + { + "epoch": 0.7956128534760474, + "grad_norm": 3.6116343693493036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164050 + }, + { + "epoch": 0.7956613516688834, + "grad_norm": 2.9669308787561022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164060 + }, + { + "epoch": 0.7957098498617196, + "grad_norm": 3.231075496046287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164070 + }, + { + "epoch": 0.7957583480545556, + "grad_norm": 3.052536712289111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164080 + }, + { + "epoch": 0.7958068462473917, + "grad_norm": 4.052218116612494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164090 + }, + { + "epoch": 0.7958553444402278, + "grad_norm": 3.4114123081963044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164100 + }, + { + "epoch": 0.7959038426330639, + "grad_norm": 3.248305446845734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164110 + }, + { + "epoch": 0.7959523408258999, + "grad_norm": 3.173246199139612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164120 + }, + { + "epoch": 0.7960008390187361, + "grad_norm": 3.343747678741238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164130 + }, + { + "epoch": 0.7960493372115721, + "grad_norm": 3.16260830857118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164140 + }, + { + "epoch": 0.7960978354044083, + "grad_norm": 3.604184684036227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164150 + }, + { + "epoch": 0.7961463335972443, + "grad_norm": 3.039547280536681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164160 + }, + { + "epoch": 0.7961948317900804, + "grad_norm": 3.4075558374979664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164170 + }, + { + "epoch": 0.7962433299829165, + "grad_norm": 2.946947574855585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164180 + }, + { + "epoch": 0.7962918281757526, + "grad_norm": 3.62159617850466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164190 + }, + { + "epoch": 0.7963403263685886, + "grad_norm": 3.365979850400436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164200 + }, + { + "epoch": 0.7963888245614248, + "grad_norm": 3.1685623014254816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164210 + }, + { + "epoch": 0.7964373227542608, + "grad_norm": 3.1761356211745806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164220 + }, + { + "epoch": 0.796485820947097, + "grad_norm": 2.9224219488810377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164230 + }, + { + "epoch": 0.796534319139933, + "grad_norm": 3.43873942654227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164240 + }, + { + "epoch": 0.7965828173327691, + "grad_norm": 3.4596990161617214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164250 + }, + { + "epoch": 0.7966313155256052, + "grad_norm": 3.111015445256271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164260 + }, + { + "epoch": 0.7966798137184413, + "grad_norm": 2.9305377680088895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164270 + }, + { + "epoch": 0.7967283119112774, + "grad_norm": 3.4621123745637306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164280 + }, + { + "epoch": 0.7967768101041135, + "grad_norm": 3.0661361449801916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164290 + }, + { + "epoch": 0.7968253082969495, + "grad_norm": 3.399237868961791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164300 + }, + { + "epoch": 0.7968738064897857, + "grad_norm": 3.6826978799808785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164310 + }, + { + "epoch": 0.7969223046826218, + "grad_norm": 2.9120894140532982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164320 + }, + { + "epoch": 0.7969708028754579, + "grad_norm": 3.802052006562917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164330 + }, + { + "epoch": 0.797019301068294, + "grad_norm": 3.754470512262742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164340 + }, + { + "epoch": 0.79706779926113, + "grad_norm": 3.476553445125319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164350 + }, + { + "epoch": 0.7971162974539662, + "grad_norm": 3.222493916155145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164360 + }, + { + "epoch": 0.7971647956468022, + "grad_norm": 3.349542865294097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164370 + }, + { + "epoch": 0.7972132938396383, + "grad_norm": 3.351974697807236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164380 + }, + { + "epoch": 0.7972617920324744, + "grad_norm": 3.2696469531856565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164390 + }, + { + "epoch": 0.7973102902253105, + "grad_norm": 3.260134917582036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164400 + }, + { + "epoch": 0.7973587884181466, + "grad_norm": 3.239016521661142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164410 + }, + { + "epoch": 0.7974072866109827, + "grad_norm": 3.18398711840473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164420 + }, + { + "epoch": 0.7974557848038187, + "grad_norm": 2.960492473391696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164430 + }, + { + "epoch": 0.7975042829966549, + "grad_norm": 3.4106179214177246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164440 + }, + { + "epoch": 0.7975527811894909, + "grad_norm": 3.216036859043925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164450 + }, + { + "epoch": 0.797601279382327, + "grad_norm": 3.589867247910661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164460 + }, + { + "epoch": 0.7976497775751631, + "grad_norm": 3.159814809805539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164470 + }, + { + "epoch": 0.7976982757679992, + "grad_norm": 3.6080241017089065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164480 + }, + { + "epoch": 0.7977467739608353, + "grad_norm": 3.1644756148807573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164490 + }, + { + "epoch": 0.7977952721536714, + "grad_norm": 4.2249148179962503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164500 + }, + { + "epoch": 0.7978437703465074, + "grad_norm": 2.900772955172215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164510 + }, + { + "epoch": 0.7978922685393436, + "grad_norm": 4.4297870260834316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164520 + }, + { + "epoch": 0.7979407667321796, + "grad_norm": 4.0799470468755317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164530 + }, + { + "epoch": 0.7979892649250158, + "grad_norm": 3.1820444945651616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164540 + }, + { + "epoch": 0.7980377631178518, + "grad_norm": 2.9255259548222057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164550 + }, + { + "epoch": 0.7980862613106879, + "grad_norm": 3.7676670672226464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164560 + }, + { + "epoch": 0.798134759503524, + "grad_norm": 2.6962808874486655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164570 + }, + { + "epoch": 0.7981832576963601, + "grad_norm": 2.635582596610675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164580 + }, + { + "epoch": 0.7982317558891961, + "grad_norm": 3.029899175999162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164590 + }, + { + "epoch": 0.7982802540820323, + "grad_norm": 3.1826075996832515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164600 + }, + { + "epoch": 0.7983287522748683, + "grad_norm": 2.864810610958557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164610 + }, + { + "epoch": 0.7983772504677045, + "grad_norm": 3.244366553190048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164620 + }, + { + "epoch": 0.7984257486605405, + "grad_norm": 2.7714758488173175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164630 + }, + { + "epoch": 0.7984742468533766, + "grad_norm": 3.391898317772757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164640 + }, + { + "epoch": 0.7985227450462127, + "grad_norm": 3.0111319659908986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164650 + }, + { + "epoch": 0.7985712432390488, + "grad_norm": 2.776852525698814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164660 + }, + { + "epoch": 0.7986197414318849, + "grad_norm": 3.416895211216797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164670 + }, + { + "epoch": 0.798668239624721, + "grad_norm": 3.433433803934349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164680 + }, + { + "epoch": 0.798716737817557, + "grad_norm": 3.753834576514237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164690 + }, + { + "epoch": 0.7987652360103932, + "grad_norm": 2.9113184751849985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164700 + }, + { + "epoch": 0.7988137342032292, + "grad_norm": 2.5595687347390594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164710 + }, + { + "epoch": 0.7988622323960654, + "grad_norm": 3.0510811654949066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164720 + }, + { + "epoch": 0.7989107305889014, + "grad_norm": 2.5993525554213193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164730 + }, + { + "epoch": 0.7989592287817375, + "grad_norm": 3.0467013800716813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164740 + }, + { + "epoch": 0.7990077269745736, + "grad_norm": 3.311576435294228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164750 + }, + { + "epoch": 0.7990562251674097, + "grad_norm": 2.7766848376131748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164760 + }, + { + "epoch": 0.7991047233602457, + "grad_norm": 3.1812980694212456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164770 + }, + { + "epoch": 0.7991532215530819, + "grad_norm": 2.5320879615264857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164780 + }, + { + "epoch": 0.7992017197459179, + "grad_norm": 3.099784606774847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164790 + }, + { + "epoch": 0.7992502179387541, + "grad_norm": 3.86818044262327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164800 + }, + { + "epoch": 0.7992987161315901, + "grad_norm": 2.597176695928738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164810 + }, + { + "epoch": 0.7993472143244262, + "grad_norm": 2.8041005961654264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164820 + }, + { + "epoch": 0.7993957125172624, + "grad_norm": 2.695231238192264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164830 + }, + { + "epoch": 0.7994442107100984, + "grad_norm": 2.887698968834229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164840 + }, + { + "epoch": 0.7994927089029346, + "grad_norm": 3.2733286303709974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164850 + }, + { + "epoch": 0.7995412070957706, + "grad_norm": 2.927900055738064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164860 + }, + { + "epoch": 0.7995897052886067, + "grad_norm": 2.9518513855464334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164870 + }, + { + "epoch": 0.7996382034814428, + "grad_norm": 2.8954689312854498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164880 + }, + { + "epoch": 0.7996867016742789, + "grad_norm": 2.9373854459890936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164890 + }, + { + "epoch": 0.7997351998671149, + "grad_norm": 3.5061816561210435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164900 + }, + { + "epoch": 0.7997836980599511, + "grad_norm": 2.596287629330618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164910 + }, + { + "epoch": 0.7998321962527871, + "grad_norm": 3.284640825995666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164920 + }, + { + "epoch": 0.7998806944456233, + "grad_norm": 3.850442809039123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164930 + }, + { + "epoch": 0.7999291926384593, + "grad_norm": 2.8753705194617396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164940 + }, + { + "epoch": 0.7999776908312954, + "grad_norm": 3.003951931646043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164950 + }, + { + "epoch": 0.8000261890241315, + "grad_norm": 2.6911797235129598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164960 + }, + { + "epoch": 0.8000746872169676, + "grad_norm": 2.8596730317076435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164970 + }, + { + "epoch": 0.8001231854098037, + "grad_norm": 2.6789034990315486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164980 + }, + { + "epoch": 0.8001716836026398, + "grad_norm": 3.631823020100455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 164990 + }, + { + "epoch": 0.8002201817954758, + "grad_norm": 2.7603251240293503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165000 + }, + { + "epoch": 0.800268679988312, + "grad_norm": 2.709411717205512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165010 + }, + { + "epoch": 0.800317178181148, + "grad_norm": 2.582222435876247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165020 + }, + { + "epoch": 0.8003656763739841, + "grad_norm": 2.57788315138896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165030 + }, + { + "epoch": 0.8004141745668202, + "grad_norm": 3.2861439791531666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165040 + }, + { + "epoch": 0.8004626727596563, + "grad_norm": 3.3149891720540836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165050 + }, + { + "epoch": 0.8005111709524924, + "grad_norm": 3.107538404378829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165060 + }, + { + "epoch": 0.8005596691453285, + "grad_norm": 2.8349649738856897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165070 + }, + { + "epoch": 0.8006081673381645, + "grad_norm": 2.6302462430294327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165080 + }, + { + "epoch": 0.8006566655310007, + "grad_norm": 2.830630485561869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165090 + }, + { + "epoch": 0.8007051637238367, + "grad_norm": 3.837493878222631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165100 + }, + { + "epoch": 0.8007536619166729, + "grad_norm": 2.3484878042268065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165110 + }, + { + "epoch": 0.8008021601095089, + "grad_norm": 3.650425028922655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165120 + }, + { + "epoch": 0.800850658302345, + "grad_norm": 2.6256493868004327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165130 + }, + { + "epoch": 0.8008991564951811, + "grad_norm": 2.953514233183796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165140 + }, + { + "epoch": 0.8009476546880172, + "grad_norm": 2.9459581440960392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165150 + }, + { + "epoch": 0.8009961528808532, + "grad_norm": 2.539204047025123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165160 + }, + { + "epoch": 0.8010446510736894, + "grad_norm": 2.5091502209306782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165170 + }, + { + "epoch": 0.8010931492665254, + "grad_norm": 2.529898246450557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165180 + }, + { + "epoch": 0.8011416474593616, + "grad_norm": 2.7942768099364912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165190 + }, + { + "epoch": 0.8011901456521976, + "grad_norm": 3.700374762161118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165200 + }, + { + "epoch": 0.8012386438450337, + "grad_norm": 3.5587330415864926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165210 + }, + { + "epoch": 0.8012871420378698, + "grad_norm": 2.6217733761768613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165220 + }, + { + "epoch": 0.8013356402307059, + "grad_norm": 2.7453433304458486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165230 + }, + { + "epoch": 0.801384138423542, + "grad_norm": 2.7808845004528848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165240 + }, + { + "epoch": 0.8014326366163781, + "grad_norm": 2.8979350474855892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165250 + }, + { + "epoch": 0.8014811348092141, + "grad_norm": 2.5786455637444305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165260 + }, + { + "epoch": 0.8015296330020503, + "grad_norm": 2.6008688536194313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165270 + }, + { + "epoch": 0.8015781311948863, + "grad_norm": 2.985110469921892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165280 + }, + { + "epoch": 0.8016266293877224, + "grad_norm": 3.108678825469724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165290 + }, + { + "epoch": 0.8016751275805585, + "grad_norm": 3.166064033166549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165300 + }, + { + "epoch": 0.8017236257733946, + "grad_norm": 2.777469276793454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165310 + }, + { + "epoch": 0.8017721239662307, + "grad_norm": 2.602172521903867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165320 + }, + { + "epoch": 0.8018206221590668, + "grad_norm": 3.533379100417733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165330 + }, + { + "epoch": 0.8018691203519029, + "grad_norm": 2.6933282271102144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165340 + }, + { + "epoch": 0.801917618544739, + "grad_norm": 3.5117125207762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165350 + }, + { + "epoch": 0.8019661167375751, + "grad_norm": 3.3152026901461795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165360 + }, + { + "epoch": 0.8020146149304112, + "grad_norm": 2.5653758228827428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165370 + }, + { + "epoch": 0.8020631131232473, + "grad_norm": 2.3443421426350142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165380 + }, + { + "epoch": 0.8021116113160833, + "grad_norm": 3.075461307844307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165390 + }, + { + "epoch": 0.8021601095089195, + "grad_norm": 2.818206290555736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165400 + }, + { + "epoch": 0.8022086077017555, + "grad_norm": 2.694164713545888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165410 + }, + { + "epoch": 0.8022571058945916, + "grad_norm": 2.820258160340927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165420 + }, + { + "epoch": 0.8023056040874277, + "grad_norm": 4.061674729882725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165430 + }, + { + "epoch": 0.8023541022802638, + "grad_norm": 3.331056319666459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165440 + }, + { + "epoch": 0.8024026004730999, + "grad_norm": 2.8288289044553494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165450 + }, + { + "epoch": 0.802451098665936, + "grad_norm": 2.738171822613822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165460 + }, + { + "epoch": 0.802499596858772, + "grad_norm": 2.4916765539728658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165470 + }, + { + "epoch": 0.8025480950516082, + "grad_norm": 2.4078937954641333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165480 + }, + { + "epoch": 0.8025965932444442, + "grad_norm": 3.4060779086075854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165490 + }, + { + "epoch": 0.8026450914372804, + "grad_norm": 4.0639154263999444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165500 + }, + { + "epoch": 0.8026935896301164, + "grad_norm": 2.466491721975217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165510 + }, + { + "epoch": 0.8027420878229525, + "grad_norm": 2.4187579938939052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165520 + }, + { + "epoch": 0.8027905860157886, + "grad_norm": 2.4707871304485707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165530 + }, + { + "epoch": 0.8028390842086247, + "grad_norm": 6.258164830796886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165540 + }, + { + "epoch": 0.8028875824014607, + "grad_norm": 3.35838414855516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165550 + }, + { + "epoch": 0.8029360805942969, + "grad_norm": 3.116512914402847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165560 + }, + { + "epoch": 0.8029845787871329, + "grad_norm": 3.3708584368241645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165570 + }, + { + "epoch": 0.8030330769799691, + "grad_norm": 2.899869144812328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165580 + }, + { + "epoch": 0.8030815751728051, + "grad_norm": 3.1035501280030076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165590 + }, + { + "epoch": 0.8031300733656412, + "grad_norm": 3.0709728093825106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165600 + }, + { + "epoch": 0.8031785715584773, + "grad_norm": 3.075799170915161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165610 + }, + { + "epoch": 0.8032270697513134, + "grad_norm": 3.064902642790912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165620 + }, + { + "epoch": 0.8032755679441494, + "grad_norm": 3.0370244985533645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165630 + }, + { + "epoch": 0.8033240661369856, + "grad_norm": 2.680510213792786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165640 + }, + { + "epoch": 0.8033725643298216, + "grad_norm": 3.777324053544362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165650 + }, + { + "epoch": 0.8034210625226578, + "grad_norm": 3.26851008480844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165660 + }, + { + "epoch": 0.8034695607154938, + "grad_norm": 2.507397134365874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165670 + }, + { + "epoch": 0.80351805890833, + "grad_norm": 2.4163943734833992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165680 + }, + { + "epoch": 0.803566557101166, + "grad_norm": 3.2791483306482405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165690 + }, + { + "epoch": 0.8036150552940021, + "grad_norm": 2.6350033266453465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165700 + }, + { + "epoch": 0.8036635534868382, + "grad_norm": 2.5890365407121863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165710 + }, + { + "epoch": 0.8037120516796743, + "grad_norm": 2.5875278808484836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165720 + }, + { + "epoch": 0.8037605498725103, + "grad_norm": 2.5182577800819672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165730 + }, + { + "epoch": 0.8038090480653465, + "grad_norm": 2.519490926999879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165740 + }, + { + "epoch": 0.8038575462581825, + "grad_norm": 3.4547525729067274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165750 + }, + { + "epoch": 0.8039060444510187, + "grad_norm": 2.341594296240146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165760 + }, + { + "epoch": 0.8039545426438547, + "grad_norm": 2.611994176504595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165770 + }, + { + "epoch": 0.8040030408366908, + "grad_norm": 2.8831180998167838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165780 + }, + { + "epoch": 0.8040515390295269, + "grad_norm": 2.987570013601726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165790 + }, + { + "epoch": 0.804100037222363, + "grad_norm": 2.982924840466694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165800 + }, + { + "epoch": 0.804148535415199, + "grad_norm": 2.9282659852469806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165810 + }, + { + "epoch": 0.8041970336080352, + "grad_norm": 2.2228496376897056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165820 + }, + { + "epoch": 0.8042455318008712, + "grad_norm": 2.367122498014851e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165830 + }, + { + "epoch": 0.8042940299937074, + "grad_norm": 2.9047042104934917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165840 + }, + { + "epoch": 0.8043425281865435, + "grad_norm": 3.378243107476919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165850 + }, + { + "epoch": 0.8043910263793795, + "grad_norm": 2.5779476331422302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165860 + }, + { + "epoch": 0.8044395245722157, + "grad_norm": 2.291794309883244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165870 + }, + { + "epoch": 0.8044880227650517, + "grad_norm": 2.3701275608800643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165880 + }, + { + "epoch": 0.8045365209578879, + "grad_norm": 2.797325571179954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165890 + }, + { + "epoch": 0.8045850191507239, + "grad_norm": 3.1385862797606023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165900 + }, + { + "epoch": 0.80463351734356, + "grad_norm": 2.748748073599927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165910 + }, + { + "epoch": 0.8046820155363961, + "grad_norm": 3.042802632080566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165920 + }, + { + "epoch": 0.8047305137292322, + "grad_norm": 2.3874154209124754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165930 + }, + { + "epoch": 0.8047790119220682, + "grad_norm": 2.9893328701291466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165940 + }, + { + "epoch": 0.8048275101149044, + "grad_norm": 3.753615018808887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165950 + }, + { + "epoch": 0.8048760083077404, + "grad_norm": 2.604900828373502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165960 + }, + { + "epoch": 0.8049245065005766, + "grad_norm": 2.403048959820353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165970 + }, + { + "epoch": 0.8049730046934126, + "grad_norm": 3.3713565272819324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165980 + }, + { + "epoch": 0.8050215028862487, + "grad_norm": 2.779995611490449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 165990 + }, + { + "epoch": 0.8050700010790848, + "grad_norm": 3.5135805376285134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166000 + }, + { + "epoch": 0.8051184992719209, + "grad_norm": 2.6048549983670455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166010 + }, + { + "epoch": 0.805166997464757, + "grad_norm": 2.6149818310727824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166020 + }, + { + "epoch": 0.8052154956575931, + "grad_norm": 2.5196555952788913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166030 + }, + { + "epoch": 0.8052639938504291, + "grad_norm": 4.883941073785536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166040 + }, + { + "epoch": 0.8053124920432653, + "grad_norm": 2.8857837008899878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166050 + }, + { + "epoch": 0.8053609902361013, + "grad_norm": 2.3049878450365213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166060 + }, + { + "epoch": 0.8054094884289374, + "grad_norm": 2.8888381464753365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166070 + }, + { + "epoch": 0.8054579866217735, + "grad_norm": 2.4059422898403682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166080 + }, + { + "epoch": 0.8055064848146096, + "grad_norm": 2.6299021627096408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166090 + }, + { + "epoch": 0.8055549830074457, + "grad_norm": 2.982762481451573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166100 + }, + { + "epoch": 0.8056034812002818, + "grad_norm": 2.3654699532471568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166110 + }, + { + "epoch": 0.8056519793931178, + "grad_norm": 2.4383259855653705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166120 + }, + { + "epoch": 0.805700477585954, + "grad_norm": 2.2507832042606424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166130 + }, + { + "epoch": 0.80574897577879, + "grad_norm": 2.708591573252761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166140 + }, + { + "epoch": 0.8057974739716262, + "grad_norm": 2.5518286150827407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166150 + }, + { + "epoch": 0.8058459721644622, + "grad_norm": 3.2355803369910063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166160 + }, + { + "epoch": 0.8058944703572983, + "grad_norm": 4.4528743359251166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166170 + }, + { + "epoch": 0.8059429685501344, + "grad_norm": 2.2234603491710914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166180 + }, + { + "epoch": 0.8059914667429705, + "grad_norm": 2.721620440127026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166190 + }, + { + "epoch": 0.8060399649358065, + "grad_norm": 2.9552181146641487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166200 + }, + { + "epoch": 0.8060884631286427, + "grad_norm": 2.2052205395084457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166210 + }, + { + "epoch": 0.8061369613214787, + "grad_norm": 2.173385738046818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166220 + }, + { + "epoch": 0.8061854595143149, + "grad_norm": 2.185706549084898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166230 + }, + { + "epoch": 0.8062339577071509, + "grad_norm": 2.5633243083689194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166240 + }, + { + "epoch": 0.806282455899987, + "grad_norm": 2.7807308455862767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166250 + }, + { + "epoch": 0.8063309540928231, + "grad_norm": 2.251291242316711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166260 + }, + { + "epoch": 0.8063794522856592, + "grad_norm": 2.0409872902860116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166270 + }, + { + "epoch": 0.8064279504784952, + "grad_norm": 2.7606642305499918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166280 + }, + { + "epoch": 0.8064764486713314, + "grad_norm": 3.362145406526906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166290 + }, + { + "epoch": 0.8065249468641674, + "grad_norm": 2.8124551576524937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166300 + }, + { + "epoch": 0.8065734450570036, + "grad_norm": 2.2028869395285255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166310 + }, + { + "epoch": 0.8066219432498396, + "grad_norm": 3.075437504662659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166320 + }, + { + "epoch": 0.8066704414426757, + "grad_norm": 2.0896495200872778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166330 + }, + { + "epoch": 0.8067189396355118, + "grad_norm": 2.842140922609815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166340 + }, + { + "epoch": 0.8067674378283479, + "grad_norm": 2.648632246859961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166350 + }, + { + "epoch": 0.8068159360211841, + "grad_norm": 2.6585942336510016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166360 + }, + { + "epoch": 0.8068644342140201, + "grad_norm": 2.1662630800278748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166370 + }, + { + "epoch": 0.8069129324068562, + "grad_norm": 2.8810124064193587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166380 + }, + { + "epoch": 0.8069614305996923, + "grad_norm": 2.606227411661166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166390 + }, + { + "epoch": 0.8070099287925284, + "grad_norm": 2.5217792298803943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166400 + }, + { + "epoch": 0.8070584269853645, + "grad_norm": 2.33331824972538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166410 + }, + { + "epoch": 0.8071069251782006, + "grad_norm": 2.2247165887279152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166420 + }, + { + "epoch": 0.8071554233710366, + "grad_norm": 2.363058726473355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166430 + }, + { + "epoch": 0.8072039215638728, + "grad_norm": 2.4233596462863716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166440 + }, + { + "epoch": 0.8072524197567088, + "grad_norm": 2.759073147728941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166450 + }, + { + "epoch": 0.807300917949545, + "grad_norm": 2.057267423083431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166460 + }, + { + "epoch": 0.807349416142381, + "grad_norm": 2.2575243008304824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166470 + }, + { + "epoch": 0.8073979143352171, + "grad_norm": 2.205533888854916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166480 + }, + { + "epoch": 0.8074464125280532, + "grad_norm": 2.840117829805422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166490 + }, + { + "epoch": 0.8074949107208893, + "grad_norm": 2.7055184759205986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166500 + }, + { + "epoch": 0.8075434089137253, + "grad_norm": 2.94055340077648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166510 + }, + { + "epoch": 0.8075919071065615, + "grad_norm": 2.0821975255103098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166520 + }, + { + "epoch": 0.8076404052993975, + "grad_norm": 2.3237745949700184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166530 + }, + { + "epoch": 0.8076889034922337, + "grad_norm": 3.242077539766797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166540 + }, + { + "epoch": 0.8077374016850697, + "grad_norm": 2.6282057419280136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166550 + }, + { + "epoch": 0.8077858998779058, + "grad_norm": 2.2342948824416453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166560 + }, + { + "epoch": 0.8078343980707419, + "grad_norm": 2.2003515454116496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166570 + }, + { + "epoch": 0.807882896263578, + "grad_norm": 2.1614479095433126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166580 + }, + { + "epoch": 0.807931394456414, + "grad_norm": 2.506911300770298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166590 + }, + { + "epoch": 0.8079798926492502, + "grad_norm": 2.679456478915654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166600 + }, + { + "epoch": 0.8080283908420862, + "grad_norm": 2.950641686538802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166610 + }, + { + "epoch": 0.8080768890349224, + "grad_norm": 2.0815562606912863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166620 + }, + { + "epoch": 0.8081253872277584, + "grad_norm": 2.34604300430874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166630 + }, + { + "epoch": 0.8081738854205945, + "grad_norm": 2.6436040911903547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166640 + }, + { + "epoch": 0.8082223836134306, + "grad_norm": 2.9629559250565762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166650 + }, + { + "epoch": 0.8082708818062667, + "grad_norm": 2.149699795950255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166660 + }, + { + "epoch": 0.8083193799991027, + "grad_norm": 2.7215572018235434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166670 + }, + { + "epoch": 0.8083678781919389, + "grad_norm": 2.6462178226438482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166680 + }, + { + "epoch": 0.8084163763847749, + "grad_norm": 2.4091928452207867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166690 + }, + { + "epoch": 0.8084648745776111, + "grad_norm": 3.824666450213954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166700 + }, + { + "epoch": 0.8085133727704471, + "grad_norm": 2.2978229097248004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166710 + }, + { + "epoch": 0.8085618709632832, + "grad_norm": 2.884690175619653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166720 + }, + { + "epoch": 0.8086103691561193, + "grad_norm": 2.2797712162514472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166730 + }, + { + "epoch": 0.8086588673489554, + "grad_norm": 2.5094358591104537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166740 + }, + { + "epoch": 0.8087073655417915, + "grad_norm": 3.255962255366285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166750 + }, + { + "epoch": 0.8087558637346276, + "grad_norm": 2.3967242412936685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166760 + }, + { + "epoch": 0.8088043619274636, + "grad_norm": 2.2909548036409433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166770 + }, + { + "epoch": 0.8088528601202998, + "grad_norm": 1.9987222543704775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166780 + }, + { + "epoch": 0.8089013583131358, + "grad_norm": 2.8520069861315278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166790 + }, + { + "epoch": 0.808949856505972, + "grad_norm": 2.3814466842964066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166800 + }, + { + "epoch": 0.808998354698808, + "grad_norm": 2.5535914716101615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166810 + }, + { + "epoch": 0.8090468528916441, + "grad_norm": 2.038450830355032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166820 + }, + { + "epoch": 0.8090953510844802, + "grad_norm": 2.5178422902172315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166830 + }, + { + "epoch": 0.8091438492773163, + "grad_norm": 3.590947272869016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166840 + }, + { + "epoch": 0.8091923474701523, + "grad_norm": 2.340723526117472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166850 + }, + { + "epoch": 0.8092408456629885, + "grad_norm": 3.040882390337174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166860 + }, + { + "epoch": 0.8092893438558246, + "grad_norm": 2.4606141124650094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166870 + }, + { + "epoch": 0.8093378420486607, + "grad_norm": 2.115396036117545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166880 + }, + { + "epoch": 0.8093863402414968, + "grad_norm": 2.8116522443610847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166890 + }, + { + "epoch": 0.8094348384343328, + "grad_norm": 2.359945305840938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166900 + }, + { + "epoch": 0.809483336627169, + "grad_norm": 2.5172649742444264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166910 + }, + { + "epoch": 0.809531834820005, + "grad_norm": 2.0724371552205412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166920 + }, + { + "epoch": 0.8095803330128412, + "grad_norm": 2.633251838801698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166930 + }, + { + "epoch": 0.8096288312056772, + "grad_norm": 2.8481753844289415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166940 + }, + { + "epoch": 0.8096773293985133, + "grad_norm": 2.799960974186888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166950 + }, + { + "epoch": 0.8097258275913494, + "grad_norm": 1.97547027624978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166960 + }, + { + "epoch": 0.8097743257841855, + "grad_norm": 3.616477783907612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166970 + }, + { + "epoch": 0.8098228239770215, + "grad_norm": 2.8738075030787513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166980 + }, + { + "epoch": 0.8098713221698577, + "grad_norm": 3.410060500641521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 166990 + }, + { + "epoch": 0.8099198203626937, + "grad_norm": 2.2674068844708017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167000 + }, + { + "epoch": 0.8099683185555299, + "grad_norm": 2.1500888180980837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167010 + }, + { + "epoch": 0.8100168167483659, + "grad_norm": 1.850176367668155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167020 + }, + { + "epoch": 0.810065314941202, + "grad_norm": 2.8768896598307947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167030 + }, + { + "epoch": 0.8101138131340381, + "grad_norm": 3.021008865289332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167040 + }, + { + "epoch": 0.8101623113268742, + "grad_norm": 2.2539428101708836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167050 + }, + { + "epoch": 0.8102108095197103, + "grad_norm": 2.4393145281464967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167060 + }, + { + "epoch": 0.8102593077125464, + "grad_norm": 2.4255541575257666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167070 + }, + { + "epoch": 0.8103078059053824, + "grad_norm": 2.4664693398790405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167080 + }, + { + "epoch": 0.8103563040982186, + "grad_norm": 2.514227581684736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167090 + }, + { + "epoch": 0.8104048022910546, + "grad_norm": 3.2493193913296636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167100 + }, + { + "epoch": 0.8104533004838907, + "grad_norm": 2.7097573962464594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167110 + }, + { + "epoch": 0.8105017986767268, + "grad_norm": 2.1345806899830677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167120 + }, + { + "epoch": 0.8105502968695629, + "grad_norm": 2.7200917074310382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167130 + }, + { + "epoch": 0.810598795062399, + "grad_norm": 2.265377752053155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167140 + }, + { + "epoch": 0.8106472932552351, + "grad_norm": 2.9486644237408655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167150 + }, + { + "epoch": 0.8106957914480711, + "grad_norm": 2.200314241918022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167160 + }, + { + "epoch": 0.8107442896409073, + "grad_norm": 2.1498960833810088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167170 + }, + { + "epoch": 0.8107927878337433, + "grad_norm": 2.7041890504619914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167180 + }, + { + "epoch": 0.8108412860265795, + "grad_norm": 2.980342728164942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167190 + }, + { + "epoch": 0.8108897842194155, + "grad_norm": 2.95877402578526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167200 + }, + { + "epoch": 0.8109382824122516, + "grad_norm": 2.5517762125559784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167210 + }, + { + "epoch": 0.8109867806050877, + "grad_norm": 1.906758484437887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167220 + }, + { + "epoch": 0.8110352787979238, + "grad_norm": 2.2813141598021502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167230 + }, + { + "epoch": 0.8110837769907598, + "grad_norm": 2.4301005652205276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167240 + }, + { + "epoch": 0.811132275183596, + "grad_norm": 2.9305139648272416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167250 + }, + { + "epoch": 0.811180773376432, + "grad_norm": 2.2998833060228208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167260 + }, + { + "epoch": 0.8112292715692682, + "grad_norm": 2.1088581547701324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167270 + }, + { + "epoch": 0.8112777697621042, + "grad_norm": 2.5044872842272525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167280 + }, + { + "epoch": 0.8113262679549403, + "grad_norm": 2.5136863257557707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167290 + }, + { + "epoch": 0.8113747661477764, + "grad_norm": 2.2791416753875637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167300 + }, + { + "epoch": 0.8114232643406125, + "grad_norm": 2.101701035428505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167310 + }, + { + "epoch": 0.8114717625334485, + "grad_norm": 2.120525977034049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167320 + }, + { + "epoch": 0.8115202607262847, + "grad_norm": 2.6680188724981235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167330 + }, + { + "epoch": 0.8115687589191207, + "grad_norm": 2.752459415944486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167340 + }, + { + "epoch": 0.8116172571119569, + "grad_norm": 2.8610063651512974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167350 + }, + { + "epoch": 0.8116657553047929, + "grad_norm": 2.0977960701884513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167360 + }, + { + "epoch": 0.811714253497629, + "grad_norm": 2.8330136458976085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167370 + }, + { + "epoch": 0.8117627516904651, + "grad_norm": 2.1439442221549143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167380 + }, + { + "epoch": 0.8118112498833012, + "grad_norm": 2.395333709159786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167390 + }, + { + "epoch": 0.8118597480761374, + "grad_norm": 2.617681182925935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167400 + }, + { + "epoch": 0.8119082462689734, + "grad_norm": 2.3867372078711924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167410 + }, + { + "epoch": 0.8119567444618095, + "grad_norm": 3.026545059015007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167420 + }, + { + "epoch": 0.8120052426546456, + "grad_norm": 1.9909599302536662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167430 + }, + { + "epoch": 0.8120537408474817, + "grad_norm": 9.145571766566718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167440 + }, + { + "epoch": 0.8121022390403178, + "grad_norm": 2.2384730513635986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167450 + }, + { + "epoch": 0.8121507372331539, + "grad_norm": 2.4704148060550324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167460 + }, + { + "epoch": 0.8121992354259899, + "grad_norm": 1.9263081796339065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167470 + }, + { + "epoch": 0.8122477336188261, + "grad_norm": 2.2945410904640084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167480 + }, + { + "epoch": 0.8122962318116621, + "grad_norm": 2.8233550608547375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167490 + }, + { + "epoch": 0.8123447300044982, + "grad_norm": 2.3239111968109682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167500 + }, + { + "epoch": 0.8123932281973343, + "grad_norm": 3.32541354453042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167510 + }, + { + "epoch": 0.8124417263901704, + "grad_norm": 2.2882954198166772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167520 + }, + { + "epoch": 0.8124902245830065, + "grad_norm": 2.3657705128243833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167530 + }, + { + "epoch": 0.8125387227758426, + "grad_norm": 2.495964857018862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167540 + }, + { + "epoch": 0.8125872209686786, + "grad_norm": 3.17678257033549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167550 + }, + { + "epoch": 0.8126357191615148, + "grad_norm": 2.2123831655562753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167560 + }, + { + "epoch": 0.8126842173543508, + "grad_norm": 1.8853782535188657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167570 + }, + { + "epoch": 0.812732715547187, + "grad_norm": 2.2929201648480557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167580 + }, + { + "epoch": 0.812781213740023, + "grad_norm": 2.089352335588046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167590 + }, + { + "epoch": 0.8128297119328591, + "grad_norm": 2.4363412620687086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167600 + }, + { + "epoch": 0.8128782101256952, + "grad_norm": 2.1652187598419914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167610 + }, + { + "epoch": 0.8129267083185313, + "grad_norm": 2.3871566057209748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167620 + }, + { + "epoch": 0.8129752065113673, + "grad_norm": 1.8662365874888565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167630 + }, + { + "epoch": 0.8130237047042035, + "grad_norm": 2.1009290307461015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167640 + }, + { + "epoch": 0.8130722028970395, + "grad_norm": 2.6991541446363954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167650 + }, + { + "epoch": 0.8131207010898757, + "grad_norm": 1.9413299412462948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167660 + }, + { + "epoch": 0.8131691992827117, + "grad_norm": 1.9005140572403434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167670 + }, + { + "epoch": 0.8132176974755478, + "grad_norm": 1.8624342956741202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167680 + }, + { + "epoch": 0.8132661956683839, + "grad_norm": 2.0633825315030663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167690 + }, + { + "epoch": 0.81331469386122, + "grad_norm": 2.7231319421616718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167700 + }, + { + "epoch": 0.813363192054056, + "grad_norm": 2.115680786118901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167710 + }, + { + "epoch": 0.8134116902468922, + "grad_norm": 2.2940827903994432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167720 + }, + { + "epoch": 0.8134601884397282, + "grad_norm": 2.1972649477675077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167730 + }, + { + "epoch": 0.8135086866325644, + "grad_norm": 2.2657790310631754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167740 + }, + { + "epoch": 0.8135571848254004, + "grad_norm": 2.584943992189892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167750 + }, + { + "epoch": 0.8136056830182365, + "grad_norm": 1.876097677211419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167760 + }, + { + "epoch": 0.8136541812110726, + "grad_norm": 2.1643863590270485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167770 + }, + { + "epoch": 0.8137026794039087, + "grad_norm": 2.4483872707037335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167780 + }, + { + "epoch": 0.8137511775967448, + "grad_norm": 3.025163408665321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167790 + }, + { + "epoch": 0.8137996757895809, + "grad_norm": 2.5950397386509394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167800 + }, + { + "epoch": 0.8138481739824169, + "grad_norm": 2.4547105681449466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167810 + }, + { + "epoch": 0.8138966721752531, + "grad_norm": 2.8671790275325293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167820 + }, + { + "epoch": 0.8139451703680891, + "grad_norm": 1.979056918344213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167830 + }, + { + "epoch": 0.8139936685609253, + "grad_norm": 2.3444597374577825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167840 + }, + { + "epoch": 0.8140421667537613, + "grad_norm": 2.9586079364207762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167850 + }, + { + "epoch": 0.8140906649465974, + "grad_norm": 2.1532356342390813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167860 + }, + { + "epoch": 0.8141391631394335, + "grad_norm": 1.8019822078940706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167870 + }, + { + "epoch": 0.8141876613322696, + "grad_norm": 3.381963864512727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167880 + }, + { + "epoch": 0.8142361595251056, + "grad_norm": 2.9079606278514802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167890 + }, + { + "epoch": 0.8142846577179418, + "grad_norm": 2.615383110082803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167900 + }, + { + "epoch": 0.8143331559107779, + "grad_norm": 1.9832761211091565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167910 + }, + { + "epoch": 0.814381654103614, + "grad_norm": 1.964197871018314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167920 + }, + { + "epoch": 0.8144301522964501, + "grad_norm": 2.1347345224853598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167930 + }, + { + "epoch": 0.8144786504892861, + "grad_norm": 2.566838297468621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167940 + }, + { + "epoch": 0.8145271486821223, + "grad_norm": 2.4705686385573244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167950 + }, + { + "epoch": 0.8145756468749583, + "grad_norm": 2.516753916381731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167960 + }, + { + "epoch": 0.8146241450677945, + "grad_norm": 2.2732571380856825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167970 + }, + { + "epoch": 0.8146726432606305, + "grad_norm": 2.1643185021957834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167980 + }, + { + "epoch": 0.8147211414534666, + "grad_norm": 2.581117719557824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 167990 + }, + { + "epoch": 0.8147696396463027, + "grad_norm": 2.5310779250276028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168000 + }, + { + "epoch": 0.8148181378391388, + "grad_norm": 1.8370011289903232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168010 + }, + { + "epoch": 0.8148666360319748, + "grad_norm": 2.1984206455272215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168020 + }, + { + "epoch": 0.814915134224811, + "grad_norm": 2.2881458505707997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168030 + }, + { + "epoch": 0.814963632417647, + "grad_norm": 2.279584876418994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168040 + }, + { + "epoch": 0.8150121306104832, + "grad_norm": 2.6268899944170698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168050 + }, + { + "epoch": 0.8150606288033192, + "grad_norm": 2.2870001004093865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168060 + }, + { + "epoch": 0.8151091269961553, + "grad_norm": 1.8146499414228856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168070 + }, + { + "epoch": 0.8151576251889914, + "grad_norm": 2.3820305727895175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168080 + }, + { + "epoch": 0.8152061233818275, + "grad_norm": 3.35442749133108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168090 + }, + { + "epoch": 0.8152546215746636, + "grad_norm": 2.4934317721658772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168100 + }, + { + "epoch": 0.8153031197674997, + "grad_norm": 1.7758230441700107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168110 + }, + { + "epoch": 0.8153516179603357, + "grad_norm": 2.223183415139829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168120 + }, + { + "epoch": 0.8154001161531719, + "grad_norm": 2.1753393752987904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168130 + }, + { + "epoch": 0.8154486143460079, + "grad_norm": 2.2191436244156648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168140 + }, + { + "epoch": 0.815497112538844, + "grad_norm": 2.270682131211288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168150 + }, + { + "epoch": 0.8155456107316801, + "grad_norm": 1.8038608828874203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168160 + }, + { + "epoch": 0.8155941089245162, + "grad_norm": 2.374992114084762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168170 + }, + { + "epoch": 0.8156426071173523, + "grad_norm": 2.0006494239055428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168180 + }, + { + "epoch": 0.8156911053101884, + "grad_norm": 2.0352532104084275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168190 + }, + { + "epoch": 0.8157396035030244, + "grad_norm": 2.163493206808198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168200 + }, + { + "epoch": 0.8157881016958606, + "grad_norm": 1.9551004371010094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168210 + }, + { + "epoch": 0.8158365998886966, + "grad_norm": 1.90061264504493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168220 + }, + { + "epoch": 0.8158850980815328, + "grad_norm": 2.2567954616192765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168230 + }, + { + "epoch": 0.8159335962743688, + "grad_norm": 2.7089194887253143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168240 + }, + { + "epoch": 0.8159820944672049, + "grad_norm": 2.284599887047989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168250 + }, + { + "epoch": 0.816030592660041, + "grad_norm": 2.4010397225993074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168260 + }, + { + "epoch": 0.8160790908528771, + "grad_norm": 2.317902136894645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168270 + }, + { + "epoch": 0.8161275890457131, + "grad_norm": 1.9595791656001893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168280 + }, + { + "epoch": 0.8161760872385493, + "grad_norm": 2.290049572195585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168290 + }, + { + "epoch": 0.8162245854313853, + "grad_norm": 2.4143384180774774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168300 + }, + { + "epoch": 0.8162730836242215, + "grad_norm": 1.6334682939600498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168310 + }, + { + "epoch": 0.8163215818170575, + "grad_norm": 1.9044344767848997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168320 + }, + { + "epoch": 0.8163700800098936, + "grad_norm": 2.218370376283474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168330 + }, + { + "epoch": 0.8164185782027297, + "grad_norm": 2.1251766568752828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168340 + }, + { + "epoch": 0.8164670763955658, + "grad_norm": 2.1098912839079276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168350 + }, + { + "epoch": 0.8165155745884018, + "grad_norm": 2.2240710606524772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168360 + }, + { + "epoch": 0.816564072781238, + "grad_norm": 1.9294770225997127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168370 + }, + { + "epoch": 0.816612570974074, + "grad_norm": 2.157830003568506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168380 + }, + { + "epoch": 0.8166610691669102, + "grad_norm": 2.328008186225361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168390 + }, + { + "epoch": 0.8167095673597462, + "grad_norm": 2.6511775885751376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168400 + }, + { + "epoch": 0.8167580655525823, + "grad_norm": 3.4469479714971385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168410 + }, + { + "epoch": 0.8168065637454185, + "grad_norm": 1.814340500061462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168420 + }, + { + "epoch": 0.8168550619382545, + "grad_norm": 2.120528996840676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168430 + }, + { + "epoch": 0.8169035601310907, + "grad_norm": 2.9081816066423016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168440 + }, + { + "epoch": 0.8169520583239267, + "grad_norm": 1.9689093022634552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168450 + }, + { + "epoch": 0.8170005565167628, + "grad_norm": 2.365706741613849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168460 + }, + { + "epoch": 0.8170490547095989, + "grad_norm": 1.968768081894723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168470 + }, + { + "epoch": 0.817097552902435, + "grad_norm": 2.1712107667326563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168480 + }, + { + "epoch": 0.817146051095271, + "grad_norm": 2.5141421389207608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168490 + }, + { + "epoch": 0.8171945492881072, + "grad_norm": 2.1721898946225338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168500 + }, + { + "epoch": 0.8172430474809432, + "grad_norm": 2.0731535599338713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168510 + }, + { + "epoch": 0.8172915456737794, + "grad_norm": 2.459872128213192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168520 + }, + { + "epoch": 0.8173400438666154, + "grad_norm": 2.673659871277323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168530 + }, + { + "epoch": 0.8173885420594516, + "grad_norm": 2.5470999531762573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168540 + }, + { + "epoch": 0.8174370402522876, + "grad_norm": 1.946897576488027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168550 + }, + { + "epoch": 0.8174855384451237, + "grad_norm": 2.7348882269961905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168560 + }, + { + "epoch": 0.8175340366379598, + "grad_norm": 1.7644055105847656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168570 + }, + { + "epoch": 0.8175825348307959, + "grad_norm": 2.3525032588622707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168580 + }, + { + "epoch": 0.8176310330236319, + "grad_norm": 2.3869679566246305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168590 + }, + { + "epoch": 0.8176795312164681, + "grad_norm": 2.4825233424508042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168600 + }, + { + "epoch": 0.8177280294093041, + "grad_norm": 1.9754974545094228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168610 + }, + { + "epoch": 0.8177765276021403, + "grad_norm": 2.650113017921285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168620 + }, + { + "epoch": 0.8178250257949763, + "grad_norm": 2.1186028931197143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168630 + }, + { + "epoch": 0.8178735239878124, + "grad_norm": 4.1239104575652163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168640 + }, + { + "epoch": 0.8179220221806485, + "grad_norm": 2.2568299229419608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168650 + }, + { + "epoch": 0.8179705203734846, + "grad_norm": 2.0686297119709707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168660 + }, + { + "epoch": 0.8180190185663206, + "grad_norm": 2.1953855622314222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168670 + }, + { + "epoch": 0.8180675167591568, + "grad_norm": 1.6910735922692766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168680 + }, + { + "epoch": 0.8181160149519928, + "grad_norm": 2.169929658180081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168690 + }, + { + "epoch": 0.818164513144829, + "grad_norm": 3.546292148826069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168700 + }, + { + "epoch": 0.818213011337665, + "grad_norm": 2.085603156842808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168710 + }, + { + "epoch": 0.8182615095305011, + "grad_norm": 1.9132793127596415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168720 + }, + { + "epoch": 0.8183100077233372, + "grad_norm": 2.3488928135861897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168730 + }, + { + "epoch": 0.8183585059161733, + "grad_norm": 2.181578473425816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168740 + }, + { + "epoch": 0.8184070041090094, + "grad_norm": 2.3519106662206468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168750 + }, + { + "epoch": 0.8184555023018455, + "grad_norm": 2.0662925592773718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168760 + }, + { + "epoch": 0.8185040004946815, + "grad_norm": 2.0937854117164534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168770 + }, + { + "epoch": 0.8185524986875177, + "grad_norm": 2.4445569124509348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168780 + }, + { + "epoch": 0.8186009968803537, + "grad_norm": 2.4790887565018238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168790 + }, + { + "epoch": 0.8186494950731898, + "grad_norm": 2.1146874473743082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168800 + }, + { + "epoch": 0.8186979932660259, + "grad_norm": 1.710655794795457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168810 + }, + { + "epoch": 0.818746491458862, + "grad_norm": 2.0613347473386057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168820 + }, + { + "epoch": 0.8187949896516981, + "grad_norm": 2.1526629367940586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168830 + }, + { + "epoch": 0.8188434878445342, + "grad_norm": 2.451317904217376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168840 + }, + { + "epoch": 0.8188919860373702, + "grad_norm": 2.218100902950937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168850 + }, + { + "epoch": 0.8189404842302064, + "grad_norm": 1.7863859724798203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168860 + }, + { + "epoch": 0.8189889824230424, + "grad_norm": 1.9287371699761025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168870 + }, + { + "epoch": 0.8190374806158786, + "grad_norm": 1.7738619462193128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168880 + }, + { + "epoch": 0.8190859788087146, + "grad_norm": 1.9208140855653255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168890 + }, + { + "epoch": 0.8191344770015507, + "grad_norm": 2.044240865473057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168900 + }, + { + "epoch": 0.8191829751943868, + "grad_norm": 1.7609702140930494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168910 + }, + { + "epoch": 0.8192314733872229, + "grad_norm": 2.147899458293523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168920 + }, + { + "epoch": 0.819279971580059, + "grad_norm": 2.076981076015727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168930 + }, + { + "epoch": 0.8193284697728951, + "grad_norm": 2.3996683751192904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168940 + }, + { + "epoch": 0.8193769679657312, + "grad_norm": 2.5223121369322143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168950 + }, + { + "epoch": 0.8194254661585673, + "grad_norm": 2.4243721696848297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168960 + }, + { + "epoch": 0.8194739643514034, + "grad_norm": 5.181614426419401e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168970 + }, + { + "epoch": 0.8195224625442394, + "grad_norm": 3.154877958877478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168980 + }, + { + "epoch": 0.8195709607370756, + "grad_norm": 2.1752967427346448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 168990 + }, + { + "epoch": 0.8196194589299116, + "grad_norm": 2.6853864909526237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169000 + }, + { + "epoch": 0.8196679571227478, + "grad_norm": 2.9207363638761308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169010 + }, + { + "epoch": 0.8197164553155838, + "grad_norm": 2.336804882929755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169020 + }, + { + "epoch": 0.8197649535084199, + "grad_norm": 2.2613678041238927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169030 + }, + { + "epoch": 0.819813451701256, + "grad_norm": 2.4702243806018487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169040 + }, + { + "epoch": 0.8198619498940921, + "grad_norm": 2.599753656795656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169050 + }, + { + "epoch": 0.8199104480869281, + "grad_norm": 1.7100438398642837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169060 + }, + { + "epoch": 0.8199589462797643, + "grad_norm": 2.627325201842723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169070 + }, + { + "epoch": 0.8200074444726003, + "grad_norm": 1.992527920435805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169080 + }, + { + "epoch": 0.8200559426654365, + "grad_norm": 3.838339779349553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169090 + }, + { + "epoch": 0.8201044408582725, + "grad_norm": 2.463556647569476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169100 + }, + { + "epoch": 0.8201529390511086, + "grad_norm": 1.790773929144507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169110 + }, + { + "epoch": 0.8202014372439447, + "grad_norm": 2.0741893536069256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169120 + }, + { + "epoch": 0.8202499354367808, + "grad_norm": 2.3887576361403262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169130 + }, + { + "epoch": 0.8202984336296169, + "grad_norm": 1.981230823844271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169140 + }, + { + "epoch": 0.820346931822453, + "grad_norm": 2.2364773144545325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169150 + }, + { + "epoch": 0.820395430015289, + "grad_norm": 2.215659655746549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169160 + }, + { + "epoch": 0.8204439282081252, + "grad_norm": 1.8170700499808845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169170 + }, + { + "epoch": 0.8204924264009612, + "grad_norm": 1.696074392043556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169180 + }, + { + "epoch": 0.8205409245937973, + "grad_norm": 2.722168979119033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169190 + }, + { + "epoch": 0.8205894227866334, + "grad_norm": 3.4441818286268244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169200 + }, + { + "epoch": 0.8206379209794695, + "grad_norm": 2.13498836387771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169210 + }, + { + "epoch": 0.8206864191723056, + "grad_norm": 2.0844540316034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169220 + }, + { + "epoch": 0.8207349173651417, + "grad_norm": 1.902641777462577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169230 + }, + { + "epoch": 0.8207834155579777, + "grad_norm": 2.3537014115504462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169240 + }, + { + "epoch": 0.8208319137508139, + "grad_norm": 2.1847519349194044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169250 + }, + { + "epoch": 0.8208804119436499, + "grad_norm": 1.747482869518535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169260 + }, + { + "epoch": 0.8209289101364861, + "grad_norm": 1.9307394794054744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169270 + }, + { + "epoch": 0.8209774083293221, + "grad_norm": 1.6629343235763372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169280 + }, + { + "epoch": 0.8210259065221582, + "grad_norm": 2.263363008125907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169290 + }, + { + "epoch": 0.8210744047149943, + "grad_norm": 2.638194551707329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169300 + }, + { + "epoch": 0.8211229029078304, + "grad_norm": 2.249034203316569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169310 + }, + { + "epoch": 0.8211714011006664, + "grad_norm": 2.552444122727593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169320 + }, + { + "epoch": 0.8212198992935026, + "grad_norm": 2.2247755637749833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169330 + }, + { + "epoch": 0.8212683974863386, + "grad_norm": 2.0982307447070525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169340 + }, + { + "epoch": 0.8213168956791748, + "grad_norm": 2.4079444216340562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169350 + }, + { + "epoch": 0.8213653938720108, + "grad_norm": 1.7542799213288163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169360 + }, + { + "epoch": 0.8214138920648469, + "grad_norm": 2.6141904641008296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169370 + }, + { + "epoch": 0.821462390257683, + "grad_norm": 2.481338867710292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169380 + }, + { + "epoch": 0.8215108884505191, + "grad_norm": 2.4940908005532947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169390 + }, + { + "epoch": 0.8215593866433551, + "grad_norm": 2.294518708367832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169400 + }, + { + "epoch": 0.8216078848361913, + "grad_norm": 2.601269777358084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169410 + }, + { + "epoch": 0.8216563830290273, + "grad_norm": 2.1812544659383093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169420 + }, + { + "epoch": 0.8217048812218635, + "grad_norm": 2.1261421068174968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169430 + }, + { + "epoch": 0.8217533794146996, + "grad_norm": 2.333653270625291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169440 + }, + { + "epoch": 0.8218018776075356, + "grad_norm": 1.9425815622753362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169450 + }, + { + "epoch": 0.8218503758003718, + "grad_norm": 1.885421774261431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169460 + }, + { + "epoch": 0.8218988739932078, + "grad_norm": 1.9344744472959974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169470 + }, + { + "epoch": 0.821947372186044, + "grad_norm": 2.0082715934677253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169480 + }, + { + "epoch": 0.82199587037888, + "grad_norm": 2.2381295039508586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169490 + }, + { + "epoch": 0.8220443685717161, + "grad_norm": 2.427481149425148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169500 + }, + { + "epoch": 0.8220928667645522, + "grad_norm": 1.8674086277314927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169510 + }, + { + "epoch": 0.8221413649573883, + "grad_norm": 1.736891697134979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169520 + }, + { + "epoch": 0.8221898631502244, + "grad_norm": 1.930758308787972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169530 + }, + { + "epoch": 0.8222383613430605, + "grad_norm": 2.184110847736065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169540 + }, + { + "epoch": 0.8222868595358965, + "grad_norm": 2.323055525721429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169550 + }, + { + "epoch": 0.8223353577287327, + "grad_norm": 2.3167018525782623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169560 + }, + { + "epoch": 0.8223838559215687, + "grad_norm": 2.6162133792695386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169570 + }, + { + "epoch": 0.8224323541144049, + "grad_norm": 1.928060378020291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169580 + }, + { + "epoch": 0.8224808523072409, + "grad_norm": 2.1627831969794897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169590 + }, + { + "epoch": 0.822529350500077, + "grad_norm": 2.158680345587527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169600 + }, + { + "epoch": 0.8225778486929131, + "grad_norm": 1.9115782734502318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169610 + }, + { + "epoch": 0.8226263468857492, + "grad_norm": 1.6132165825410993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169620 + }, + { + "epoch": 0.8226748450785852, + "grad_norm": 1.9517148786007965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169630 + }, + { + "epoch": 0.8227233432714214, + "grad_norm": 2.7776371425147772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169640 + }, + { + "epoch": 0.8227718414642574, + "grad_norm": 1.98805256701462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169650 + }, + { + "epoch": 0.8228203396570936, + "grad_norm": 2.3581572250463978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169660 + }, + { + "epoch": 0.8228688378499296, + "grad_norm": 2.0384762322578354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169670 + }, + { + "epoch": 0.8229173360427657, + "grad_norm": 2.079582017699977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169680 + }, + { + "epoch": 0.8229658342356018, + "grad_norm": 2.03685743827009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169690 + }, + { + "epoch": 0.8230143324284379, + "grad_norm": 2.1140976969036274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169700 + }, + { + "epoch": 0.8230628306212739, + "grad_norm": 1.7643932537225737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169710 + }, + { + "epoch": 0.8231113288141101, + "grad_norm": 2.6525729168724865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169720 + }, + { + "epoch": 0.8231598270069461, + "grad_norm": 2.0139392375995158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169730 + }, + { + "epoch": 0.8232083251997823, + "grad_norm": 2.32773853525714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169740 + }, + { + "epoch": 0.8232568233926183, + "grad_norm": 2.2395459708945964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169750 + }, + { + "epoch": 0.8233053215854544, + "grad_norm": 2.240506624673344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169760 + }, + { + "epoch": 0.8233538197782905, + "grad_norm": 2.0242767462264055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169770 + }, + { + "epoch": 0.8234023179711266, + "grad_norm": 1.7690354070509784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169780 + }, + { + "epoch": 0.8234508161639627, + "grad_norm": 2.3698392581650296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169790 + }, + { + "epoch": 0.8234993143567988, + "grad_norm": 1.875593902411765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169800 + }, + { + "epoch": 0.8235478125496348, + "grad_norm": 2.86053083442539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169810 + }, + { + "epoch": 0.823596310742471, + "grad_norm": 1.9278447282999878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169820 + }, + { + "epoch": 0.823644808935307, + "grad_norm": 1.578978547911447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169830 + }, + { + "epoch": 0.8236933071281431, + "grad_norm": 2.605425564183861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169840 + }, + { + "epoch": 0.8237418053209792, + "grad_norm": 1.7431524668154452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169850 + }, + { + "epoch": 0.8237903035138153, + "grad_norm": 2.711669111477022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169860 + }, + { + "epoch": 0.8238388017066514, + "grad_norm": 2.4614401183953305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169870 + }, + { + "epoch": 0.8238872998994875, + "grad_norm": 1.7273571017994982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169880 + }, + { + "epoch": 0.8239357980923235, + "grad_norm": 2.272053478691305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169890 + }, + { + "epoch": 0.8239842962851597, + "grad_norm": 1.812291472447214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169900 + }, + { + "epoch": 0.8240327944779957, + "grad_norm": 1.7343408487136003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169910 + }, + { + "epoch": 0.8240812926708319, + "grad_norm": 2.7845839412066198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169920 + }, + { + "epoch": 0.8241297908636679, + "grad_norm": 2.0478283957459098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169930 + }, + { + "epoch": 0.824178289056504, + "grad_norm": 2.2826286638633064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169940 + }, + { + "epoch": 0.8242267872493402, + "grad_norm": 2.3232589185795405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169950 + }, + { + "epoch": 0.8242752854421762, + "grad_norm": 2.0215841445292426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169960 + }, + { + "epoch": 0.8243237836350124, + "grad_norm": 2.2787013165270764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169970 + }, + { + "epoch": 0.8243722818278484, + "grad_norm": 1.8374654686681424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169980 + }, + { + "epoch": 0.8244207800206845, + "grad_norm": 1.912661318215214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 169990 + }, + { + "epoch": 0.8244692782135206, + "grad_norm": 2.2377420805241854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170000 + }, + { + "epoch": 0.8245177764063567, + "grad_norm": 2.124091658117777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170010 + }, + { + "epoch": 0.8245662745991927, + "grad_norm": 2.496904016879853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170020 + }, + { + "epoch": 0.8246147727920289, + "grad_norm": 2.4604361215097015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170030 + }, + { + "epoch": 0.8246632709848649, + "grad_norm": 2.503517571028624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170040 + }, + { + "epoch": 0.8247117691777011, + "grad_norm": 2.134147614185622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170050 + }, + { + "epoch": 0.8247602673705371, + "grad_norm": 1.4804331982531949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170060 + }, + { + "epoch": 0.8248087655633732, + "grad_norm": 2.0967164005014638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170070 + }, + { + "epoch": 0.8248572637562093, + "grad_norm": 2.7606347430264577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170080 + }, + { + "epoch": 0.8249057619490454, + "grad_norm": 2.1813251649405174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170090 + }, + { + "epoch": 0.8249542601418814, + "grad_norm": 2.473354321352872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170100 + }, + { + "epoch": 0.8250027583347176, + "grad_norm": 1.7976015342924256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170110 + }, + { + "epoch": 0.8250512565275536, + "grad_norm": 2.5608066778204375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170120 + }, + { + "epoch": 0.8250997547203898, + "grad_norm": 2.6327059643449502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170130 + }, + { + "epoch": 0.8251482529132258, + "grad_norm": 2.195632120560731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170140 + }, + { + "epoch": 0.8251967511060619, + "grad_norm": 2.1074267664289437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170150 + }, + { + "epoch": 0.825245249298898, + "grad_norm": 2.077324978699835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170160 + }, + { + "epoch": 0.8252937474917341, + "grad_norm": 1.7863476031720893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170170 + }, + { + "epoch": 0.8253422456845702, + "grad_norm": 1.8066453222331802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170180 + }, + { + "epoch": 0.8253907438774063, + "grad_norm": 1.901839219442536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170190 + }, + { + "epoch": 0.8254392420702423, + "grad_norm": 2.6973298261623313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170200 + }, + { + "epoch": 0.8254877402630785, + "grad_norm": 2.2502558039150244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170210 + }, + { + "epoch": 0.8255362384559145, + "grad_norm": 2.1075731382325102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170220 + }, + { + "epoch": 0.8255847366487507, + "grad_norm": 1.4886877508502039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170230 + }, + { + "epoch": 0.8256332348415867, + "grad_norm": 2.8520604544723938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170240 + }, + { + "epoch": 0.8256817330344228, + "grad_norm": 1.790652248701008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170250 + }, + { + "epoch": 0.8257302312272589, + "grad_norm": 2.443931634843466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170260 + }, + { + "epoch": 0.825778729420095, + "grad_norm": 2.0668361244702282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170270 + }, + { + "epoch": 0.825827227612931, + "grad_norm": 1.8423492065267055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170280 + }, + { + "epoch": 0.8258757258057672, + "grad_norm": 2.1875605327181802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170290 + }, + { + "epoch": 0.8259242239986032, + "grad_norm": 1.9556798847020218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170300 + }, + { + "epoch": 0.8259727221914394, + "grad_norm": 1.7372483895883306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170310 + }, + { + "epoch": 0.8260212203842754, + "grad_norm": 2.0796784738763563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170320 + }, + { + "epoch": 0.8260697185771115, + "grad_norm": 1.995332254978166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170330 + }, + { + "epoch": 0.8261182167699476, + "grad_norm": 3.003500737008835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170340 + }, + { + "epoch": 0.8261667149627837, + "grad_norm": 2.143699084911077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170350 + }, + { + "epoch": 0.8262152131556197, + "grad_norm": 1.595067899984315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170360 + }, + { + "epoch": 0.8262637113484559, + "grad_norm": 2.1893075796697303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170370 + }, + { + "epoch": 0.8263122095412919, + "grad_norm": 2.0146353918448767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170380 + }, + { + "epoch": 0.8263607077341281, + "grad_norm": 2.3530022019713215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170390 + }, + { + "epoch": 0.8264092059269641, + "grad_norm": 2.9187507522010492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170400 + }, + { + "epoch": 0.8264577041198002, + "grad_norm": 2.1163877761409822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170410 + }, + { + "epoch": 0.8265062023126363, + "grad_norm": 1.7999067125629153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170420 + }, + { + "epoch": 0.8265547005054724, + "grad_norm": 2.751259842170839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170430 + }, + { + "epoch": 0.8266031986983085, + "grad_norm": 1.958134454582705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170440 + }, + { + "epoch": 0.8266516968911446, + "grad_norm": 2.790086206516662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170450 + }, + { + "epoch": 0.8267001950839807, + "grad_norm": 1.5085332094599835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170460 + }, + { + "epoch": 0.8267486932768168, + "grad_norm": 2.2104567065639458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170470 + }, + { + "epoch": 0.8267971914696529, + "grad_norm": 3.0055403499318345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170480 + }, + { + "epoch": 0.826845689662489, + "grad_norm": 2.1630626179103274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170490 + }, + { + "epoch": 0.8268941878553251, + "grad_norm": 2.264790488482049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170500 + }, + { + "epoch": 0.8269426860481611, + "grad_norm": 1.7249627504156706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170510 + }, + { + "epoch": 0.8269911842409973, + "grad_norm": 2.205731597371141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170520 + }, + { + "epoch": 0.8270396824338333, + "grad_norm": 2.5280666449134515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170530 + }, + { + "epoch": 0.8270881806266694, + "grad_norm": 2.4058055103637344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170540 + }, + { + "epoch": 0.8271366788195055, + "grad_norm": 2.2238628716308995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170550 + }, + { + "epoch": 0.8271851770123416, + "grad_norm": 1.660743365050621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170560 + }, + { + "epoch": 0.8272336752051777, + "grad_norm": 1.9820847185769708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170570 + }, + { + "epoch": 0.8272821733980138, + "grad_norm": 1.597860510571536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170580 + }, + { + "epoch": 0.8273306715908498, + "grad_norm": 3.8520646228334954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170590 + }, + { + "epoch": 0.827379169783686, + "grad_norm": 2.401322163336772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170600 + }, + { + "epoch": 0.827427667976522, + "grad_norm": 2.211255711870308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170610 + }, + { + "epoch": 0.8274761661693582, + "grad_norm": 1.591625853336609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170620 + }, + { + "epoch": 0.8275246643621942, + "grad_norm": 1.6475587116815404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170630 + }, + { + "epoch": 0.8275731625550303, + "grad_norm": 2.3692498629657166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170640 + }, + { + "epoch": 0.8276216607478664, + "grad_norm": 3.548003846276515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170650 + }, + { + "epoch": 0.8276701589407025, + "grad_norm": 1.9822332220087446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170660 + }, + { + "epoch": 0.8277186571335385, + "grad_norm": 1.394705684276687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170670 + }, + { + "epoch": 0.8277671553263747, + "grad_norm": 2.4915404850389677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170680 + }, + { + "epoch": 0.8278156535192107, + "grad_norm": 2.2360358897799415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170690 + }, + { + "epoch": 0.8278641517120469, + "grad_norm": 3.0274993179091325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170700 + }, + { + "epoch": 0.8279126499048829, + "grad_norm": 2.208844307460822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170710 + }, + { + "epoch": 0.827961148097719, + "grad_norm": 1.6696560578566277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170720 + }, + { + "epoch": 0.8280096462905551, + "grad_norm": 2.2292622858799405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170730 + }, + { + "epoch": 0.8280581444833912, + "grad_norm": 1.7792201489896797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170740 + }, + { + "epoch": 0.8281066426762272, + "grad_norm": 1.8010378965982454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170750 + }, + { + "epoch": 0.8281551408690634, + "grad_norm": 2.359699813325733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170760 + }, + { + "epoch": 0.8282036390618994, + "grad_norm": 2.313187330571509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170770 + }, + { + "epoch": 0.8282521372547356, + "grad_norm": 1.859550025073986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170780 + }, + { + "epoch": 0.8283006354475716, + "grad_norm": 2.0471865980198345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170790 + }, + { + "epoch": 0.8283491336404077, + "grad_norm": 2.067239179837088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170800 + }, + { + "epoch": 0.8283976318332438, + "grad_norm": 1.9246684246354562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170810 + }, + { + "epoch": 0.8284461300260799, + "grad_norm": 1.7943493801908517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170820 + }, + { + "epoch": 0.828494628218916, + "grad_norm": 2.3416049543811823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170830 + }, + { + "epoch": 0.8285431264117521, + "grad_norm": 2.0373381204308316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170840 + }, + { + "epoch": 0.8285916246045881, + "grad_norm": 1.9887842483967688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170850 + }, + { + "epoch": 0.8286401227974243, + "grad_norm": 1.7356880377406014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170860 + }, + { + "epoch": 0.8286886209902603, + "grad_norm": 2.1406648897936975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170870 + }, + { + "epoch": 0.8287371191830964, + "grad_norm": 1.8391274281270853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170880 + }, + { + "epoch": 0.8287856173759325, + "grad_norm": 2.2647274278142504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170890 + }, + { + "epoch": 0.8288341155687686, + "grad_norm": 2.1683609574552065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170900 + }, + { + "epoch": 0.8288826137616047, + "grad_norm": 1.6198022478874918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170910 + }, + { + "epoch": 0.8289311119544408, + "grad_norm": 1.9857344213392025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170920 + }, + { + "epoch": 0.8289796101472768, + "grad_norm": 1.9146586538454358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170930 + }, + { + "epoch": 0.829028108340113, + "grad_norm": 1.8974784410374923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170940 + }, + { + "epoch": 0.829076606532949, + "grad_norm": 2.0814995949081094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170950 + }, + { + "epoch": 0.8291251047257852, + "grad_norm": 1.7922113570989495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170960 + }, + { + "epoch": 0.8291736029186213, + "grad_norm": 2.1840435238118516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170970 + }, + { + "epoch": 0.8292221011114573, + "grad_norm": 1.768823665315722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170980 + }, + { + "epoch": 0.8292705993042935, + "grad_norm": 1.993433862423899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 170990 + }, + { + "epoch": 0.8293190974971295, + "grad_norm": 2.9632042597427244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171000 + }, + { + "epoch": 0.8293675956899657, + "grad_norm": 2.8510866556530345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171010 + }, + { + "epoch": 0.8294160938828017, + "grad_norm": 2.4026867606607993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171020 + }, + { + "epoch": 0.8294645920756378, + "grad_norm": 2.2276239519669616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171030 + }, + { + "epoch": 0.8295130902684739, + "grad_norm": 2.6788178786318895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171040 + }, + { + "epoch": 0.82956158846131, + "grad_norm": 2.019470990433092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171050 + }, + { + "epoch": 0.829610086654146, + "grad_norm": 2.1194530575030512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171060 + }, + { + "epoch": 0.8296585848469822, + "grad_norm": 1.595304865986691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171070 + }, + { + "epoch": 0.8297070830398182, + "grad_norm": 1.5510051909473077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171080 + }, + { + "epoch": 0.8297555812326544, + "grad_norm": 2.7089376075650762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171090 + }, + { + "epoch": 0.8298040794254904, + "grad_norm": 1.933228510608842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171100 + }, + { + "epoch": 0.8298525776183265, + "grad_norm": 1.6251965107016986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171110 + }, + { + "epoch": 0.8299010758111626, + "grad_norm": 2.3340138710636893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171120 + }, + { + "epoch": 0.8299495740039987, + "grad_norm": 2.1561280760806767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171130 + }, + { + "epoch": 0.8299980721968347, + "grad_norm": 1.8795706324681305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171140 + }, + { + "epoch": 0.8300465703896709, + "grad_norm": 1.9224314584675994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171150 + }, + { + "epoch": 0.8300950685825069, + "grad_norm": 1.695610230001421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171160 + }, + { + "epoch": 0.8301435667753431, + "grad_norm": 2.7121178192146544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171170 + }, + { + "epoch": 0.8301920649681791, + "grad_norm": 2.048556346778696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171180 + }, + { + "epoch": 0.8302405631610152, + "grad_norm": 1.9035901743791328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171190 + }, + { + "epoch": 0.8302890613538513, + "grad_norm": 1.7320276768373333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171200 + }, + { + "epoch": 0.8303375595466874, + "grad_norm": 1.8080632102623895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171210 + }, + { + "epoch": 0.8303860577395235, + "grad_norm": 1.8644952248791924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171220 + }, + { + "epoch": 0.8304345559323596, + "grad_norm": 1.8217088282312943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171230 + }, + { + "epoch": 0.8304830541251956, + "grad_norm": 2.1231189251125215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171240 + }, + { + "epoch": 0.8305315523180318, + "grad_norm": 2.425884737533579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171250 + }, + { + "epoch": 0.8305800505108678, + "grad_norm": 7.856682060491949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171260 + }, + { + "epoch": 0.830628548703704, + "grad_norm": 2.5170034945176667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171270 + }, + { + "epoch": 0.83067704689654, + "grad_norm": 1.81508159613486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171280 + }, + { + "epoch": 0.8307255450893761, + "grad_norm": 1.9342749624229327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171290 + }, + { + "epoch": 0.8307740432822122, + "grad_norm": 2.6190322799379828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171300 + }, + { + "epoch": 0.8308225414750483, + "grad_norm": 1.997915255458338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171310 + }, + { + "epoch": 0.8308710396678843, + "grad_norm": 1.634436586073207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171320 + }, + { + "epoch": 0.8309195378607205, + "grad_norm": 1.660681370196926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171330 + }, + { + "epoch": 0.8309680360535565, + "grad_norm": 2.7922146372816314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171340 + }, + { + "epoch": 0.8310165342463927, + "grad_norm": 2.0103179565467144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171350 + }, + { + "epoch": 0.8310650324392287, + "grad_norm": 2.4331775705377368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171360 + }, + { + "epoch": 0.8311135306320648, + "grad_norm": 2.8170665800075767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171370 + }, + { + "epoch": 0.8311620288249009, + "grad_norm": 1.9209981161338874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171380 + }, + { + "epoch": 0.831210527017737, + "grad_norm": 1.922033199264206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171390 + }, + { + "epoch": 0.831259025210573, + "grad_norm": 1.949373995557835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171400 + }, + { + "epoch": 0.8313075234034092, + "grad_norm": 2.1359712221169502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171410 + }, + { + "epoch": 0.8313560215962452, + "grad_norm": 2.256719788817918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171420 + }, + { + "epoch": 0.8314045197890814, + "grad_norm": 1.6572325733932303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171430 + }, + { + "epoch": 0.8314530179819174, + "grad_norm": 1.9315326227342666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171440 + }, + { + "epoch": 0.8315015161747535, + "grad_norm": 1.7900113391533523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171450 + }, + { + "epoch": 0.8315500143675896, + "grad_norm": 2.276483890284453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171460 + }, + { + "epoch": 0.8315985125604257, + "grad_norm": 1.597792120833219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171470 + }, + { + "epoch": 0.8316470107532619, + "grad_norm": 2.0922509946785794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171480 + }, + { + "epoch": 0.8316955089460979, + "grad_norm": 1.942259153508985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171490 + }, + { + "epoch": 0.831744007138934, + "grad_norm": 1.8405314605729473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171500 + }, + { + "epoch": 0.8317925053317701, + "grad_norm": 2.1435859309804073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171510 + }, + { + "epoch": 0.8318410035246062, + "grad_norm": 2.1207176459370203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171520 + }, + { + "epoch": 0.8318895017174422, + "grad_norm": 1.697475582318475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171530 + }, + { + "epoch": 0.8319379999102784, + "grad_norm": 1.9023675079665736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171540 + }, + { + "epoch": 0.8319864981031144, + "grad_norm": 1.9380676619107362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171550 + }, + { + "epoch": 0.8320349962959506, + "grad_norm": 1.6807703673293872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171560 + }, + { + "epoch": 0.8320834944887866, + "grad_norm": 1.739563515457121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171570 + }, + { + "epoch": 0.8321319926816227, + "grad_norm": 1.805167570978483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171580 + }, + { + "epoch": 0.8321804908744588, + "grad_norm": 2.4752475624723047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171590 + }, + { + "epoch": 0.8322289890672949, + "grad_norm": 2.326740045077713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171600 + }, + { + "epoch": 0.832277487260131, + "grad_norm": 1.523254766766513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171610 + }, + { + "epoch": 0.8323259854529671, + "grad_norm": 1.6828197502150033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171620 + }, + { + "epoch": 0.8323744836458031, + "grad_norm": 1.7147000264117196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171630 + }, + { + "epoch": 0.8324229818386393, + "grad_norm": 2.101155871514493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171640 + }, + { + "epoch": 0.8324714800314753, + "grad_norm": 2.2269132315955176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171650 + }, + { + "epoch": 0.8325199782243115, + "grad_norm": 1.889645417918473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171660 + }, + { + "epoch": 0.8325684764171475, + "grad_norm": 1.6715290485080914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171670 + }, + { + "epoch": 0.8326169746099836, + "grad_norm": 1.7109440975104917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171680 + }, + { + "epoch": 0.8326654728028197, + "grad_norm": 2.737451332279761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171690 + }, + { + "epoch": 0.8327139709956558, + "grad_norm": 1.73072187692469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171700 + }, + { + "epoch": 0.8327624691884918, + "grad_norm": 1.70112137709566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171710 + }, + { + "epoch": 0.832810967381328, + "grad_norm": 1.5983268042418786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171720 + }, + { + "epoch": 0.832859465574164, + "grad_norm": 1.8901095799606082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171730 + }, + { + "epoch": 0.8329079637670002, + "grad_norm": 2.36216131099809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171740 + }, + { + "epoch": 0.8329564619598362, + "grad_norm": 1.963140050520451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171750 + }, + { + "epoch": 0.8330049601526723, + "grad_norm": 2.429645995505325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171760 + }, + { + "epoch": 0.8330534583455084, + "grad_norm": 1.5876416625815182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171770 + }, + { + "epoch": 0.8331019565383445, + "grad_norm": 1.949121930522324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171780 + }, + { + "epoch": 0.8331504547311805, + "grad_norm": 2.5159994976320377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171790 + }, + { + "epoch": 0.8331989529240167, + "grad_norm": 2.3448359698363674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171800 + }, + { + "epoch": 0.8332474511168527, + "grad_norm": 1.6122452706213153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171810 + }, + { + "epoch": 0.8332959493096889, + "grad_norm": 2.3889642264407485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171820 + }, + { + "epoch": 0.8333444475025249, + "grad_norm": 1.6003676606146655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171830 + }, + { + "epoch": 0.833392945695361, + "grad_norm": 2.0593835969862084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171840 + }, + { + "epoch": 0.8334414438881971, + "grad_norm": 2.9541352475348504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171850 + }, + { + "epoch": 0.8334899420810332, + "grad_norm": 1.5644497253219924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171860 + }, + { + "epoch": 0.8335384402738693, + "grad_norm": 1.7728829959651193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171870 + }, + { + "epoch": 0.8335869384667054, + "grad_norm": 2.1805902861160575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171880 + }, + { + "epoch": 0.8336354366595414, + "grad_norm": 2.3034239404751133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171890 + }, + { + "epoch": 0.8336839348523776, + "grad_norm": 1.7692693532467274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171900 + }, + { + "epoch": 0.8337324330452136, + "grad_norm": 1.881366706868448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171910 + }, + { + "epoch": 0.8337809312380497, + "grad_norm": 1.3976270807347646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171920 + }, + { + "epoch": 0.8338294294308858, + "grad_norm": 1.644429836744621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171930 + }, + { + "epoch": 0.8338779276237219, + "grad_norm": 2.5406901471569654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171940 + }, + { + "epoch": 0.833926425816558, + "grad_norm": 2.0316520021879114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171950 + }, + { + "epoch": 0.8339749240093941, + "grad_norm": 2.5447764784303217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171960 + }, + { + "epoch": 0.8340234222022301, + "grad_norm": 1.5377189299670135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171970 + }, + { + "epoch": 0.8340719203950663, + "grad_norm": 1.8214688424222913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171980 + }, + { + "epoch": 0.8341204185879024, + "grad_norm": 2.214682659484879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 171990 + }, + { + "epoch": 0.8341689167807385, + "grad_norm": 2.238951424260449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172000 + }, + { + "epoch": 0.8342174149735746, + "grad_norm": 2.0514692167239446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172010 + }, + { + "epoch": 0.8342659131664106, + "grad_norm": 1.54931640850009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172020 + }, + { + "epoch": 0.8343144113592468, + "grad_norm": 3.160507233701537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172030 + }, + { + "epoch": 0.8343629095520828, + "grad_norm": 2.1389892523870913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172040 + }, + { + "epoch": 0.834411407744919, + "grad_norm": 2.583401226274873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172050 + }, + { + "epoch": 0.834459905937755, + "grad_norm": 1.8835017101537233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172060 + }, + { + "epoch": 0.8345084041305911, + "grad_norm": 2.0406107026360587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172070 + }, + { + "epoch": 0.8345569023234272, + "grad_norm": 1.366461344076697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172080 + }, + { + "epoch": 0.8346054005162633, + "grad_norm": 1.9174844823055537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172090 + }, + { + "epoch": 0.8346538987090993, + "grad_norm": 1.9801987605205795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172100 + }, + { + "epoch": 0.8347023969019355, + "grad_norm": 2.290834011375864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172110 + }, + { + "epoch": 0.8347508950947715, + "grad_norm": 1.897257639882355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172120 + }, + { + "epoch": 0.8347993932876077, + "grad_norm": 2.2203625604788613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172130 + }, + { + "epoch": 0.8348478914804437, + "grad_norm": 1.935725535417987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172140 + }, + { + "epoch": 0.8348963896732798, + "grad_norm": 1.8306739235640634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172150 + }, + { + "epoch": 0.8349448878661159, + "grad_norm": 2.23650786779217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172160 + }, + { + "epoch": 0.834993386058952, + "grad_norm": 1.6572851535556765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172170 + }, + { + "epoch": 0.835041884251788, + "grad_norm": 1.3634238626991646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172180 + }, + { + "epoch": 0.8350903824446242, + "grad_norm": 1.9520575378351168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172190 + }, + { + "epoch": 0.8351388806374602, + "grad_norm": 1.699456930737142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172200 + }, + { + "epoch": 0.8351873788302964, + "grad_norm": 2.139286436886323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172210 + }, + { + "epoch": 0.8352358770231324, + "grad_norm": 2.1965361085563018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172220 + }, + { + "epoch": 0.8352843752159685, + "grad_norm": 1.689135409321807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172230 + }, + { + "epoch": 0.8353328734088046, + "grad_norm": 1.9837417042367633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172240 + }, + { + "epoch": 0.8353813716016407, + "grad_norm": 1.9796292605178678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172250 + }, + { + "epoch": 0.8354298697944768, + "grad_norm": 1.5235844585959057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172260 + }, + { + "epoch": 0.8354783679873129, + "grad_norm": 2.231251272633017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172270 + }, + { + "epoch": 0.8355268661801489, + "grad_norm": 1.9030689912824528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172280 + }, + { + "epoch": 0.8355753643729851, + "grad_norm": 2.38105144489964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172290 + }, + { + "epoch": 0.8356238625658211, + "grad_norm": 2.337929849716147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172300 + }, + { + "epoch": 0.8356723607586573, + "grad_norm": 1.9067838863406905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172310 + }, + { + "epoch": 0.8357208589514933, + "grad_norm": 1.955196182734653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172320 + }, + { + "epoch": 0.8357693571443294, + "grad_norm": 1.8869684481614968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172330 + }, + { + "epoch": 0.8358178553371655, + "grad_norm": 2.4811043886074913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172340 + }, + { + "epoch": 0.8358663535300016, + "grad_norm": 2.3322815678739062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172350 + }, + { + "epoch": 0.8359148517228376, + "grad_norm": 1.745288713550508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172360 + }, + { + "epoch": 0.8359633499156738, + "grad_norm": 1.5235281480840968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172370 + }, + { + "epoch": 0.8360118481085098, + "grad_norm": 2.310829216867205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172380 + }, + { + "epoch": 0.836060346301346, + "grad_norm": 2.0408045031672373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172390 + }, + { + "epoch": 0.836108844494182, + "grad_norm": 2.6353554005709157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172400 + }, + { + "epoch": 0.8361573426870181, + "grad_norm": 1.867271492983491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172410 + }, + { + "epoch": 0.8362058408798542, + "grad_norm": 2.0847817694402693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172420 + }, + { + "epoch": 0.8362543390726903, + "grad_norm": 1.801841875703758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172430 + }, + { + "epoch": 0.8363028372655263, + "grad_norm": 1.864274423724055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172440 + }, + { + "epoch": 0.8363513354583625, + "grad_norm": 2.9304104032235045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172450 + }, + { + "epoch": 0.8363998336511985, + "grad_norm": 1.7661969664573007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172460 + }, + { + "epoch": 0.8364483318440347, + "grad_norm": 1.7226341242349008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172470 + }, + { + "epoch": 0.8364968300368707, + "grad_norm": 1.8499731524457275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172480 + }, + { + "epoch": 0.8365453282297068, + "grad_norm": 2.145556088350986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172490 + }, + { + "epoch": 0.836593826422543, + "grad_norm": 2.3128086112933488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172500 + }, + { + "epoch": 0.836642324615379, + "grad_norm": 2.367501394928695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172510 + }, + { + "epoch": 0.8366908228082152, + "grad_norm": 1.8023763814767335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172520 + }, + { + "epoch": 0.8367393210010512, + "grad_norm": 2.2302227620230042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172530 + }, + { + "epoch": 0.8367878191938873, + "grad_norm": 2.3304888685515834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172540 + }, + { + "epoch": 0.8368363173867234, + "grad_norm": 1.6623372900426148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172550 + }, + { + "epoch": 0.8368848155795595, + "grad_norm": 1.775934954650893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172560 + }, + { + "epoch": 0.8369333137723955, + "grad_norm": 1.6672956348884327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172570 + }, + { + "epoch": 0.8369818119652317, + "grad_norm": 1.5055039881417542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172580 + }, + { + "epoch": 0.8370303101580677, + "grad_norm": 1.729948984063867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172590 + }, + { + "epoch": 0.8370788083509039, + "grad_norm": 2.6605970759874253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172600 + }, + { + "epoch": 0.8371273065437399, + "grad_norm": 1.772531454946602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172610 + }, + { + "epoch": 0.837175804736576, + "grad_norm": 1.626335333071438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172620 + }, + { + "epoch": 0.8372243029294121, + "grad_norm": 2.156026646105147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172630 + }, + { + "epoch": 0.8372728011222482, + "grad_norm": 2.085970152165828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172640 + }, + { + "epoch": 0.8373212993150843, + "grad_norm": 1.9690885366685507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172650 + }, + { + "epoch": 0.8373697975079204, + "grad_norm": 1.5273711184704553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172660 + }, + { + "epoch": 0.8374182957007564, + "grad_norm": 1.9994297772996106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172670 + }, + { + "epoch": 0.8374667938935926, + "grad_norm": 1.858768250428966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172680 + }, + { + "epoch": 0.8375152920864286, + "grad_norm": 1.7094347271040533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172690 + }, + { + "epoch": 0.8375637902792648, + "grad_norm": 2.5087794952582954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172700 + }, + { + "epoch": 0.8376122884721008, + "grad_norm": 2.1934308591653462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172710 + }, + { + "epoch": 0.8376607866649369, + "grad_norm": 1.9509801774120206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172720 + }, + { + "epoch": 0.837709284857773, + "grad_norm": 1.7149348607858883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172730 + }, + { + "epoch": 0.8377577830506091, + "grad_norm": 2.167479884462864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172740 + }, + { + "epoch": 0.8378062812434451, + "grad_norm": 1.956271944436594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172750 + }, + { + "epoch": 0.8378547794362813, + "grad_norm": 1.5105083406297126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172760 + }, + { + "epoch": 0.8379032776291173, + "grad_norm": 1.403801519472836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172770 + }, + { + "epoch": 0.8379517758219535, + "grad_norm": 1.576318986451497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172780 + }, + { + "epoch": 0.8380002740147895, + "grad_norm": 1.924429149369189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172790 + }, + { + "epoch": 0.8380487722076256, + "grad_norm": 2.6921611606667284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172800 + }, + { + "epoch": 0.8380972704004617, + "grad_norm": 2.0770446695905775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172810 + }, + { + "epoch": 0.8381457685932978, + "grad_norm": 2.2335623128810767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172820 + }, + { + "epoch": 0.8381942667861338, + "grad_norm": 1.5439686862350754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172830 + }, + { + "epoch": 0.83824276497897, + "grad_norm": 2.2125492549207593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172840 + }, + { + "epoch": 0.838291263171806, + "grad_norm": 3.4989660946394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172850 + }, + { + "epoch": 0.8383397613646422, + "grad_norm": 2.1300062158502442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172860 + }, + { + "epoch": 0.8383882595574782, + "grad_norm": 2.1879115408296457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172870 + }, + { + "epoch": 0.8384367577503143, + "grad_norm": 1.421602036089098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172880 + }, + { + "epoch": 0.8384852559431504, + "grad_norm": 1.7952686448552413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172890 + }, + { + "epoch": 0.8385337541359865, + "grad_norm": 1.8921770816859862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172900 + }, + { + "epoch": 0.8385822523288226, + "grad_norm": 2.2783892106303938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172910 + }, + { + "epoch": 0.8386307505216587, + "grad_norm": 3.008272031479464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172920 + }, + { + "epoch": 0.8386792487144947, + "grad_norm": 1.7228906301625102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172930 + }, + { + "epoch": 0.8387277469073309, + "grad_norm": 1.6354288590036958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172940 + }, + { + "epoch": 0.8387762451001669, + "grad_norm": 1.9606721579634723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172950 + }, + { + "epoch": 0.838824743293003, + "grad_norm": 2.1999307264763956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172960 + }, + { + "epoch": 0.8388732414858391, + "grad_norm": 2.2054409853922152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172970 + }, + { + "epoch": 0.8389217396786752, + "grad_norm": 2.3179186570132515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172980 + }, + { + "epoch": 0.8389702378715113, + "grad_norm": 2.117985076210971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 172990 + }, + { + "epoch": 0.8390187360643474, + "grad_norm": 1.6911352318516037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173000 + }, + { + "epoch": 0.8390672342571834, + "grad_norm": 1.685759087877159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173010 + }, + { + "epoch": 0.8391157324500196, + "grad_norm": 2.0145034085317093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173020 + }, + { + "epoch": 0.8391642306428557, + "grad_norm": 1.9419546859467118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173030 + }, + { + "epoch": 0.8392127288356918, + "grad_norm": 2.050155778476892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173040 + }, + { + "epoch": 0.8392612270285279, + "grad_norm": 1.6267732050323502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173050 + }, + { + "epoch": 0.8393097252213639, + "grad_norm": 1.679514838315299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173060 + }, + { + "epoch": 0.8393582234142001, + "grad_norm": 1.5953050436223748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173070 + }, + { + "epoch": 0.8394067216070361, + "grad_norm": 1.7228540372116186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173080 + }, + { + "epoch": 0.8394552197998723, + "grad_norm": 1.7901745863468932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173090 + }, + { + "epoch": 0.8395037179927083, + "grad_norm": 1.930989590448462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173100 + }, + { + "epoch": 0.8395522161855444, + "grad_norm": 1.8082937813801436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173110 + }, + { + "epoch": 0.8396007143783805, + "grad_norm": 1.7660211071302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173120 + }, + { + "epoch": 0.8396492125712166, + "grad_norm": 1.9089199554400693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173130 + }, + { + "epoch": 0.8396977107640526, + "grad_norm": 1.6798018975805462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173140 + }, + { + "epoch": 0.8397462089568888, + "grad_norm": 2.0035891168390663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173150 + }, + { + "epoch": 0.8397947071497248, + "grad_norm": 1.7387678852287536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173160 + }, + { + "epoch": 0.839843205342561, + "grad_norm": 1.8905829790583084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173170 + }, + { + "epoch": 0.839891703535397, + "grad_norm": 1.8092244147283054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173180 + }, + { + "epoch": 0.8399402017282331, + "grad_norm": 1.6849485362513406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173190 + }, + { + "epoch": 0.8399886999210692, + "grad_norm": 2.52920226984088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173200 + }, + { + "epoch": 0.8400371981139053, + "grad_norm": 1.5592018343113523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173210 + }, + { + "epoch": 0.8400856963067413, + "grad_norm": 1.765079993276686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173220 + }, + { + "epoch": 0.8401341944995775, + "grad_norm": 2.681908206625394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173230 + }, + { + "epoch": 0.8401826926924135, + "grad_norm": 2.040998126062732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173240 + }, + { + "epoch": 0.8402311908852497, + "grad_norm": 2.178447644496373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173250 + }, + { + "epoch": 0.8402796890780857, + "grad_norm": 1.3575884416638928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173260 + }, + { + "epoch": 0.8403281872709218, + "grad_norm": 2.7681112513278094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173270 + }, + { + "epoch": 0.8403766854637579, + "grad_norm": 1.71397012138641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173280 + }, + { + "epoch": 0.840425183656594, + "grad_norm": 2.117469222184809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173290 + }, + { + "epoch": 0.84047368184943, + "grad_norm": 2.278207666961407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173300 + }, + { + "epoch": 0.8405221800422662, + "grad_norm": 2.1744474665297275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173310 + }, + { + "epoch": 0.8405706782351022, + "grad_norm": 1.4763605449275019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173320 + }, + { + "epoch": 0.8406191764279384, + "grad_norm": 1.3746575433515318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173330 + }, + { + "epoch": 0.8406676746207744, + "grad_norm": 1.7500648041846034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173340 + }, + { + "epoch": 0.8407161728136106, + "grad_norm": 1.7163644727702376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173350 + }, + { + "epoch": 0.8407646710064466, + "grad_norm": 1.5228474481432386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173360 + }, + { + "epoch": 0.8408131691992827, + "grad_norm": 2.0379447462914868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173370 + }, + { + "epoch": 0.8408616673921188, + "grad_norm": 2.659259656923041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173380 + }, + { + "epoch": 0.8409101655849549, + "grad_norm": 1.9733482403694325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173390 + }, + { + "epoch": 0.8409586637777909, + "grad_norm": 1.818028216860057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173400 + }, + { + "epoch": 0.8410071619706271, + "grad_norm": 1.8670629486905455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173410 + }, + { + "epoch": 0.8410556601634631, + "grad_norm": 1.5465275282622315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173420 + }, + { + "epoch": 0.8411041583562993, + "grad_norm": 1.785021908062845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173430 + }, + { + "epoch": 0.8411526565491353, + "grad_norm": 2.5270564307788845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173440 + }, + { + "epoch": 0.8412011547419714, + "grad_norm": 2.2944321997897532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173450 + }, + { + "epoch": 0.8412496529348075, + "grad_norm": 1.79777845943363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173460 + }, + { + "epoch": 0.8412981511276436, + "grad_norm": 1.9787385951985925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173470 + }, + { + "epoch": 0.8413466493204796, + "grad_norm": 2.0441069281673663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173480 + }, + { + "epoch": 0.8413951475133158, + "grad_norm": 2.1267499761279396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173490 + }, + { + "epoch": 0.8414436457061518, + "grad_norm": 2.0815170032051356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173500 + }, + { + "epoch": 0.841492143898988, + "grad_norm": 1.758101220161734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173510 + }, + { + "epoch": 0.841540642091824, + "grad_norm": 1.738509070037253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173520 + }, + { + "epoch": 0.8415891402846601, + "grad_norm": 1.770079016694126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173530 + }, + { + "epoch": 0.8416376384774963, + "grad_norm": 1.928551895957753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173540 + }, + { + "epoch": 0.8416861366703323, + "grad_norm": 2.6347393600190117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173550 + }, + { + "epoch": 0.8417346348631685, + "grad_norm": 2.0734441719127972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173560 + }, + { + "epoch": 0.8417831330560045, + "grad_norm": 1.45747574009647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173570 + }, + { + "epoch": 0.8418316312488406, + "grad_norm": 1.73960224003622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173580 + }, + { + "epoch": 0.8418801294416767, + "grad_norm": 1.7154526688045735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173590 + }, + { + "epoch": 0.8419286276345128, + "grad_norm": 1.9616805957411998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173600 + }, + { + "epoch": 0.8419771258273488, + "grad_norm": 1.6321450857503805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173610 + }, + { + "epoch": 0.842025624020185, + "grad_norm": 1.6069819253061723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173620 + }, + { + "epoch": 0.842074122213021, + "grad_norm": 1.5207215042778444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173630 + }, + { + "epoch": 0.8421226204058572, + "grad_norm": 1.7120900253075888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173640 + }, + { + "epoch": 0.8421711185986932, + "grad_norm": 2.1480314416066904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173650 + }, + { + "epoch": 0.8422196167915293, + "grad_norm": 1.5146012444233747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173660 + }, + { + "epoch": 0.8422681149843654, + "grad_norm": 1.8895486064707256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173670 + }, + { + "epoch": 0.8423166131772015, + "grad_norm": 1.6779884148832025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173680 + }, + { + "epoch": 0.8423651113700376, + "grad_norm": 1.922465386883232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173690 + }, + { + "epoch": 0.8424136095628737, + "grad_norm": 1.8084069353108134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173700 + }, + { + "epoch": 0.8424621077557097, + "grad_norm": 1.7155384668399165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173710 + }, + { + "epoch": 0.8425106059485459, + "grad_norm": 1.7837432864098446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173720 + }, + { + "epoch": 0.8425591041413819, + "grad_norm": 2.188215475484867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173730 + }, + { + "epoch": 0.842607602334218, + "grad_norm": 2.362143014522644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173740 + }, + { + "epoch": 0.8426561005270541, + "grad_norm": 2.1007396711070214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173750 + }, + { + "epoch": 0.8427045987198902, + "grad_norm": 1.4707842943550986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173760 + }, + { + "epoch": 0.8427530969127263, + "grad_norm": 1.757748790964797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173770 + }, + { + "epoch": 0.8428015951055624, + "grad_norm": 1.4258858094251536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173780 + }, + { + "epoch": 0.8428500932983984, + "grad_norm": 2.2261819054847365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173790 + }, + { + "epoch": 0.8428985914912346, + "grad_norm": 1.701912921703297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173800 + }, + { + "epoch": 0.8429470896840706, + "grad_norm": 1.7607913349593218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173810 + }, + { + "epoch": 0.8429955878769068, + "grad_norm": 1.941873861710519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173820 + }, + { + "epoch": 0.8430440860697428, + "grad_norm": 1.7607476365810726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173830 + }, + { + "epoch": 0.8430925842625789, + "grad_norm": 2.2027220936138292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173840 + }, + { + "epoch": 0.843141082455415, + "grad_norm": 2.2344332606394346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173850 + }, + { + "epoch": 0.8431895806482511, + "grad_norm": 2.683632338573716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173860 + }, + { + "epoch": 0.8432380788410871, + "grad_norm": 1.8150640102021498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173870 + }, + { + "epoch": 0.8432865770339233, + "grad_norm": 1.757292444892755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173880 + }, + { + "epoch": 0.8433350752267593, + "grad_norm": 2.2794148790694635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173890 + }, + { + "epoch": 0.8433835734195955, + "grad_norm": 1.6606447772460342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173900 + }, + { + "epoch": 0.8434320716124315, + "grad_norm": 1.2954466832582057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173910 + }, + { + "epoch": 0.8434805698052676, + "grad_norm": 1.709849861697421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173920 + }, + { + "epoch": 0.8435290679981037, + "grad_norm": 1.879725886055894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173930 + }, + { + "epoch": 0.8435775661909398, + "grad_norm": 2.5025382655030626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173940 + }, + { + "epoch": 0.8436260643837759, + "grad_norm": 1.8776121990526917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173950 + }, + { + "epoch": 0.843674562576612, + "grad_norm": 1.5004060216483595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173960 + }, + { + "epoch": 0.843723060769448, + "grad_norm": 1.5622404703208304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173970 + }, + { + "epoch": 0.8437715589622842, + "grad_norm": 2.3868194531928566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173980 + }, + { + "epoch": 0.8438200571551202, + "grad_norm": 2.4597039072205007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 173990 + }, + { + "epoch": 0.8438685553479564, + "grad_norm": 1.5404388875595032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174000 + }, + { + "epoch": 0.8439170535407924, + "grad_norm": 1.4909637968685274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174010 + }, + { + "epoch": 0.8439655517336285, + "grad_norm": 1.758036738408464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174020 + }, + { + "epoch": 0.8440140499264646, + "grad_norm": 2.2552249845375627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174030 + }, + { + "epoch": 0.8440625481193007, + "grad_norm": 2.283119115986665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174040 + }, + { + "epoch": 0.8441110463121368, + "grad_norm": 2.0743351925034403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174050 + }, + { + "epoch": 0.8441595445049729, + "grad_norm": 1.7870450008672378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174060 + }, + { + "epoch": 0.844208042697809, + "grad_norm": 1.3642377005851358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174070 + }, + { + "epoch": 0.8442565408906451, + "grad_norm": 2.81597252183019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174080 + }, + { + "epoch": 0.8443050390834812, + "grad_norm": 1.960350459739857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174090 + }, + { + "epoch": 0.8443535372763172, + "grad_norm": 2.0452354476674373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174100 + }, + { + "epoch": 0.8444020354691534, + "grad_norm": 2.2976545110964253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174110 + }, + { + "epoch": 0.8444505336619894, + "grad_norm": 1.6727170759622823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174120 + }, + { + "epoch": 0.8444990318548256, + "grad_norm": 1.4288208838308947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174130 + }, + { + "epoch": 0.8445475300476616, + "grad_norm": 3.401203940711639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174140 + }, + { + "epoch": 0.8445960282404977, + "grad_norm": 3.0562674169232196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174150 + }, + { + "epoch": 0.8446445264333338, + "grad_norm": 1.7500198623565666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174160 + }, + { + "epoch": 0.8446930246261699, + "grad_norm": 1.7634883775485832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174170 + }, + { + "epoch": 0.8447415228190059, + "grad_norm": 1.668242610719517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174180 + }, + { + "epoch": 0.8447900210118421, + "grad_norm": 1.798159843247049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174190 + }, + { + "epoch": 0.8448385192046781, + "grad_norm": 1.976267327563619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174200 + }, + { + "epoch": 0.8448870173975143, + "grad_norm": 2.8408926766587683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174210 + }, + { + "epoch": 0.8449355155903503, + "grad_norm": 1.3771323637001842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174220 + }, + { + "epoch": 0.8449840137831864, + "grad_norm": 1.5487183091522638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174230 + }, + { + "epoch": 0.8450325119760225, + "grad_norm": 1.63905387040586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174240 + }, + { + "epoch": 0.8450810101688586, + "grad_norm": 1.8319115113740736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174250 + }, + { + "epoch": 0.8451295083616946, + "grad_norm": 1.5246257589751622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174260 + }, + { + "epoch": 0.8451780065545308, + "grad_norm": 1.643919844696029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174270 + }, + { + "epoch": 0.8452265047473668, + "grad_norm": 2.1004973760341272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174280 + }, + { + "epoch": 0.845275002940203, + "grad_norm": 1.6801749325168203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174290 + }, + { + "epoch": 0.845323501133039, + "grad_norm": 1.714847108758022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174300 + }, + { + "epoch": 0.8453719993258751, + "grad_norm": 1.5804557662590923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174310 + }, + { + "epoch": 0.8454204975187112, + "grad_norm": 1.4516665203245793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174320 + }, + { + "epoch": 0.8454689957115473, + "grad_norm": 1.975432084577733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174330 + }, + { + "epoch": 0.8455174939043834, + "grad_norm": 2.2417047773615195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174340 + }, + { + "epoch": 0.8455659920972195, + "grad_norm": 2.1665993443775733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174350 + }, + { + "epoch": 0.8456144902900555, + "grad_norm": 2.0518417187531668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174360 + }, + { + "epoch": 0.8456629884828917, + "grad_norm": 1.78837780140384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174370 + }, + { + "epoch": 0.8457114866757277, + "grad_norm": 2.0172256753880902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174380 + }, + { + "epoch": 0.8457599848685639, + "grad_norm": 2.0163625435998256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174390 + }, + { + "epoch": 0.8458084830613999, + "grad_norm": 1.7010291841756953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174400 + }, + { + "epoch": 0.845856981254236, + "grad_norm": 2.1852368803365607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174410 + }, + { + "epoch": 0.8459054794470721, + "grad_norm": 1.5640191364241218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174420 + }, + { + "epoch": 0.8459539776399082, + "grad_norm": 1.5145511511605037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174430 + }, + { + "epoch": 0.8460024758327442, + "grad_norm": 1.7925543716046377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174440 + }, + { + "epoch": 0.8460509740255804, + "grad_norm": 1.912730063224899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174450 + }, + { + "epoch": 0.8460994722184164, + "grad_norm": 1.7228568793825616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174460 + }, + { + "epoch": 0.8461479704112526, + "grad_norm": 1.6050270446044124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174470 + }, + { + "epoch": 0.8461964686040886, + "grad_norm": 1.6097574828677352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174480 + }, + { + "epoch": 0.8462449667969247, + "grad_norm": 1.880440159141017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174490 + }, + { + "epoch": 0.8462934649897608, + "grad_norm": 1.9330968825670425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174500 + }, + { + "epoch": 0.8463419631825969, + "grad_norm": 2.6309606937502394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174510 + }, + { + "epoch": 0.846390461375433, + "grad_norm": 1.7727133538869566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174520 + }, + { + "epoch": 0.8464389595682691, + "grad_norm": 1.9187853084190465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174530 + }, + { + "epoch": 0.8464874577611051, + "grad_norm": 1.532957760730369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174540 + }, + { + "epoch": 0.8465359559539413, + "grad_norm": 2.460964587669423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174550 + }, + { + "epoch": 0.8465844541467774, + "grad_norm": 1.4587871355331572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174560 + }, + { + "epoch": 0.8466329523396134, + "grad_norm": 1.4052006669373895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174570 + }, + { + "epoch": 0.8466814505324496, + "grad_norm": 2.4856921854166103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174580 + }, + { + "epoch": 0.8467299487252856, + "grad_norm": 1.6906822608575567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174590 + }, + { + "epoch": 0.8467784469181218, + "grad_norm": 1.9466289913339097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174600 + }, + { + "epoch": 0.8468269451109578, + "grad_norm": 1.5572176437217422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174610 + }, + { + "epoch": 0.8468754433037939, + "grad_norm": 1.6199752650436494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174620 + }, + { + "epoch": 0.84692394149663, + "grad_norm": 2.5018653815322978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174630 + }, + { + "epoch": 0.8469724396894661, + "grad_norm": 2.0104767628481568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174640 + }, + { + "epoch": 0.8470209378823021, + "grad_norm": 2.5513230639262474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174650 + }, + { + "epoch": 0.8470694360751383, + "grad_norm": 1.8468632845269894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174660 + }, + { + "epoch": 0.8471179342679743, + "grad_norm": 2.0390594102082105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174670 + }, + { + "epoch": 0.8471664324608105, + "grad_norm": 1.792517778653746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174680 + }, + { + "epoch": 0.8472149306536465, + "grad_norm": 1.805555527312208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174690 + }, + { + "epoch": 0.8472634288464826, + "grad_norm": 1.929478798956552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174700 + }, + { + "epoch": 0.8473119270393187, + "grad_norm": 1.707664054606539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174710 + }, + { + "epoch": 0.8473604252321548, + "grad_norm": 1.3864013048703328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174720 + }, + { + "epoch": 0.8474089234249909, + "grad_norm": 1.977890029536411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174730 + }, + { + "epoch": 0.847457421617827, + "grad_norm": 1.806607308196817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174740 + }, + { + "epoch": 0.847505919810663, + "grad_norm": 1.6681013903507846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174750 + }, + { + "epoch": 0.8475544180034992, + "grad_norm": 2.1534265925993168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174760 + }, + { + "epoch": 0.8476029161963352, + "grad_norm": 1.4221286370741382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174770 + }, + { + "epoch": 0.8476514143891714, + "grad_norm": 1.8176327998276065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174780 + }, + { + "epoch": 0.8476999125820074, + "grad_norm": 2.219874772890762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174790 + }, + { + "epoch": 0.8477484107748435, + "grad_norm": 2.581830926828843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174800 + }, + { + "epoch": 0.8477969089676796, + "grad_norm": 1.7562641119184264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174810 + }, + { + "epoch": 0.8478454071605157, + "grad_norm": 1.3245018415375398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174820 + }, + { + "epoch": 0.8478939053533517, + "grad_norm": 1.6665371305180088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174830 + }, + { + "epoch": 0.8479424035461879, + "grad_norm": 1.7623627002194553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174840 + }, + { + "epoch": 0.8479909017390239, + "grad_norm": 1.9425648645210458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174850 + }, + { + "epoch": 0.8480393999318601, + "grad_norm": 2.2368674024164648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174860 + }, + { + "epoch": 0.8480878981246961, + "grad_norm": 1.831051221756752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174870 + }, + { + "epoch": 0.8481363963175322, + "grad_norm": 1.8231267162605036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174880 + }, + { + "epoch": 0.8481848945103683, + "grad_norm": 1.5814563880667265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174890 + }, + { + "epoch": 0.8482333927032044, + "grad_norm": 2.21610694239871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174900 + }, + { + "epoch": 0.8482818908960404, + "grad_norm": 1.8693137704417495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174910 + }, + { + "epoch": 0.8483303890888766, + "grad_norm": 1.5177921142139894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174920 + }, + { + "epoch": 0.8483788872817126, + "grad_norm": 2.3506292023967035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174930 + }, + { + "epoch": 0.8484273854745488, + "grad_norm": 1.865254972699404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174940 + }, + { + "epoch": 0.8484758836673848, + "grad_norm": 2.1808711281323667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174950 + }, + { + "epoch": 0.8485243818602209, + "grad_norm": 1.9622641289629428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174960 + }, + { + "epoch": 0.848572880053057, + "grad_norm": 2.6188219592881978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174970 + }, + { + "epoch": 0.8486213782458931, + "grad_norm": 1.8199397544549356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174980 + }, + { + "epoch": 0.8486698764387292, + "grad_norm": 2.2421316359100274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 174990 + }, + { + "epoch": 0.8487183746315653, + "grad_norm": 1.9048144395128475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175000 + }, + { + "epoch": 0.8487668728244013, + "grad_norm": 1.854453124394695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175010 + }, + { + "epoch": 0.8488153710172375, + "grad_norm": 2.7410008485162507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175020 + }, + { + "epoch": 0.8488638692100735, + "grad_norm": 2.1534342309337262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175030 + }, + { + "epoch": 0.8489123674029097, + "grad_norm": 2.2310356229127137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175040 + }, + { + "epoch": 0.8489608655957457, + "grad_norm": 1.793192794252718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175050 + }, + { + "epoch": 0.8490093637885818, + "grad_norm": 1.6148176129604508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175060 + }, + { + "epoch": 0.849057861981418, + "grad_norm": 1.6655157253353536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175070 + }, + { + "epoch": 0.849106360174254, + "grad_norm": 1.8176391947122283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175080 + }, + { + "epoch": 0.8491548583670901, + "grad_norm": 2.0430372060786794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175090 + }, + { + "epoch": 0.8492033565599262, + "grad_norm": 2.332574844388091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175100 + }, + { + "epoch": 0.8492518547527623, + "grad_norm": 2.366126672370683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175110 + }, + { + "epoch": 0.8493003529455984, + "grad_norm": 1.813958405705307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175120 + }, + { + "epoch": 0.8493488511384345, + "grad_norm": 2.2755138218144566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175130 + }, + { + "epoch": 0.8493973493312705, + "grad_norm": 1.6090639931576334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175140 + }, + { + "epoch": 0.8494458475241067, + "grad_norm": 1.8016923064578805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175150 + }, + { + "epoch": 0.8494943457169427, + "grad_norm": 1.4972547646152634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175160 + }, + { + "epoch": 0.8495428439097789, + "grad_norm": 1.669921800839802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175170 + }, + { + "epoch": 0.8495913421026149, + "grad_norm": 2.1344858325278437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175180 + }, + { + "epoch": 0.849639840295451, + "grad_norm": 1.7750421577034103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175190 + }, + { + "epoch": 0.8496883384882871, + "grad_norm": 1.8384515243496935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175200 + }, + { + "epoch": 0.8497368366811232, + "grad_norm": 1.515631176118859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175210 + }, + { + "epoch": 0.8497853348739592, + "grad_norm": 1.4404555770397565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175220 + }, + { + "epoch": 0.8498338330667954, + "grad_norm": 1.3950899102610492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175230 + }, + { + "epoch": 0.8498823312596314, + "grad_norm": 1.4925207736382617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175240 + }, + { + "epoch": 0.8499308294524676, + "grad_norm": 1.417683392901381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175250 + }, + { + "epoch": 0.8499793276453036, + "grad_norm": 1.3476162408210257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175260 + }, + { + "epoch": 0.8500278258381397, + "grad_norm": 1.9816155827356852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175270 + }, + { + "epoch": 0.8500763240309758, + "grad_norm": 1.5751616899706278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175280 + }, + { + "epoch": 0.8501248222238119, + "grad_norm": 1.6854848183811555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175290 + }, + { + "epoch": 0.850173320416648, + "grad_norm": 3.256169733845127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175300 + }, + { + "epoch": 0.8502218186094841, + "grad_norm": 1.8547241964483874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175310 + }, + { + "epoch": 0.8502703168023201, + "grad_norm": 1.9284962959886798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175320 + }, + { + "epoch": 0.8503188149951563, + "grad_norm": 1.6438168159993438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175330 + }, + { + "epoch": 0.8503673131879923, + "grad_norm": 1.469092136829886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175340 + }, + { + "epoch": 0.8504158113808284, + "grad_norm": 1.8352045216829538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175350 + }, + { + "epoch": 0.8504643095736645, + "grad_norm": 1.4588339425358754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175360 + }, + { + "epoch": 0.8505128077665006, + "grad_norm": 2.4575415480398988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175370 + }, + { + "epoch": 0.8505613059593367, + "grad_norm": 1.7633896121083126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175380 + }, + { + "epoch": 0.8506098041521728, + "grad_norm": 1.9259935868376488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175390 + }, + { + "epoch": 0.8506583023450088, + "grad_norm": 1.5819946241890648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175400 + }, + { + "epoch": 0.850706800537845, + "grad_norm": 1.541754990341815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175410 + }, + { + "epoch": 0.850755298730681, + "grad_norm": 1.4298196404638475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175420 + }, + { + "epoch": 0.8508037969235172, + "grad_norm": 1.5330504865573857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175430 + }, + { + "epoch": 0.8508522951163532, + "grad_norm": 1.872647104050884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175440 + }, + { + "epoch": 0.8509007933091893, + "grad_norm": 1.8832134074386886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175450 + }, + { + "epoch": 0.8509492915020254, + "grad_norm": 2.1064316513275116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175460 + }, + { + "epoch": 0.8509977896948615, + "grad_norm": 1.6304101180253383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175470 + }, + { + "epoch": 0.8510462878876975, + "grad_norm": 1.8041527383161338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175480 + }, + { + "epoch": 0.8510947860805337, + "grad_norm": 2.4583362900898464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175490 + }, + { + "epoch": 0.8511432842733697, + "grad_norm": 1.563727991538144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175500 + }, + { + "epoch": 0.8511917824662059, + "grad_norm": 2.0338443817990992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175510 + }, + { + "epoch": 0.8512402806590419, + "grad_norm": 1.4895053190855378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175520 + }, + { + "epoch": 0.851288778851878, + "grad_norm": 1.9047158517082607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175530 + }, + { + "epoch": 0.8513372770447141, + "grad_norm": 1.8106852905930282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175540 + }, + { + "epoch": 0.8513857752375502, + "grad_norm": 1.72617280469467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175550 + }, + { + "epoch": 0.8514342734303862, + "grad_norm": 1.522967352229898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175560 + }, + { + "epoch": 0.8514827716232224, + "grad_norm": 1.3811473742464386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175570 + }, + { + "epoch": 0.8515312698160585, + "grad_norm": 1.6341630271199392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175580 + }, + { + "epoch": 0.8515797680088946, + "grad_norm": 1.5372302542004945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175590 + }, + { + "epoch": 0.8516282662017307, + "grad_norm": 1.5340916093009582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175600 + }, + { + "epoch": 0.8516767643945667, + "grad_norm": 1.883123168511247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175610 + }, + { + "epoch": 0.8517252625874029, + "grad_norm": 1.717675246482031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175620 + }, + { + "epoch": 0.8517737607802389, + "grad_norm": 2.6476801195940425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175630 + }, + { + "epoch": 0.8518222589730751, + "grad_norm": 1.919044123610547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175640 + }, + { + "epoch": 0.8518707571659111, + "grad_norm": 1.758030521159526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175650 + }, + { + "epoch": 0.8519192553587472, + "grad_norm": 1.690876949567155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175660 + }, + { + "epoch": 0.8519677535515833, + "grad_norm": 1.935802096397765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175670 + }, + { + "epoch": 0.8520162517444194, + "grad_norm": 2.1404192196428085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175680 + }, + { + "epoch": 0.8520647499372555, + "grad_norm": 1.711304165041838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175690 + }, + { + "epoch": 0.8521132481300916, + "grad_norm": 1.7174510702488988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175700 + }, + { + "epoch": 0.8521617463229276, + "grad_norm": 1.5671005826334294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175710 + }, + { + "epoch": 0.8522102445157638, + "grad_norm": 2.0558330149356152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175720 + }, + { + "epoch": 0.8522587427085998, + "grad_norm": 2.764579498659714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175730 + }, + { + "epoch": 0.852307240901436, + "grad_norm": 1.3859863479126489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175740 + }, + { + "epoch": 0.852355739094272, + "grad_norm": 1.9270341766741694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175750 + }, + { + "epoch": 0.8524042372871081, + "grad_norm": 1.3870793402759318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175760 + }, + { + "epoch": 0.8524527354799442, + "grad_norm": 2.7576460226441668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175770 + }, + { + "epoch": 0.8525012336727803, + "grad_norm": 1.6155642157400507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175780 + }, + { + "epoch": 0.8525497318656163, + "grad_norm": 2.2085403728056008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175790 + }, + { + "epoch": 0.8525982300584525, + "grad_norm": 1.8203429874574795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175800 + }, + { + "epoch": 0.8526467282512885, + "grad_norm": 1.6372611710835372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175810 + }, + { + "epoch": 0.8526952264441247, + "grad_norm": 1.98144718410731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175820 + }, + { + "epoch": 0.8527437246369607, + "grad_norm": 2.3197163301347246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175830 + }, + { + "epoch": 0.8527922228297968, + "grad_norm": 1.8243794031036487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175840 + }, + { + "epoch": 0.8528407210226329, + "grad_norm": 1.6350066189829704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175850 + }, + { + "epoch": 0.852889219215469, + "grad_norm": 1.4243555668258523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175860 + }, + { + "epoch": 0.852937717408305, + "grad_norm": 1.8910743193600865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175870 + }, + { + "epoch": 0.8529862156011412, + "grad_norm": 1.686184880611563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175880 + }, + { + "epoch": 0.8530347137939772, + "grad_norm": 2.2101293239984443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175890 + }, + { + "epoch": 0.8530832119868134, + "grad_norm": 3.2329083410331805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175900 + }, + { + "epoch": 0.8531317101796494, + "grad_norm": 2.0069595763061443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175910 + }, + { + "epoch": 0.8531802083724855, + "grad_norm": 1.6616422016113575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175920 + }, + { + "epoch": 0.8532287065653216, + "grad_norm": 1.840052377133361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175930 + }, + { + "epoch": 0.8532772047581577, + "grad_norm": 1.6578985295723214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175940 + }, + { + "epoch": 0.8533257029509937, + "grad_norm": 1.676716721021876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175950 + }, + { + "epoch": 0.8533742011438299, + "grad_norm": 1.3460905279316648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175960 + }, + { + "epoch": 0.8534226993366659, + "grad_norm": 1.8123657241631008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175970 + }, + { + "epoch": 0.8534711975295021, + "grad_norm": 1.8472578133810202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175980 + }, + { + "epoch": 0.8535196957223381, + "grad_norm": 2.258938636146013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 175990 + }, + { + "epoch": 0.8535681939151742, + "grad_norm": 1.7966751642006784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176000 + }, + { + "epoch": 0.8536166921080103, + "grad_norm": 1.6373942202108083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176010 + }, + { + "epoch": 0.8536651903008464, + "grad_norm": 1.7536127216999375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176020 + }, + { + "epoch": 0.8537136884936825, + "grad_norm": 1.501352642208076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176030 + }, + { + "epoch": 0.8537621866865186, + "grad_norm": 2.1780257597470154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176040 + }, + { + "epoch": 0.8538106848793546, + "grad_norm": 2.6282565457336204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176050 + }, + { + "epoch": 0.8538591830721908, + "grad_norm": 1.621724443623407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176060 + }, + { + "epoch": 0.8539076812650268, + "grad_norm": 1.4561437389204457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176070 + }, + { + "epoch": 0.853956179457863, + "grad_norm": 1.2707044305670934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176080 + }, + { + "epoch": 0.8540046776506991, + "grad_norm": 2.2436237756551236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176090 + }, + { + "epoch": 0.8540531758435351, + "grad_norm": 1.5121971230769304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176100 + }, + { + "epoch": 0.8541016740363713, + "grad_norm": 1.8122577216672653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176110 + }, + { + "epoch": 0.8541501722292073, + "grad_norm": 1.731975274310571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176120 + }, + { + "epoch": 0.8541986704220434, + "grad_norm": 1.2682546568498765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176130 + }, + { + "epoch": 0.8542471686148795, + "grad_norm": 2.0728736060959818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176140 + }, + { + "epoch": 0.8542956668077156, + "grad_norm": 1.7764332227443447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176150 + }, + { + "epoch": 0.8543441650005517, + "grad_norm": 1.360413381945591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176160 + }, + { + "epoch": 0.8543926631933878, + "grad_norm": 1.2144846017747568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176170 + }, + { + "epoch": 0.8544411613862238, + "grad_norm": 1.9594279976331563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176180 + }, + { + "epoch": 0.85448965957906, + "grad_norm": 2.040384394774719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176190 + }, + { + "epoch": 0.854538157771896, + "grad_norm": 1.896679791002498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176200 + }, + { + "epoch": 0.8545866559647322, + "grad_norm": 1.3527235331878273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176210 + }, + { + "epoch": 0.8546351541575682, + "grad_norm": 1.7099432980671736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176220 + }, + { + "epoch": 0.8546836523504043, + "grad_norm": 2.0767522812548123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176230 + }, + { + "epoch": 0.8547321505432404, + "grad_norm": 1.3753247429804105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176240 + }, + { + "epoch": 0.8547806487360765, + "grad_norm": 1.972819774209711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176250 + }, + { + "epoch": 0.8548291469289125, + "grad_norm": 2.3111836000566655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176260 + }, + { + "epoch": 0.8548776451217487, + "grad_norm": 1.7699033350027094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176270 + }, + { + "epoch": 0.8549261433145847, + "grad_norm": 1.3503969498174229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176280 + }, + { + "epoch": 0.8549746415074209, + "grad_norm": 2.0775626552449467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176290 + }, + { + "epoch": 0.8550231397002569, + "grad_norm": 1.7071830171744296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176300 + }, + { + "epoch": 0.855071637893093, + "grad_norm": 1.615305578184234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176310 + }, + { + "epoch": 0.8551201360859291, + "grad_norm": 1.848614950006322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176320 + }, + { + "epoch": 0.8551686342787652, + "grad_norm": 1.402702309860615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176330 + }, + { + "epoch": 0.8552171324716012, + "grad_norm": 1.7654214090612186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176340 + }, + { + "epoch": 0.8552656306644374, + "grad_norm": 1.5485646542856557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176350 + }, + { + "epoch": 0.8553141288572734, + "grad_norm": 1.6887257814346412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176360 + }, + { + "epoch": 0.8553626270501096, + "grad_norm": 2.027909395962979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176370 + }, + { + "epoch": 0.8554111252429456, + "grad_norm": 1.8614827013152535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176380 + }, + { + "epoch": 0.8554596234357817, + "grad_norm": 2.6147034759560484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176390 + }, + { + "epoch": 0.8555081216286178, + "grad_norm": 2.1720575560379984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176400 + }, + { + "epoch": 0.8555566198214539, + "grad_norm": 1.740205668454564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176410 + }, + { + "epoch": 0.85560511801429, + "grad_norm": 1.9535354667254978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176420 + }, + { + "epoch": 0.8556536162071261, + "grad_norm": 1.2803120341686736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176430 + }, + { + "epoch": 0.8557021143999621, + "grad_norm": 1.8272377388939276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176440 + }, + { + "epoch": 0.8557506125927983, + "grad_norm": 2.6486723925245315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176450 + }, + { + "epoch": 0.8557991107856343, + "grad_norm": 1.6865655538822466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176460 + }, + { + "epoch": 0.8558476089784705, + "grad_norm": 2.2238307195721063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176470 + }, + { + "epoch": 0.8558961071713065, + "grad_norm": 1.3940399057332797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176480 + }, + { + "epoch": 0.8559446053641426, + "grad_norm": 2.5589026009242843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176490 + }, + { + "epoch": 0.8559931035569787, + "grad_norm": 2.0014439883198065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176500 + }, + { + "epoch": 0.8560416017498148, + "grad_norm": 1.4297697248366603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176510 + }, + { + "epoch": 0.8560900999426508, + "grad_norm": 2.388480169202012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176520 + }, + { + "epoch": 0.856138598135487, + "grad_norm": 1.620891509901412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176530 + }, + { + "epoch": 0.856187096328323, + "grad_norm": 2.0157845170842847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176540 + }, + { + "epoch": 0.8562355945211592, + "grad_norm": 2.05620338533663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176550 + }, + { + "epoch": 0.8562840927139952, + "grad_norm": 1.7734576474026653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176560 + }, + { + "epoch": 0.8563325909068313, + "grad_norm": 1.2218820621967552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176570 + }, + { + "epoch": 0.8563810890996674, + "grad_norm": 2.7373344479997286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176580 + }, + { + "epoch": 0.8564295872925035, + "grad_norm": 1.9260530947917687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176590 + }, + { + "epoch": 0.8564780854853397, + "grad_norm": 1.7446774691620703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176600 + }, + { + "epoch": 0.8565265836781757, + "grad_norm": 1.523370762868126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176610 + }, + { + "epoch": 0.8565750818710118, + "grad_norm": 1.7624570247676274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176620 + }, + { + "epoch": 0.8566235800638479, + "grad_norm": 1.3874686288772864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176630 + }, + { + "epoch": 0.856672078256684, + "grad_norm": 1.8477251728654664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176640 + }, + { + "epoch": 0.85672057644952, + "grad_norm": 1.736799859486382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176650 + }, + { + "epoch": 0.8567690746423562, + "grad_norm": 1.5939642494799955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176660 + }, + { + "epoch": 0.8568175728351922, + "grad_norm": 2.2833892998619376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176670 + }, + { + "epoch": 0.8568660710280284, + "grad_norm": 1.2853178077421035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176680 + }, + { + "epoch": 0.8569145692208644, + "grad_norm": 1.8770892395991723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176690 + }, + { + "epoch": 0.8569630674137005, + "grad_norm": 2.5190352914705727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176700 + }, + { + "epoch": 0.8570115656065366, + "grad_norm": 1.2631810264451815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176710 + }, + { + "epoch": 0.8570600637993727, + "grad_norm": 2.9726555439424374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176720 + }, + { + "epoch": 0.8571085619922088, + "grad_norm": 2.5171022599579373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176730 + }, + { + "epoch": 0.8571570601850449, + "grad_norm": 2.1201008948423805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176740 + }, + { + "epoch": 0.8572055583778809, + "grad_norm": 1.6358171706087887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176750 + }, + { + "epoch": 0.8572540565707171, + "grad_norm": 1.4918825286258652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176760 + }, + { + "epoch": 0.8573025547635531, + "grad_norm": 2.2436557500782328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176770 + }, + { + "epoch": 0.8573510529563892, + "grad_norm": 2.2362973695067012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176780 + }, + { + "epoch": 0.8573995511492253, + "grad_norm": 2.1303927510984977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176790 + }, + { + "epoch": 0.8574480493420614, + "grad_norm": 1.4384893276542243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176800 + }, + { + "epoch": 0.8574965475348975, + "grad_norm": 1.663787863037669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176810 + }, + { + "epoch": 0.8575450457277336, + "grad_norm": 1.3930874231959933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176820 + }, + { + "epoch": 0.8575935439205696, + "grad_norm": 1.5838649503052693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176830 + }, + { + "epoch": 0.8576420421134058, + "grad_norm": 1.31443123052577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176840 + }, + { + "epoch": 0.8576905403062418, + "grad_norm": 2.214940053590908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176850 + }, + { + "epoch": 0.857739038499078, + "grad_norm": 1.9514958538024985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176860 + }, + { + "epoch": 0.857787536691914, + "grad_norm": 2.1240939673816683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176870 + }, + { + "epoch": 0.8578360348847501, + "grad_norm": 1.6459232199395046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176880 + }, + { + "epoch": 0.8578845330775862, + "grad_norm": 2.431482570841581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176890 + }, + { + "epoch": 0.8579330312704223, + "grad_norm": 1.576958474913681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176900 + }, + { + "epoch": 0.8579815294632583, + "grad_norm": 1.7755150238940587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176910 + }, + { + "epoch": 0.8580300276560945, + "grad_norm": 1.3831207290593284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176920 + }, + { + "epoch": 0.8580785258489305, + "grad_norm": 1.2924563641547593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176930 + }, + { + "epoch": 0.8581270240417667, + "grad_norm": 1.7106298599856018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176940 + }, + { + "epoch": 0.8581755222346027, + "grad_norm": 3.5290170785629016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176950 + }, + { + "epoch": 0.8582240204274388, + "grad_norm": 1.5464861391478735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176960 + }, + { + "epoch": 0.8582725186202749, + "grad_norm": 1.3711280111294855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176970 + }, + { + "epoch": 0.858321016813111, + "grad_norm": 1.3188309111455965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176980 + }, + { + "epoch": 0.858369515005947, + "grad_norm": 1.7352563830286272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 176990 + }, + { + "epoch": 0.8584180131987832, + "grad_norm": 1.5709961331822342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177000 + }, + { + "epoch": 0.8584665113916192, + "grad_norm": 1.5656423713039658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177010 + }, + { + "epoch": 0.8585150095844554, + "grad_norm": 1.7475441538294945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177020 + }, + { + "epoch": 0.8585635077772914, + "grad_norm": 1.7526140538848267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177030 + }, + { + "epoch": 0.8586120059701275, + "grad_norm": 2.077143612666532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177040 + }, + { + "epoch": 0.8586605041629636, + "grad_norm": 2.357771755612248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177050 + }, + { + "epoch": 0.8587090023557997, + "grad_norm": 1.5695329480536202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177060 + }, + { + "epoch": 0.8587575005486358, + "grad_norm": 2.3466668608307373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177070 + }, + { + "epoch": 0.8588059987414719, + "grad_norm": 2.2404542221465817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177080 + }, + { + "epoch": 0.8588544969343079, + "grad_norm": 1.550622030777049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177090 + }, + { + "epoch": 0.8589029951271441, + "grad_norm": 2.0807235046049755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177100 + }, + { + "epoch": 0.8589514933199802, + "grad_norm": 1.525204140762071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177110 + }, + { + "epoch": 0.8589999915128163, + "grad_norm": 1.911456593006733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177120 + }, + { + "epoch": 0.8590484897056524, + "grad_norm": 1.6929782020724815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177130 + }, + { + "epoch": 0.8590969878984884, + "grad_norm": 1.997455711943985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177140 + }, + { + "epoch": 0.8591454860913246, + "grad_norm": 1.6928296986407076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177150 + }, + { + "epoch": 0.8591939842841606, + "grad_norm": 1.4750398236174078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177160 + }, + { + "epoch": 0.8592424824769967, + "grad_norm": 1.80043802089358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177170 + }, + { + "epoch": 0.8592909806698328, + "grad_norm": 1.4148887395037946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177180 + }, + { + "epoch": 0.8593394788626689, + "grad_norm": 1.7195281643012095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177190 + }, + { + "epoch": 0.859387977055505, + "grad_norm": 2.054773418080913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177200 + }, + { + "epoch": 0.8594364752483411, + "grad_norm": 1.7175000976976662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177210 + }, + { + "epoch": 0.8594849734411771, + "grad_norm": 2.289750611339514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177220 + }, + { + "epoch": 0.8595334716340133, + "grad_norm": 1.3728279846247915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177230 + }, + { + "epoch": 0.8595819698268493, + "grad_norm": 1.6168279159956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177240 + }, + { + "epoch": 0.8596304680196855, + "grad_norm": 1.2982383168491651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177250 + }, + { + "epoch": 0.8596789662125215, + "grad_norm": 1.8910633059476822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177260 + }, + { + "epoch": 0.8597274644053576, + "grad_norm": 1.4698969152959762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177270 + }, + { + "epoch": 0.8597759625981937, + "grad_norm": 1.2178349884095496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177280 + }, + { + "epoch": 0.8598244607910298, + "grad_norm": 2.100323648335234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177290 + }, + { + "epoch": 0.8598729589838658, + "grad_norm": 2.5472038700513622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177300 + }, + { + "epoch": 0.859921457176702, + "grad_norm": 3.522890068552442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177310 + }, + { + "epoch": 0.859969955369538, + "grad_norm": 2.3961199246969045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177320 + }, + { + "epoch": 0.8600184535623742, + "grad_norm": 1.5265500863392845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177330 + }, + { + "epoch": 0.8600669517552102, + "grad_norm": 1.8975185867020627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177340 + }, + { + "epoch": 0.8601154499480463, + "grad_norm": 1.6303824068586437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177350 + }, + { + "epoch": 0.8601639481408824, + "grad_norm": 2.443285218589608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177360 + }, + { + "epoch": 0.8602124463337185, + "grad_norm": 1.4210263188374483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177370 + }, + { + "epoch": 0.8602609445265545, + "grad_norm": 1.3888453942456636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177380 + }, + { + "epoch": 0.8603094427193907, + "grad_norm": 1.882415112675062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177390 + }, + { + "epoch": 0.8603579409122267, + "grad_norm": 1.4352239396941968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177400 + }, + { + "epoch": 0.8604064391050629, + "grad_norm": 1.5910105233274408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177410 + }, + { + "epoch": 0.8604549372978989, + "grad_norm": 1.5118352791887446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177420 + }, + { + "epoch": 0.860503435490735, + "grad_norm": 1.4016505289760062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177430 + }, + { + "epoch": 0.8605519336835711, + "grad_norm": 2.286850531163509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177440 + }, + { + "epoch": 0.8606004318764072, + "grad_norm": 2.0952127144369115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177450 + }, + { + "epoch": 0.8606489300692433, + "grad_norm": 2.6542075204361026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177460 + }, + { + "epoch": 0.8606974282620794, + "grad_norm": 1.6752153442212148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177470 + }, + { + "epoch": 0.8607459264549154, + "grad_norm": 1.6132169378124672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177480 + }, + { + "epoch": 0.8607944246477516, + "grad_norm": 1.51258934266707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177490 + }, + { + "epoch": 0.8608429228405876, + "grad_norm": 1.753329748055421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177500 + }, + { + "epoch": 0.8608914210334238, + "grad_norm": 1.735348220677224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177510 + }, + { + "epoch": 0.8609399192262598, + "grad_norm": 1.6267193814201164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177520 + }, + { + "epoch": 0.8609884174190959, + "grad_norm": 1.463135568258167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177530 + }, + { + "epoch": 0.861036915611932, + "grad_norm": 2.6347342085841774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177540 + }, + { + "epoch": 0.8610854138047681, + "grad_norm": 2.1191555177324517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177550 + }, + { + "epoch": 0.8611339119976041, + "grad_norm": 1.9964824460316777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177560 + }, + { + "epoch": 0.8611824101904403, + "grad_norm": 1.6173842709577002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177570 + }, + { + "epoch": 0.8612309083832763, + "grad_norm": 1.958016682124253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177580 + }, + { + "epoch": 0.8612794065761125, + "grad_norm": 1.910736635579724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177590 + }, + { + "epoch": 0.8613279047689485, + "grad_norm": 2.0652649368457787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177600 + }, + { + "epoch": 0.8613764029617846, + "grad_norm": 1.6104550581985677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177610 + }, + { + "epoch": 0.8614249011546208, + "grad_norm": 1.6432641913866064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177620 + }, + { + "epoch": 0.8614733993474568, + "grad_norm": 1.4651312163493913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177630 + }, + { + "epoch": 0.861521897540293, + "grad_norm": 1.8031354187542092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177640 + }, + { + "epoch": 0.861570395733129, + "grad_norm": 1.8020633874016312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177650 + }, + { + "epoch": 0.8616188939259651, + "grad_norm": 2.1089467949764185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177660 + }, + { + "epoch": 0.8616673921188012, + "grad_norm": 1.997977072676349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177670 + }, + { + "epoch": 0.8617158903116373, + "grad_norm": 1.841391394918901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177680 + }, + { + "epoch": 0.8617643885044733, + "grad_norm": 2.083638861449799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177690 + }, + { + "epoch": 0.8618128866973095, + "grad_norm": 1.5590208235494174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177700 + }, + { + "epoch": 0.8618613848901455, + "grad_norm": 1.3850241842305877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177710 + }, + { + "epoch": 0.8619098830829817, + "grad_norm": 2.296713574878595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177720 + }, + { + "epoch": 0.8619583812758177, + "grad_norm": 1.267056770615227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177730 + }, + { + "epoch": 0.8620068794686538, + "grad_norm": 2.272740751152469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177740 + }, + { + "epoch": 0.8620553776614899, + "grad_norm": 1.781727831939861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177750 + }, + { + "epoch": 0.862103875854326, + "grad_norm": 1.2428007956089004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177760 + }, + { + "epoch": 0.862152374047162, + "grad_norm": 1.5199217884287464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177770 + }, + { + "epoch": 0.8622008722399982, + "grad_norm": 1.787571513034436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177780 + }, + { + "epoch": 0.8622493704328342, + "grad_norm": 1.4796989411536288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177790 + }, + { + "epoch": 0.8622978686256704, + "grad_norm": 2.0118752885878166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177800 + }, + { + "epoch": 0.8623463668185064, + "grad_norm": 1.3868412196416102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177810 + }, + { + "epoch": 0.8623948650113425, + "grad_norm": 2.093211648457327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177820 + }, + { + "epoch": 0.8624433632041786, + "grad_norm": 1.8773439691699423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177830 + }, + { + "epoch": 0.8624918613970147, + "grad_norm": 2.038284740990548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177840 + }, + { + "epoch": 0.8625403595898508, + "grad_norm": 1.7630679138846972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177850 + }, + { + "epoch": 0.8625888577826869, + "grad_norm": 1.3695429679216886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177860 + }, + { + "epoch": 0.8626373559755229, + "grad_norm": 1.6698908922307965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177870 + }, + { + "epoch": 0.8626858541683591, + "grad_norm": 1.431879592672658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177880 + }, + { + "epoch": 0.8627343523611951, + "grad_norm": 2.098619944490565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177890 + }, + { + "epoch": 0.8627828505540313, + "grad_norm": 2.1095187818787053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177900 + }, + { + "epoch": 0.8628313487468673, + "grad_norm": 2.936664778019349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177910 + }, + { + "epoch": 0.8628798469397034, + "grad_norm": 1.982880526441022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177920 + }, + { + "epoch": 0.8629283451325395, + "grad_norm": 2.2406412725217706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177930 + }, + { + "epoch": 0.8629768433253756, + "grad_norm": 2.0301909486875047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177940 + }, + { + "epoch": 0.8630253415182116, + "grad_norm": 1.7330721746589006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177950 + }, + { + "epoch": 0.8630738397110478, + "grad_norm": 2.1527313265323755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177960 + }, + { + "epoch": 0.8631223379038838, + "grad_norm": 1.1355578699578928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177970 + }, + { + "epoch": 0.86317083609672, + "grad_norm": 1.824043494025318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177980 + }, + { + "epoch": 0.863219334289556, + "grad_norm": 1.6869034169531005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 177990 + }, + { + "epoch": 0.8632678324823921, + "grad_norm": 1.688187545312303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178000 + }, + { + "epoch": 0.8633163306752282, + "grad_norm": 1.4681910798231002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178010 + }, + { + "epoch": 0.8633648288680643, + "grad_norm": 1.5282948240269434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178020 + }, + { + "epoch": 0.8634133270609003, + "grad_norm": 1.4125465241932034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178030 + }, + { + "epoch": 0.8634618252537365, + "grad_norm": 1.687331518951396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178040 + }, + { + "epoch": 0.8635103234465725, + "grad_norm": 1.6193318685964186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178050 + }, + { + "epoch": 0.8635588216394087, + "grad_norm": 1.4244440293964544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178060 + }, + { + "epoch": 0.8636073198322447, + "grad_norm": 1.496992041438716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178070 + }, + { + "epoch": 0.8636558180250808, + "grad_norm": 1.9631674064157778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178080 + }, + { + "epoch": 0.8637043162179169, + "grad_norm": 1.4414768045867277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178090 + }, + { + "epoch": 0.863752814410753, + "grad_norm": 1.3456229908115347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178100 + }, + { + "epoch": 0.863801312603589, + "grad_norm": 1.4848168028436248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178110 + }, + { + "epoch": 0.8638498107964252, + "grad_norm": 1.683229378102169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178120 + }, + { + "epoch": 0.8638983089892612, + "grad_norm": 1.9658788374954383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178130 + }, + { + "epoch": 0.8639468071820974, + "grad_norm": 2.3829658246654617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178140 + }, + { + "epoch": 0.8639953053749335, + "grad_norm": 1.9792638639160032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178150 + }, + { + "epoch": 0.8640438035677696, + "grad_norm": 1.713754116394739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178160 + }, + { + "epoch": 0.8640923017606057, + "grad_norm": 1.4514765389606055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178170 + }, + { + "epoch": 0.8641407999534417, + "grad_norm": 1.7696429210900533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178180 + }, + { + "epoch": 0.8641892981462779, + "grad_norm": 1.577290653642649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178190 + }, + { + "epoch": 0.8642377963391139, + "grad_norm": 2.052655467821296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178200 + }, + { + "epoch": 0.86428629453195, + "grad_norm": 1.8271524737656364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178210 + }, + { + "epoch": 0.8643347927247861, + "grad_norm": 2.3789173297927846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178220 + }, + { + "epoch": 0.8643832909176222, + "grad_norm": 1.619748601910942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178230 + }, + { + "epoch": 0.8644317891104583, + "grad_norm": 2.084658667911299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178240 + }, + { + "epoch": 0.8644802873032944, + "grad_norm": 1.6564410287855935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178250 + }, + { + "epoch": 0.8645287854961304, + "grad_norm": 2.377642083217779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178260 + }, + { + "epoch": 0.8645772836889666, + "grad_norm": 1.6702584204608684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178270 + }, + { + "epoch": 0.8646257818818026, + "grad_norm": 1.3065332815642705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178280 + }, + { + "epoch": 0.8646742800746388, + "grad_norm": 2.1649157133651897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178290 + }, + { + "epoch": 0.8647227782674748, + "grad_norm": 1.7872496371751367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178300 + }, + { + "epoch": 0.8647712764603109, + "grad_norm": 1.779435976345667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178310 + }, + { + "epoch": 0.864819774653147, + "grad_norm": 2.3549450389737103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178320 + }, + { + "epoch": 0.8648682728459831, + "grad_norm": 1.4637822509655507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178330 + }, + { + "epoch": 0.8649167710388191, + "grad_norm": 1.564199614279005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178340 + }, + { + "epoch": 0.8649652692316553, + "grad_norm": 1.5638358163982957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178350 + }, + { + "epoch": 0.8650137674244913, + "grad_norm": 1.5245863238533275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178360 + }, + { + "epoch": 0.8650622656173275, + "grad_norm": 1.5869776603949504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178370 + }, + { + "epoch": 0.8651107638101635, + "grad_norm": 1.660962034577551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178380 + }, + { + "epoch": 0.8651592620029996, + "grad_norm": 1.5254185470325865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178390 + }, + { + "epoch": 0.8652077601958357, + "grad_norm": 2.1956374496312492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178400 + }, + { + "epoch": 0.8652562583886718, + "grad_norm": 1.7956587328171736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178410 + }, + { + "epoch": 0.8653047565815079, + "grad_norm": 1.944560779065796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178420 + }, + { + "epoch": 0.865353254774344, + "grad_norm": 2.3823703898528947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178430 + }, + { + "epoch": 0.86540175296718, + "grad_norm": 1.723933706898606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178440 + }, + { + "epoch": 0.8654502511600162, + "grad_norm": 1.4546603033238625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178450 + }, + { + "epoch": 0.8654987493528522, + "grad_norm": 1.3128688358676754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178460 + }, + { + "epoch": 0.8655472475456883, + "grad_norm": 1.5439034939390694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178470 + }, + { + "epoch": 0.8655957457385244, + "grad_norm": 1.2367922685996291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178480 + }, + { + "epoch": 0.8656442439313605, + "grad_norm": 2.12604351901291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178490 + }, + { + "epoch": 0.8656927421241966, + "grad_norm": 1.4001400039376222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178500 + }, + { + "epoch": 0.8657412403170327, + "grad_norm": 1.627759793620953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178510 + }, + { + "epoch": 0.8657897385098687, + "grad_norm": 1.493020818088553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178520 + }, + { + "epoch": 0.8658382367027049, + "grad_norm": 1.684917450006651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178530 + }, + { + "epoch": 0.8658867348955409, + "grad_norm": 2.6553893306413556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178540 + }, + { + "epoch": 0.865935233088377, + "grad_norm": 1.5040376055708293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178550 + }, + { + "epoch": 0.8659837312812131, + "grad_norm": 1.1799382804156267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178560 + }, + { + "epoch": 0.8660322294740492, + "grad_norm": 1.521045511765351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178570 + }, + { + "epoch": 0.8660807276668853, + "grad_norm": 1.188787290828941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178580 + }, + { + "epoch": 0.8661292258597214, + "grad_norm": 1.9936578610213473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178590 + }, + { + "epoch": 0.8661777240525574, + "grad_norm": 2.1402556171778997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178600 + }, + { + "epoch": 0.8662262222453936, + "grad_norm": 1.4048726626469943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178610 + }, + { + "epoch": 0.8662747204382296, + "grad_norm": 1.5410039466701164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178620 + }, + { + "epoch": 0.8663232186310658, + "grad_norm": 1.5568286215739136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178630 + }, + { + "epoch": 0.8663717168239018, + "grad_norm": 1.8035247961734058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178640 + }, + { + "epoch": 0.8664202150167379, + "grad_norm": 1.5580516432578406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178650 + }, + { + "epoch": 0.8664687132095741, + "grad_norm": 2.0244423026838376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178660 + }, + { + "epoch": 0.8665172114024101, + "grad_norm": 1.33790649670118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178670 + }, + { + "epoch": 0.8665657095952463, + "grad_norm": 1.7808757135640008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178680 + }, + { + "epoch": 0.8666142077880823, + "grad_norm": 1.9002206030904745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178690 + }, + { + "epoch": 0.8666627059809184, + "grad_norm": 1.637341462412678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178700 + }, + { + "epoch": 0.8667112041737545, + "grad_norm": 1.4983553953129558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178710 + }, + { + "epoch": 0.8667597023665906, + "grad_norm": 1.348829758995862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178720 + }, + { + "epoch": 0.8668082005594266, + "grad_norm": 1.4556713168190072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178730 + }, + { + "epoch": 0.8668566987522628, + "grad_norm": 1.9470471457339045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178740 + }, + { + "epoch": 0.8669051969450988, + "grad_norm": 1.640750824094539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178750 + }, + { + "epoch": 0.866953695137935, + "grad_norm": 1.799599225194015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178760 + }, + { + "epoch": 0.867002193330771, + "grad_norm": 1.2692941808722935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178770 + }, + { + "epoch": 0.8670506915236071, + "grad_norm": 1.5763410132763056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178780 + }, + { + "epoch": 0.8670991897164432, + "grad_norm": 1.4658046332272079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178790 + }, + { + "epoch": 0.8671476879092793, + "grad_norm": 1.8254640465897864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178800 + }, + { + "epoch": 0.8671961861021154, + "grad_norm": 1.8454477057616714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178810 + }, + { + "epoch": 0.8672446842949515, + "grad_norm": 1.8238051069374706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178820 + }, + { + "epoch": 0.8672931824877875, + "grad_norm": 2.2185782100336837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178830 + }, + { + "epoch": 0.8673416806806237, + "grad_norm": 1.4129311054489335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178840 + }, + { + "epoch": 0.8673901788734597, + "grad_norm": 1.7450796363505106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178850 + }, + { + "epoch": 0.8674386770662958, + "grad_norm": 1.6542426095611518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178860 + }, + { + "epoch": 0.8674871752591319, + "grad_norm": 1.2111358138611195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178870 + }, + { + "epoch": 0.867535673451968, + "grad_norm": 1.4444335505459094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178880 + }, + { + "epoch": 0.8675841716448041, + "grad_norm": 1.8052610073482356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178890 + }, + { + "epoch": 0.8676326698376402, + "grad_norm": 1.5915091466922604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178900 + }, + { + "epoch": 0.8676811680304762, + "grad_norm": 1.150748563105708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178910 + }, + { + "epoch": 0.8677296662233124, + "grad_norm": 1.5733222724634288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178920 + }, + { + "epoch": 0.8677781644161484, + "grad_norm": 1.2873335286656129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178930 + }, + { + "epoch": 0.8678266626089846, + "grad_norm": 1.5597619196228152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178940 + }, + { + "epoch": 0.8678751608018206, + "grad_norm": 1.641870284174729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178950 + }, + { + "epoch": 0.8679236589946567, + "grad_norm": 1.221515422145103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178960 + }, + { + "epoch": 0.8679721571874928, + "grad_norm": 1.8044284288976087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178970 + }, + { + "epoch": 0.8680206553803289, + "grad_norm": 1.3619264827013922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178980 + }, + { + "epoch": 0.8680691535731649, + "grad_norm": 1.4452080421278879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 178990 + }, + { + "epoch": 0.8681176517660011, + "grad_norm": 1.444763686464512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179000 + }, + { + "epoch": 0.8681661499588371, + "grad_norm": 2.28577530236862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179010 + }, + { + "epoch": 0.8682146481516733, + "grad_norm": 1.4404307080440049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179020 + }, + { + "epoch": 0.8682631463445093, + "grad_norm": 1.8095676068696775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179030 + }, + { + "epoch": 0.8683116445373454, + "grad_norm": 1.9299479347978377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179040 + }, + { + "epoch": 0.8683601427301815, + "grad_norm": 1.819737605046612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179050 + }, + { + "epoch": 0.8684086409230176, + "grad_norm": 1.6789455159482713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179060 + }, + { + "epoch": 0.8684571391158536, + "grad_norm": 1.5297310085315985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179070 + }, + { + "epoch": 0.8685056373086898, + "grad_norm": 1.8928776768234457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179080 + }, + { + "epoch": 0.8685541355015258, + "grad_norm": 1.586125719654774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179090 + }, + { + "epoch": 0.868602633694362, + "grad_norm": 1.5773215622516545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179100 + }, + { + "epoch": 0.868651131887198, + "grad_norm": 1.650282221987709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179110 + }, + { + "epoch": 0.8686996300800341, + "grad_norm": 1.3884615235326692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179120 + }, + { + "epoch": 0.8687481282728702, + "grad_norm": 1.9599394107672197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179130 + }, + { + "epoch": 0.8687966264657063, + "grad_norm": 1.643371838611074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179140 + }, + { + "epoch": 0.8688451246585424, + "grad_norm": 2.1417761786324263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179150 + }, + { + "epoch": 0.8688936228513785, + "grad_norm": 1.3204497939511839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179160 + }, + { + "epoch": 0.8689421210442146, + "grad_norm": 1.3092437356476694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179170 + }, + { + "epoch": 0.8689906192370507, + "grad_norm": 1.8259934009279277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179180 + }, + { + "epoch": 0.8690391174298868, + "grad_norm": 1.4777186585490654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179190 + }, + { + "epoch": 0.8690876156227229, + "grad_norm": 1.700035134888367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179200 + }, + { + "epoch": 0.869136113815559, + "grad_norm": 1.2116855963029138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179210 + }, + { + "epoch": 0.869184612008395, + "grad_norm": 2.2435976632095844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179220 + }, + { + "epoch": 0.8692331102012312, + "grad_norm": 2.1596266108758755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179230 + }, + { + "epoch": 0.8692816083940672, + "grad_norm": 2.492771500328672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179240 + }, + { + "epoch": 0.8693301065869034, + "grad_norm": 2.1589315224446182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179250 + }, + { + "epoch": 0.8693786047797394, + "grad_norm": 1.2191154752372313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179260 + }, + { + "epoch": 0.8694271029725755, + "grad_norm": 1.432024721026437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179270 + }, + { + "epoch": 0.8694756011654116, + "grad_norm": 2.0907126696556588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179280 + }, + { + "epoch": 0.8695240993582477, + "grad_norm": 2.1238454550598362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179290 + }, + { + "epoch": 0.8695725975510837, + "grad_norm": 2.0646012899305788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179300 + }, + { + "epoch": 0.8696210957439199, + "grad_norm": 2.200004090013863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179310 + }, + { + "epoch": 0.8696695939367559, + "grad_norm": 1.788711578853963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179320 + }, + { + "epoch": 0.8697180921295921, + "grad_norm": 1.3953379784936715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179330 + }, + { + "epoch": 0.8697665903224281, + "grad_norm": 1.9416324548160446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179340 + }, + { + "epoch": 0.8698150885152642, + "grad_norm": 1.882285260990102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179350 + }, + { + "epoch": 0.8698635867081003, + "grad_norm": 2.1646322068136215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179360 + }, + { + "epoch": 0.8699120849009364, + "grad_norm": 2.2127640164626428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179370 + }, + { + "epoch": 0.8699605830937724, + "grad_norm": 1.7270801677682357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179380 + }, + { + "epoch": 0.8700090812866086, + "grad_norm": 1.652156811360328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179390 + }, + { + "epoch": 0.8700575794794446, + "grad_norm": 2.1590199850152203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179400 + }, + { + "epoch": 0.8701060776722808, + "grad_norm": 1.642670177659511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179410 + }, + { + "epoch": 0.8701545758651168, + "grad_norm": 1.4642804302411605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179420 + }, + { + "epoch": 0.8702030740579529, + "grad_norm": 1.738070309897921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179430 + }, + { + "epoch": 0.870251572250789, + "grad_norm": 1.3850728564079873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179440 + }, + { + "epoch": 0.8703000704436251, + "grad_norm": 1.650453462787027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179450 + }, + { + "epoch": 0.8703485686364612, + "grad_norm": 1.590227682868317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179460 + }, + { + "epoch": 0.8703970668292973, + "grad_norm": 1.5768382155556537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179470 + }, + { + "epoch": 0.8704455650221333, + "grad_norm": 2.0131437850068323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179480 + }, + { + "epoch": 0.8704940632149695, + "grad_norm": 1.9280387064668503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179490 + }, + { + "epoch": 0.8705425614078055, + "grad_norm": 2.3184485442584446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179500 + }, + { + "epoch": 0.8705910596006416, + "grad_norm": 1.5200013692151515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179510 + }, + { + "epoch": 0.8706395577934777, + "grad_norm": 1.5597134250810996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179520 + }, + { + "epoch": 0.8706880559863138, + "grad_norm": 1.6333485675090742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179530 + }, + { + "epoch": 0.8707365541791499, + "grad_norm": 2.3018744244041045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179540 + }, + { + "epoch": 0.870785052371986, + "grad_norm": 2.9043295768360622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179550 + }, + { + "epoch": 0.870833550564822, + "grad_norm": 2.3065981125114376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179560 + }, + { + "epoch": 0.8708820487576582, + "grad_norm": 1.4852661323061511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179570 + }, + { + "epoch": 0.8709305469504942, + "grad_norm": 1.392629478402796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179580 + }, + { + "epoch": 0.8709790451433304, + "grad_norm": 1.4014919891280897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179590 + }, + { + "epoch": 0.8710275433361664, + "grad_norm": 1.6504962729868566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179600 + }, + { + "epoch": 0.8710760415290025, + "grad_norm": 1.953114292518876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179610 + }, + { + "epoch": 0.8711245397218386, + "grad_norm": 1.4236849033011367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179620 + }, + { + "epoch": 0.8711730379146747, + "grad_norm": 2.2049553294323232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179630 + }, + { + "epoch": 0.8712215361075107, + "grad_norm": 1.6584071005354417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179640 + }, + { + "epoch": 0.8712700343003469, + "grad_norm": 1.4616466259553818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179650 + }, + { + "epoch": 0.8713185324931829, + "grad_norm": 1.2673661231588085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179660 + }, + { + "epoch": 0.8713670306860191, + "grad_norm": 1.3965615330846504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179670 + }, + { + "epoch": 0.8714155288788552, + "grad_norm": 1.7146284392310918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179680 + }, + { + "epoch": 0.8714640270716912, + "grad_norm": 1.792493975472098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179690 + }, + { + "epoch": 0.8715125252645274, + "grad_norm": 1.7410906494319534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179700 + }, + { + "epoch": 0.8715610234573634, + "grad_norm": 1.0826517460316154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179710 + }, + { + "epoch": 0.8716095216501996, + "grad_norm": 1.8562204218142142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179720 + }, + { + "epoch": 0.8716580198430356, + "grad_norm": 1.5909247252920977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179730 + }, + { + "epoch": 0.8717065180358717, + "grad_norm": 2.7215595110874347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179740 + }, + { + "epoch": 0.8717550162287078, + "grad_norm": 1.5425163368831818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179750 + }, + { + "epoch": 0.8718035144215439, + "grad_norm": 1.6202356789563055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179760 + }, + { + "epoch": 0.87185201261438, + "grad_norm": 1.7459141687936608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179770 + }, + { + "epoch": 0.8719005108072161, + "grad_norm": 1.8120312361702418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179780 + }, + { + "epoch": 0.8719490090000521, + "grad_norm": 1.7550203068594783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179790 + }, + { + "epoch": 0.8719975071928883, + "grad_norm": 1.506188418431975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179800 + }, + { + "epoch": 0.8720460053857243, + "grad_norm": 1.723761933192236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179810 + }, + { + "epoch": 0.8720945035785604, + "grad_norm": 1.1887918205388814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179820 + }, + { + "epoch": 0.8721430017713965, + "grad_norm": 1.888768785818229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179830 + }, + { + "epoch": 0.8721914999642326, + "grad_norm": 1.5437557010500313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179840 + }, + { + "epoch": 0.8722399981570687, + "grad_norm": 1.7276560626555693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179850 + }, + { + "epoch": 0.8722884963499048, + "grad_norm": 1.7160450838105135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179860 + }, + { + "epoch": 0.8723369945427408, + "grad_norm": 1.9091480396582483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179870 + }, + { + "epoch": 0.872385492735577, + "grad_norm": 2.1375429426484516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179880 + }, + { + "epoch": 0.872433990928413, + "grad_norm": 1.5268289743630703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179890 + }, + { + "epoch": 0.8724824891212491, + "grad_norm": 1.2486868428140951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179900 + }, + { + "epoch": 0.8725309873140852, + "grad_norm": 1.3748235438981737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179910 + }, + { + "epoch": 0.8725794855069213, + "grad_norm": 1.7101006832831445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179920 + }, + { + "epoch": 0.8726279836997574, + "grad_norm": 3.150894301029439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179930 + }, + { + "epoch": 0.8726764818925935, + "grad_norm": 1.6132052138573272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179940 + }, + { + "epoch": 0.8727249800854295, + "grad_norm": 2.0605385842031865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179950 + }, + { + "epoch": 0.8727734782782657, + "grad_norm": 1.5234068229119657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179960 + }, + { + "epoch": 0.8728219764711017, + "grad_norm": 1.4303545015081909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179970 + }, + { + "epoch": 0.8728704746639379, + "grad_norm": 1.7148988007420485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179980 + }, + { + "epoch": 0.8729189728567739, + "grad_norm": 2.2457479431636784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 179990 + }, + { + "epoch": 0.87296747104961, + "grad_norm": 1.6033760985578738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180000 + }, + { + "epoch": 0.8730159692424461, + "grad_norm": 1.588148279552115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180010 + }, + { + "epoch": 0.8730644674352822, + "grad_norm": 2.1349862322495028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180020 + }, + { + "epoch": 0.8731129656281182, + "grad_norm": 1.3601907156157722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180030 + }, + { + "epoch": 0.8731614638209544, + "grad_norm": 2.0350196194840464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180040 + }, + { + "epoch": 0.8732099620137904, + "grad_norm": 2.0700436920151333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180050 + }, + { + "epoch": 0.8732584602066266, + "grad_norm": 1.6589881468576095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180060 + }, + { + "epoch": 0.8733069583994626, + "grad_norm": 1.503111413114766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180070 + }, + { + "epoch": 0.8733554565922987, + "grad_norm": 1.475394828531762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180080 + }, + { + "epoch": 0.8734039547851348, + "grad_norm": 1.6785298484478517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180090 + }, + { + "epoch": 0.8734524529779709, + "grad_norm": 1.3961440892273913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180100 + }, + { + "epoch": 0.873500951170807, + "grad_norm": 1.7100029836569774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180110 + }, + { + "epoch": 0.8735494493636431, + "grad_norm": 1.567019225490185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180120 + }, + { + "epoch": 0.8735979475564791, + "grad_norm": 1.5824804577846407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180130 + }, + { + "epoch": 0.8736464457493153, + "grad_norm": 1.4976635043240094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180140 + }, + { + "epoch": 0.8736949439421513, + "grad_norm": 1.729842580289187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180150 + }, + { + "epoch": 0.8737434421349874, + "grad_norm": 2.652325292729074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180160 + }, + { + "epoch": 0.8737919403278235, + "grad_norm": 2.1469523048267547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180170 + }, + { + "epoch": 0.8738404385206596, + "grad_norm": 1.4568764861166983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180180 + }, + { + "epoch": 0.8738889367134958, + "grad_norm": 1.4628553479667517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180190 + }, + { + "epoch": 0.8739374349063318, + "grad_norm": 1.4671263315335636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180200 + }, + { + "epoch": 0.8739859330991679, + "grad_norm": 1.4059274633382302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180210 + }, + { + "epoch": 0.874034431292004, + "grad_norm": 1.8512444910356862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180220 + }, + { + "epoch": 0.8740829294848401, + "grad_norm": 1.1748954698020952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180230 + }, + { + "epoch": 0.8741314276776762, + "grad_norm": 1.5991769686252155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180240 + }, + { + "epoch": 0.8741799258705123, + "grad_norm": 2.0086952545739223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180250 + }, + { + "epoch": 0.8742284240633483, + "grad_norm": 2.0384259613592803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180260 + }, + { + "epoch": 0.8742769222561845, + "grad_norm": 2.3121733860875793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180270 + }, + { + "epoch": 0.8743254204490205, + "grad_norm": 1.4605683773538658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180280 + }, + { + "epoch": 0.8743739186418567, + "grad_norm": 1.948091465919788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180290 + }, + { + "epoch": 0.8744224168346927, + "grad_norm": 1.9444463816853386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180300 + }, + { + "epoch": 0.8744709150275288, + "grad_norm": 1.8775388355152245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180310 + }, + { + "epoch": 0.8745194132203649, + "grad_norm": 1.4933117853388467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180320 + }, + { + "epoch": 0.874567911413201, + "grad_norm": 1.3926696240673664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180330 + }, + { + "epoch": 0.874616409606037, + "grad_norm": 2.686434186216502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180340 + }, + { + "epoch": 0.8746649077988732, + "grad_norm": 2.0736660388820383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180350 + }, + { + "epoch": 0.8747134059917092, + "grad_norm": 1.989926978751555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180360 + }, + { + "epoch": 0.8747619041845454, + "grad_norm": 2.0828906599490438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180370 + }, + { + "epoch": 0.8748104023773814, + "grad_norm": 1.585424058703211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180380 + }, + { + "epoch": 0.8748589005702175, + "grad_norm": 1.996979470675342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180390 + }, + { + "epoch": 0.8749073987630536, + "grad_norm": 1.3574822155248967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180400 + }, + { + "epoch": 0.8749558969558897, + "grad_norm": 1.568419705222368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180410 + }, + { + "epoch": 0.8750043951487257, + "grad_norm": 2.1364023439218727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180420 + }, + { + "epoch": 0.8750528933415619, + "grad_norm": 2.0588704074953057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180430 + }, + { + "epoch": 0.8751013915343979, + "grad_norm": 1.544265693098623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180440 + }, + { + "epoch": 0.8751498897272341, + "grad_norm": 2.083307570899251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180450 + }, + { + "epoch": 0.8751983879200701, + "grad_norm": 1.5449463930394813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180460 + }, + { + "epoch": 0.8752468861129062, + "grad_norm": 1.8495438069976444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180470 + }, + { + "epoch": 0.8752953843057423, + "grad_norm": 1.993507758868418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180480 + }, + { + "epoch": 0.8753438824985784, + "grad_norm": 2.0282483248479366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180490 + }, + { + "epoch": 0.8753923806914145, + "grad_norm": 1.4279320836863008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180500 + }, + { + "epoch": 0.8754408788842506, + "grad_norm": 1.5850515566739887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180510 + }, + { + "epoch": 0.8754893770770866, + "grad_norm": 1.4315842733481077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180520 + }, + { + "epoch": 0.8755378752699228, + "grad_norm": 1.3944738697091452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180530 + }, + { + "epoch": 0.8755863734627588, + "grad_norm": 1.4487801180962379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180540 + }, + { + "epoch": 0.875634871655595, + "grad_norm": 1.7437832511291163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180550 + }, + { + "epoch": 0.875683369848431, + "grad_norm": 1.4575986639897565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180560 + }, + { + "epoch": 0.8757318680412671, + "grad_norm": 2.142014210448906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180570 + }, + { + "epoch": 0.8757803662341032, + "grad_norm": 1.3924849717739107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180580 + }, + { + "epoch": 0.8758288644269393, + "grad_norm": 1.903266522162994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180590 + }, + { + "epoch": 0.8758773626197753, + "grad_norm": 2.587902692141597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180600 + }, + { + "epoch": 0.8759258608126115, + "grad_norm": 1.9558529018581794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180610 + }, + { + "epoch": 0.8759743590054475, + "grad_norm": 1.8855294214858986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180620 + }, + { + "epoch": 0.8760228571982837, + "grad_norm": 2.17378257616474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180630 + }, + { + "epoch": 0.8760713553911197, + "grad_norm": 1.740883881495847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180640 + }, + { + "epoch": 0.8761198535839558, + "grad_norm": 1.746882460906818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180650 + }, + { + "epoch": 0.8761683517767919, + "grad_norm": 2.7251138234873906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180660 + }, + { + "epoch": 0.876216849969628, + "grad_norm": 1.3382082997281941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180670 + }, + { + "epoch": 0.876265348162464, + "grad_norm": 1.828708562356951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180680 + }, + { + "epoch": 0.8763138463553002, + "grad_norm": 1.9650023830308783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180690 + }, + { + "epoch": 0.8763623445481363, + "grad_norm": 1.550859352050793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180700 + }, + { + "epoch": 0.8764108427409724, + "grad_norm": 1.6617843101585095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180710 + }, + { + "epoch": 0.8764593409338085, + "grad_norm": 2.687855982230758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180720 + }, + { + "epoch": 0.8765078391266445, + "grad_norm": 1.2482794353729787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180730 + }, + { + "epoch": 0.8765563373194807, + "grad_norm": 2.4378847385264635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180740 + }, + { + "epoch": 0.8766048355123167, + "grad_norm": 1.6670922420303214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180750 + }, + { + "epoch": 0.8766533337051529, + "grad_norm": 1.2954585848490296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180760 + }, + { + "epoch": 0.8767018318979889, + "grad_norm": 1.411679573237734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180770 + }, + { + "epoch": 0.876750330090825, + "grad_norm": 1.5914650930426433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180780 + }, + { + "epoch": 0.8767988282836611, + "grad_norm": 1.8263055068246103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180790 + }, + { + "epoch": 0.8768473264764972, + "grad_norm": 1.4029655659442142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180800 + }, + { + "epoch": 0.8768958246693332, + "grad_norm": 2.2019809975404314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180810 + }, + { + "epoch": 0.8769443228621694, + "grad_norm": 1.7217022474369514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180820 + }, + { + "epoch": 0.8769928210550054, + "grad_norm": 1.524013271136937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180830 + }, + { + "epoch": 0.8770413192478416, + "grad_norm": 1.6243472344967813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180840 + }, + { + "epoch": 0.8770898174406776, + "grad_norm": 2.165945822696358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180850 + }, + { + "epoch": 0.8771383156335137, + "grad_norm": 1.7584723011054848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180860 + }, + { + "epoch": 0.8771868138263498, + "grad_norm": 1.7010496122793484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180870 + }, + { + "epoch": 0.8772353120191859, + "grad_norm": 1.8138065271955384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180880 + }, + { + "epoch": 0.877283810212022, + "grad_norm": 2.0701470759831864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180890 + }, + { + "epoch": 0.8773323084048581, + "grad_norm": 1.8011409252949306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180900 + }, + { + "epoch": 0.8773808065976941, + "grad_norm": 1.454644138476624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180910 + }, + { + "epoch": 0.8774293047905303, + "grad_norm": 1.2131116555735844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180920 + }, + { + "epoch": 0.8774778029833663, + "grad_norm": 1.542373873064662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180930 + }, + { + "epoch": 0.8775263011762025, + "grad_norm": 1.3132515519487242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180940 + }, + { + "epoch": 0.8775747993690385, + "grad_norm": 2.0722559668229223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180950 + }, + { + "epoch": 0.8776232975618746, + "grad_norm": 1.854192355210671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180960 + }, + { + "epoch": 0.8776717957547107, + "grad_norm": 1.8674038315680264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180970 + }, + { + "epoch": 0.8777202939475468, + "grad_norm": 2.0797045863218955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180980 + }, + { + "epoch": 0.8777687921403828, + "grad_norm": 1.8017596303820937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 180990 + }, + { + "epoch": 0.877817290333219, + "grad_norm": 1.565126162006436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181000 + }, + { + "epoch": 0.877865788526055, + "grad_norm": 1.283538964003128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181010 + }, + { + "epoch": 0.8779142867188912, + "grad_norm": 2.504180507401088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181020 + }, + { + "epoch": 0.8779627849117272, + "grad_norm": 1.601731547395957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181030 + }, + { + "epoch": 0.8780112831045633, + "grad_norm": 2.0065449746198283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181040 + }, + { + "epoch": 0.8780597812973994, + "grad_norm": 1.5513139217659955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181050 + }, + { + "epoch": 0.8781082794902355, + "grad_norm": 1.11946558689624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181060 + }, + { + "epoch": 0.8781567776830715, + "grad_norm": 1.4711346807416703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181070 + }, + { + "epoch": 0.8782052758759077, + "grad_norm": 1.567028284910066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181080 + }, + { + "epoch": 0.8782537740687437, + "grad_norm": 1.7946321761996842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181090 + }, + { + "epoch": 0.8783022722615799, + "grad_norm": 2.3338140309192568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181100 + }, + { + "epoch": 0.8783507704544159, + "grad_norm": 1.7459468537595058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181110 + }, + { + "epoch": 0.878399268647252, + "grad_norm": 1.546786343453732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181120 + }, + { + "epoch": 0.8784477668400881, + "grad_norm": 2.18161382292692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181130 + }, + { + "epoch": 0.8784962650329242, + "grad_norm": 1.5408254228077567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181140 + }, + { + "epoch": 0.8785447632257603, + "grad_norm": 1.762099799407224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181150 + }, + { + "epoch": 0.8785932614185964, + "grad_norm": 2.2332169891114972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181160 + }, + { + "epoch": 0.8786417596114324, + "grad_norm": 1.5640758022072987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181170 + }, + { + "epoch": 0.8786902578042686, + "grad_norm": 2.1268448335831636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181180 + }, + { + "epoch": 0.8787387559971046, + "grad_norm": 1.817539896364906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181190 + }, + { + "epoch": 0.8787872541899407, + "grad_norm": 1.703358343263517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181200 + }, + { + "epoch": 0.8788357523827769, + "grad_norm": 2.046018110490877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181210 + }, + { + "epoch": 0.8788842505756129, + "grad_norm": 2.313068314663269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181220 + }, + { + "epoch": 0.8789327487684491, + "grad_norm": 1.1839665248203346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181230 + }, + { + "epoch": 0.8789812469612851, + "grad_norm": 1.5619606941186248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181240 + }, + { + "epoch": 0.8790297451541212, + "grad_norm": 1.7240919802929966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181250 + }, + { + "epoch": 0.8790782433469573, + "grad_norm": 1.5488753390968668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181260 + }, + { + "epoch": 0.8791267415397934, + "grad_norm": 1.8661092227034715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181270 + }, + { + "epoch": 0.8791752397326295, + "grad_norm": 1.4049347463185313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181280 + }, + { + "epoch": 0.8792237379254656, + "grad_norm": 1.1886473139099962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181290 + }, + { + "epoch": 0.8792722361183016, + "grad_norm": 1.4797761238583007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181300 + }, + { + "epoch": 0.8793207343111378, + "grad_norm": 2.6931616048386786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181310 + }, + { + "epoch": 0.8793692325039738, + "grad_norm": 1.7710728883457705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181320 + }, + { + "epoch": 0.87941773069681, + "grad_norm": 1.63653002260844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181330 + }, + { + "epoch": 0.879466228889646, + "grad_norm": 1.4357493860472914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181340 + }, + { + "epoch": 0.8795147270824821, + "grad_norm": 1.643292435460353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181350 + }, + { + "epoch": 0.8795632252753182, + "grad_norm": 2.2317792058856867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181360 + }, + { + "epoch": 0.8796117234681543, + "grad_norm": 1.7107135263927375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181370 + }, + { + "epoch": 0.8796602216609903, + "grad_norm": 1.4448355400986657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181380 + }, + { + "epoch": 0.8797087198538265, + "grad_norm": 1.625200773958113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181390 + }, + { + "epoch": 0.8797572180466625, + "grad_norm": 1.5921044038691434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181400 + }, + { + "epoch": 0.8798057162394987, + "grad_norm": 1.5967332345212526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181410 + }, + { + "epoch": 0.8798542144323347, + "grad_norm": 2.4629528638797638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181420 + }, + { + "epoch": 0.8799027126251708, + "grad_norm": 2.132690291034578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181430 + }, + { + "epoch": 0.8799512108180069, + "grad_norm": 1.4695721084478919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181440 + }, + { + "epoch": 0.879999709010843, + "grad_norm": 1.6464742458310866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181450 + }, + { + "epoch": 0.880048207203679, + "grad_norm": 1.7803056806542372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181460 + }, + { + "epoch": 0.8800967053965152, + "grad_norm": 1.672358962423459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181470 + }, + { + "epoch": 0.8801452035893512, + "grad_norm": 1.866578891451809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181480 + }, + { + "epoch": 0.8801937017821874, + "grad_norm": 1.280604156050913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181490 + }, + { + "epoch": 0.8802421999750234, + "grad_norm": 1.640990987539226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181500 + }, + { + "epoch": 0.8802906981678595, + "grad_norm": 1.920057002280373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181510 + }, + { + "epoch": 0.8803391963606956, + "grad_norm": 1.3218025785022292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181520 + }, + { + "epoch": 0.8803876945535317, + "grad_norm": 1.8093100351279645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181530 + }, + { + "epoch": 0.8804361927463678, + "grad_norm": 1.8363369491680714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181540 + }, + { + "epoch": 0.8804846909392039, + "grad_norm": 1.6285763848600254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181550 + }, + { + "epoch": 0.8805331891320399, + "grad_norm": 2.1325371690750217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181560 + }, + { + "epoch": 0.8805816873248761, + "grad_norm": 2.1351056034291105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181570 + }, + { + "epoch": 0.8806301855177121, + "grad_norm": 1.8915459421009473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181580 + }, + { + "epoch": 0.8806786837105482, + "grad_norm": 1.7534109275629817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181590 + }, + { + "epoch": 0.8807271819033843, + "grad_norm": 1.787661041419142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181600 + }, + { + "epoch": 0.8807756800962204, + "grad_norm": 1.4513896751111588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181610 + }, + { + "epoch": 0.8808241782890565, + "grad_norm": 1.889156209244902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181620 + }, + { + "epoch": 0.8808726764818926, + "grad_norm": 1.362806667515315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181630 + }, + { + "epoch": 0.8809211746747286, + "grad_norm": 1.6280006676083758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181640 + }, + { + "epoch": 0.8809696728675648, + "grad_norm": 1.2598735388280602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181650 + }, + { + "epoch": 0.8810181710604008, + "grad_norm": 1.5961031607503173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181660 + }, + { + "epoch": 0.881066669253237, + "grad_norm": 1.4614317755956563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181670 + }, + { + "epoch": 0.881115167446073, + "grad_norm": 1.602007415613116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181680 + }, + { + "epoch": 0.8811636656389091, + "grad_norm": 1.4681309501440865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181690 + }, + { + "epoch": 0.8812121638317452, + "grad_norm": 1.6178429262936334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181700 + }, + { + "epoch": 0.8812606620245813, + "grad_norm": 1.51101993139946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181710 + }, + { + "epoch": 0.8813091602174175, + "grad_norm": 1.9580227217375068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181720 + }, + { + "epoch": 0.8813576584102535, + "grad_norm": 2.000349397235368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181730 + }, + { + "epoch": 0.8814061566030896, + "grad_norm": 1.8041141913727188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181740 + }, + { + "epoch": 0.8814546547959257, + "grad_norm": 1.783461023308064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181750 + }, + { + "epoch": 0.8815031529887618, + "grad_norm": 2.0404341327662223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181760 + }, + { + "epoch": 0.8815516511815978, + "grad_norm": 1.4189197372616036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181770 + }, + { + "epoch": 0.881600149374434, + "grad_norm": 2.0427441072001784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181780 + }, + { + "epoch": 0.88164864756727, + "grad_norm": 1.2030569429555271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181790 + }, + { + "epoch": 0.8816971457601062, + "grad_norm": 2.2404931243613646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181800 + }, + { + "epoch": 0.8817456439529422, + "grad_norm": 1.2781966596264738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181810 + }, + { + "epoch": 0.8817941421457783, + "grad_norm": 2.0051960092359877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181820 + }, + { + "epoch": 0.8818426403386144, + "grad_norm": 1.2384798075970593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181830 + }, + { + "epoch": 0.8818911385314505, + "grad_norm": 1.658936810144951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181840 + }, + { + "epoch": 0.8819396367242865, + "grad_norm": 1.9239180915064935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181850 + }, + { + "epoch": 0.8819881349171227, + "grad_norm": 1.5615295723137024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181860 + }, + { + "epoch": 0.8820366331099587, + "grad_norm": 1.6533554969555553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181870 + }, + { + "epoch": 0.8820851313027949, + "grad_norm": 1.511386393815428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181880 + }, + { + "epoch": 0.8821336294956309, + "grad_norm": 1.4430286299216277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181890 + }, + { + "epoch": 0.882182127688467, + "grad_norm": 2.0607251016713235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181900 + }, + { + "epoch": 0.8822306258813031, + "grad_norm": 2.0702238145986485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181910 + }, + { + "epoch": 0.8822791240741392, + "grad_norm": 2.053943859436913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181920 + }, + { + "epoch": 0.8823276222669753, + "grad_norm": 1.833112683868876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181930 + }, + { + "epoch": 0.8823761204598114, + "grad_norm": 1.5283823984191258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181940 + }, + { + "epoch": 0.8824246186526474, + "grad_norm": 1.510015223971095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181950 + }, + { + "epoch": 0.8824731168454836, + "grad_norm": 1.4063888720272644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181960 + }, + { + "epoch": 0.8825216150383196, + "grad_norm": 1.5909252581991495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181970 + }, + { + "epoch": 0.8825701132311558, + "grad_norm": 1.6171281203014587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181980 + }, + { + "epoch": 0.8826186114239918, + "grad_norm": 1.225830992268584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 181990 + }, + { + "epoch": 0.8826671096168279, + "grad_norm": 1.6350750087212873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182000 + }, + { + "epoch": 0.882715607809664, + "grad_norm": 2.0746300677387808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182010 + }, + { + "epoch": 0.8827641060025001, + "grad_norm": 1.3237735352333857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182020 + }, + { + "epoch": 0.8828126041953361, + "grad_norm": 1.8258853984320922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182030 + }, + { + "epoch": 0.8828611023881723, + "grad_norm": 1.3793967745812097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182040 + }, + { + "epoch": 0.8829096005810083, + "grad_norm": 1.7879731473158245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182050 + }, + { + "epoch": 0.8829580987738445, + "grad_norm": 1.3971301449089424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182060 + }, + { + "epoch": 0.8830065969666805, + "grad_norm": 1.239111657724834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182070 + }, + { + "epoch": 0.8830550951595166, + "grad_norm": 1.7553038134110466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182080 + }, + { + "epoch": 0.8831035933523527, + "grad_norm": 1.2661100612376686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182090 + }, + { + "epoch": 0.8831520915451888, + "grad_norm": 1.616303713092293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182100 + }, + { + "epoch": 0.8832005897380248, + "grad_norm": 1.889690004475142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182110 + }, + { + "epoch": 0.883249087930861, + "grad_norm": 1.4683815940941258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182120 + }, + { + "epoch": 0.883297586123697, + "grad_norm": 1.5359546523541212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182130 + }, + { + "epoch": 0.8833460843165332, + "grad_norm": 1.412799388589292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182140 + }, + { + "epoch": 0.8833945825093692, + "grad_norm": 2.227305806457025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182150 + }, + { + "epoch": 0.8834430807022053, + "grad_norm": 1.6971306138202635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182160 + }, + { + "epoch": 0.8834915788950414, + "grad_norm": 2.176587621249837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182170 + }, + { + "epoch": 0.8835400770878775, + "grad_norm": 1.5257972663107466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182180 + }, + { + "epoch": 0.8835885752807136, + "grad_norm": 1.4428980676939318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182190 + }, + { + "epoch": 0.8836370734735497, + "grad_norm": 1.8790988320915858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182200 + }, + { + "epoch": 0.8836855716663857, + "grad_norm": 2.10202948380811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182210 + }, + { + "epoch": 0.8837340698592219, + "grad_norm": 1.446587827302892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182220 + }, + { + "epoch": 0.883782568052058, + "grad_norm": 1.690992412761716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182230 + }, + { + "epoch": 0.883831066244894, + "grad_norm": 1.6430542260081893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182240 + }, + { + "epoch": 0.8838795644377302, + "grad_norm": 2.063821646913766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182250 + }, + { + "epoch": 0.8839280626305662, + "grad_norm": 1.724307452377616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182260 + }, + { + "epoch": 0.8839765608234024, + "grad_norm": 1.5951131970837196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182270 + }, + { + "epoch": 0.8840250590162384, + "grad_norm": 1.1568520363880452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182280 + }, + { + "epoch": 0.8840735572090745, + "grad_norm": 1.6548039383224022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182290 + }, + { + "epoch": 0.8841220554019106, + "grad_norm": 2.183849723280673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182300 + }, + { + "epoch": 0.8841705535947467, + "grad_norm": 1.497895851798603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182310 + }, + { + "epoch": 0.8842190517875828, + "grad_norm": 2.007490884636809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182320 + }, + { + "epoch": 0.8842675499804189, + "grad_norm": 1.0579420006706641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182330 + }, + { + "epoch": 0.8843160481732549, + "grad_norm": 1.4628199984656476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182340 + }, + { + "epoch": 0.8843645463660911, + "grad_norm": 1.4580660234742027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182350 + }, + { + "epoch": 0.8844130445589271, + "grad_norm": 1.1141308320361532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182360 + }, + { + "epoch": 0.8844615427517633, + "grad_norm": 1.6098402610964513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182370 + }, + { + "epoch": 0.8845100409445993, + "grad_norm": 1.6023218307736897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182380 + }, + { + "epoch": 0.8845585391374354, + "grad_norm": 2.0774027831294006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182390 + }, + { + "epoch": 0.8846070373302715, + "grad_norm": 1.3692472933257704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182400 + }, + { + "epoch": 0.8846555355231076, + "grad_norm": 1.4685095806044046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182410 + }, + { + "epoch": 0.8847040337159436, + "grad_norm": 1.6908753508459995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182420 + }, + { + "epoch": 0.8847525319087798, + "grad_norm": 1.427543239174156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182430 + }, + { + "epoch": 0.8848010301016158, + "grad_norm": 2.1982634379469346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182440 + }, + { + "epoch": 0.884849528294452, + "grad_norm": 1.899692136930753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182450 + }, + { + "epoch": 0.884898026487288, + "grad_norm": 1.690012396693419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182460 + }, + { + "epoch": 0.8849465246801241, + "grad_norm": 1.646396086130153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182470 + }, + { + "epoch": 0.8849950228729602, + "grad_norm": 1.2278502659057722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182480 + }, + { + "epoch": 0.8850435210657963, + "grad_norm": 1.8898774101216986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182490 + }, + { + "epoch": 0.8850920192586323, + "grad_norm": 1.9846511989385363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182500 + }, + { + "epoch": 0.8851405174514685, + "grad_norm": 2.2229631468917432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182510 + }, + { + "epoch": 0.8851890156443045, + "grad_norm": 2.058579440245012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182520 + }, + { + "epoch": 0.8852375138371407, + "grad_norm": 2.1797497140596533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182530 + }, + { + "epoch": 0.8852860120299767, + "grad_norm": 1.670286309263247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182540 + }, + { + "epoch": 0.8853345102228128, + "grad_norm": 1.7686444309106264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182550 + }, + { + "epoch": 0.8853830084156489, + "grad_norm": 2.090036410606899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182560 + }, + { + "epoch": 0.885431506608485, + "grad_norm": 2.595710313357813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182570 + }, + { + "epoch": 0.885480004801321, + "grad_norm": 1.6503332034289997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182580 + }, + { + "epoch": 0.8855285029941572, + "grad_norm": 1.8762031928076794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182590 + }, + { + "epoch": 0.8855770011869932, + "grad_norm": 1.754754919147672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182600 + }, + { + "epoch": 0.8856254993798294, + "grad_norm": 1.6142641001692937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182610 + }, + { + "epoch": 0.8856739975726654, + "grad_norm": 1.6792716550639852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182620 + }, + { + "epoch": 0.8857224957655015, + "grad_norm": 1.9429032604989516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182630 + }, + { + "epoch": 0.8857709939583376, + "grad_norm": 1.6717066841920314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182640 + }, + { + "epoch": 0.8858194921511737, + "grad_norm": 1.9019271491060863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182650 + }, + { + "epoch": 0.8858679903440098, + "grad_norm": 1.9134747120119755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182660 + }, + { + "epoch": 0.8859164885368459, + "grad_norm": 2.037893587214512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182670 + }, + { + "epoch": 0.8859649867296819, + "grad_norm": 1.993993947735362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182680 + }, + { + "epoch": 0.8860134849225181, + "grad_norm": 1.60934607862373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182690 + }, + { + "epoch": 0.8860619831153541, + "grad_norm": 2.2372656616198583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182700 + }, + { + "epoch": 0.8861104813081903, + "grad_norm": 2.241472785158294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182710 + }, + { + "epoch": 0.8861589795010263, + "grad_norm": 1.6337770247787375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182720 + }, + { + "epoch": 0.8862074776938624, + "grad_norm": 2.1620552459467035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182730 + }, + { + "epoch": 0.8862559758866986, + "grad_norm": 1.5296397037900533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182740 + }, + { + "epoch": 0.8863044740795346, + "grad_norm": 2.4418264743530926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182750 + }, + { + "epoch": 0.8863529722723708, + "grad_norm": 1.4225768119047189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182760 + }, + { + "epoch": 0.8864014704652068, + "grad_norm": 1.140061911542034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182770 + }, + { + "epoch": 0.8864499686580429, + "grad_norm": 1.8658989020536865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182780 + }, + { + "epoch": 0.886498466850879, + "grad_norm": 1.5759287208538808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182790 + }, + { + "epoch": 0.8865469650437151, + "grad_norm": 1.869312171720594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182800 + }, + { + "epoch": 0.8865954632365511, + "grad_norm": 1.3563817624628882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182810 + }, + { + "epoch": 0.8866439614293873, + "grad_norm": 2.590130065982521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182820 + }, + { + "epoch": 0.8866924596222233, + "grad_norm": 1.7637749039067785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182830 + }, + { + "epoch": 0.8867409578150595, + "grad_norm": 2.022365386267211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182840 + }, + { + "epoch": 0.8867894560078955, + "grad_norm": 1.8649725319619392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182850 + }, + { + "epoch": 0.8868379542007316, + "grad_norm": 1.6684071013628454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182860 + }, + { + "epoch": 0.8868864523935677, + "grad_norm": 1.232006141549391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182870 + }, + { + "epoch": 0.8869349505864038, + "grad_norm": 2.48732199281676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182880 + }, + { + "epoch": 0.8869834487792398, + "grad_norm": 1.7231927884608922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182890 + }, + { + "epoch": 0.887031946972076, + "grad_norm": 1.5842672951293935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182900 + }, + { + "epoch": 0.887080445164912, + "grad_norm": 1.531246240915607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182910 + }, + { + "epoch": 0.8871289433577482, + "grad_norm": 1.425067441829242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182920 + }, + { + "epoch": 0.8871774415505842, + "grad_norm": 1.8547826385884036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182930 + }, + { + "epoch": 0.8872259397434203, + "grad_norm": 1.7299591092978517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182940 + }, + { + "epoch": 0.8872744379362564, + "grad_norm": 1.9460342670640784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182950 + }, + { + "epoch": 0.8873229361290925, + "grad_norm": 1.5142649800736763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182960 + }, + { + "epoch": 0.8873714343219286, + "grad_norm": 2.8315971789538708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182970 + }, + { + "epoch": 0.8874199325147647, + "grad_norm": 1.8659779499330398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182980 + }, + { + "epoch": 0.8874684307076007, + "grad_norm": 2.484310002159873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 182990 + }, + { + "epoch": 0.8875169289004369, + "grad_norm": 1.9106543902580597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183000 + }, + { + "epoch": 0.8875654270932729, + "grad_norm": 2.665946929880647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183010 + }, + { + "epoch": 0.887613925286109, + "grad_norm": 1.3578404178815617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183020 + }, + { + "epoch": 0.8876624234789451, + "grad_norm": 1.625227774582072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183030 + }, + { + "epoch": 0.8877109216717812, + "grad_norm": 1.1959787826754109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183040 + }, + { + "epoch": 0.8877594198646173, + "grad_norm": 1.2820112971212438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183050 + }, + { + "epoch": 0.8878079180574534, + "grad_norm": 1.3730104164721979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183060 + }, + { + "epoch": 0.8878564162502894, + "grad_norm": 1.6509492439809037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183070 + }, + { + "epoch": 0.8879049144431256, + "grad_norm": 1.9285614882846858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183080 + }, + { + "epoch": 0.8879534126359616, + "grad_norm": 1.896401613521448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183090 + }, + { + "epoch": 0.8880019108287978, + "grad_norm": 1.8374445076574375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183100 + }, + { + "epoch": 0.8880504090216338, + "grad_norm": 1.2293505768923296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183110 + }, + { + "epoch": 0.8880989072144699, + "grad_norm": 1.7932539009279935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183120 + }, + { + "epoch": 0.888147405407306, + "grad_norm": 1.6591069851301654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183130 + }, + { + "epoch": 0.8881959036001421, + "grad_norm": 1.5557509058794494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183140 + }, + { + "epoch": 0.8882444017929781, + "grad_norm": 1.360668289152045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183150 + }, + { + "epoch": 0.8882928999858143, + "grad_norm": 1.8521493672096767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183160 + }, + { + "epoch": 0.8883413981786503, + "grad_norm": 2.3085174660764096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183170 + }, + { + "epoch": 0.8883898963714865, + "grad_norm": 1.7705984234339667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183180 + }, + { + "epoch": 0.8884383945643225, + "grad_norm": 1.8733068429810373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183190 + }, + { + "epoch": 0.8884868927571586, + "grad_norm": 1.6823832993395627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183200 + }, + { + "epoch": 0.8885353909499947, + "grad_norm": 2.1915647963055562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183210 + }, + { + "epoch": 0.8885838891428308, + "grad_norm": 1.482873468461321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183220 + }, + { + "epoch": 0.8886323873356669, + "grad_norm": 2.1173672593022275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183230 + }, + { + "epoch": 0.888680885528503, + "grad_norm": 2.5092759869949077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183240 + }, + { + "epoch": 0.8887293837213391, + "grad_norm": 1.9241840121253517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183250 + }, + { + "epoch": 0.8887778819141752, + "grad_norm": 1.4755482169448442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183260 + }, + { + "epoch": 0.8888263801070113, + "grad_norm": 1.625876322464137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183270 + }, + { + "epoch": 0.8888748782998473, + "grad_norm": 1.0768342662004216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183280 + }, + { + "epoch": 0.8889233764926835, + "grad_norm": 1.8518559130598078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183290 + }, + { + "epoch": 0.8889718746855195, + "grad_norm": 1.590251130778597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183300 + }, + { + "epoch": 0.8890203728783557, + "grad_norm": 1.7566778254263227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183310 + }, + { + "epoch": 0.8890688710711917, + "grad_norm": 1.123813486714198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183320 + }, + { + "epoch": 0.8891173692640278, + "grad_norm": 1.354824696875312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183330 + }, + { + "epoch": 0.8891658674568639, + "grad_norm": 1.4347007137871515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183340 + }, + { + "epoch": 0.8892143656497, + "grad_norm": 1.2989461950496661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183350 + }, + { + "epoch": 0.889262863842536, + "grad_norm": 1.202189547910848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183360 + }, + { + "epoch": 0.8893113620353722, + "grad_norm": 1.9956505781237865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183370 + }, + { + "epoch": 0.8893598602282082, + "grad_norm": 1.1179625225565815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183380 + }, + { + "epoch": 0.8894083584210444, + "grad_norm": 2.152034106472911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183390 + }, + { + "epoch": 0.8894568566138804, + "grad_norm": 1.7651844430588426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183400 + }, + { + "epoch": 0.8895053548067166, + "grad_norm": 1.414052253068121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183410 + }, + { + "epoch": 0.8895538529995526, + "grad_norm": 1.4815954685332144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183420 + }, + { + "epoch": 0.8896023511923887, + "grad_norm": 1.4849384832871237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183430 + }, + { + "epoch": 0.8896508493852248, + "grad_norm": 1.7677230346180295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183440 + }, + { + "epoch": 0.8896993475780609, + "grad_norm": 1.695298479376106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183450 + }, + { + "epoch": 0.8897478457708969, + "grad_norm": 2.796418385742072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183460 + }, + { + "epoch": 0.8897963439637331, + "grad_norm": 1.1909290442702058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183470 + }, + { + "epoch": 0.8898448421565691, + "grad_norm": 1.643114799776413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183480 + }, + { + "epoch": 0.8898933403494053, + "grad_norm": 1.4021168226463487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183490 + }, + { + "epoch": 0.8899418385422413, + "grad_norm": 1.3480615734806634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183500 + }, + { + "epoch": 0.8899903367350774, + "grad_norm": 2.658601871985411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183510 + }, + { + "epoch": 0.8900388349279135, + "grad_norm": 1.67560241237652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183520 + }, + { + "epoch": 0.8900873331207496, + "grad_norm": 1.43451535095096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183530 + }, + { + "epoch": 0.8901358313135856, + "grad_norm": 1.4377652846064848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183540 + }, + { + "epoch": 0.8901843295064218, + "grad_norm": 1.4617187460430614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183550 + }, + { + "epoch": 0.8902328276992578, + "grad_norm": 2.8397604268093346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183560 + }, + { + "epoch": 0.890281325892094, + "grad_norm": 1.0306257181014189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183570 + }, + { + "epoch": 0.89032982408493, + "grad_norm": 1.584315789671109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183580 + }, + { + "epoch": 0.8903783222777661, + "grad_norm": 1.6808346714469735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183590 + }, + { + "epoch": 0.8904268204706022, + "grad_norm": 1.6655551604571883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183600 + }, + { + "epoch": 0.8904753186634383, + "grad_norm": 1.7928684314938437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183610 + }, + { + "epoch": 0.8905238168562744, + "grad_norm": 1.411747696522525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183620 + }, + { + "epoch": 0.8905723150491105, + "grad_norm": 1.3274344290437057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183630 + }, + { + "epoch": 0.8906208132419465, + "grad_norm": 1.6172410965964445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183640 + }, + { + "epoch": 0.8906693114347827, + "grad_norm": 2.2003725064223545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183650 + }, + { + "epoch": 0.8907178096276187, + "grad_norm": 1.5839392020211562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183660 + }, + { + "epoch": 0.8907663078204549, + "grad_norm": 1.9562286013297125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183670 + }, + { + "epoch": 0.8908148060132909, + "grad_norm": 2.0945604362054837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183680 + }, + { + "epoch": 0.890863304206127, + "grad_norm": 1.4511439161424278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183690 + }, + { + "epoch": 0.8909118023989631, + "grad_norm": 1.500493418404858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183700 + }, + { + "epoch": 0.8909603005917992, + "grad_norm": 1.544230698868887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183710 + }, + { + "epoch": 0.8910087987846352, + "grad_norm": 1.8080321240177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183720 + }, + { + "epoch": 0.8910572969774714, + "grad_norm": 1.5167339384447587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183730 + }, + { + "epoch": 0.8911057951703074, + "grad_norm": 1.3952099919833927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183740 + }, + { + "epoch": 0.8911542933631436, + "grad_norm": 1.9386238392371524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183750 + }, + { + "epoch": 0.8912027915559796, + "grad_norm": 1.523213200016471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183760 + }, + { + "epoch": 0.8912512897488157, + "grad_norm": 1.3501678886029822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183770 + }, + { + "epoch": 0.8912997879416519, + "grad_norm": 1.2883307753952522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183780 + }, + { + "epoch": 0.8913482861344879, + "grad_norm": 1.8469666684950425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183790 + }, + { + "epoch": 0.891396784327324, + "grad_norm": 1.898447443693385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183800 + }, + { + "epoch": 0.8914452825201601, + "grad_norm": 1.3326545200698092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183810 + }, + { + "epoch": 0.8914937807129962, + "grad_norm": 1.2543594607450359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183820 + }, + { + "epoch": 0.8915422789058323, + "grad_norm": 1.8538266033374384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183830 + }, + { + "epoch": 0.8915907770986684, + "grad_norm": 1.2712141561621593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183840 + }, + { + "epoch": 0.8916392752915044, + "grad_norm": 1.8527099854281914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183850 + }, + { + "epoch": 0.8916877734843406, + "grad_norm": 2.405584353937229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183860 + }, + { + "epoch": 0.8917362716771766, + "grad_norm": 1.6145845549431215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183870 + }, + { + "epoch": 0.8917847698700128, + "grad_norm": 1.5059081093227178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183880 + }, + { + "epoch": 0.8918332680628488, + "grad_norm": 1.2639609359155202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183890 + }, + { + "epoch": 0.8918817662556849, + "grad_norm": 2.391871412044111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183900 + }, + { + "epoch": 0.891930264448521, + "grad_norm": 1.8949995350681093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183910 + }, + { + "epoch": 0.8919787626413571, + "grad_norm": 1.6620512965914713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183920 + }, + { + "epoch": 0.8920272608341931, + "grad_norm": 2.4302067913595238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183930 + }, + { + "epoch": 0.8920757590270293, + "grad_norm": 1.7328686041651054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183940 + }, + { + "epoch": 0.8921242572198653, + "grad_norm": 1.9437683462797395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183950 + }, + { + "epoch": 0.8921727554127015, + "grad_norm": 1.4305900464250954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183960 + }, + { + "epoch": 0.8922212536055375, + "grad_norm": 1.5792430474448338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183970 + }, + { + "epoch": 0.8922697517983736, + "grad_norm": 2.3245849689601528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183980 + }, + { + "epoch": 0.8923182499912097, + "grad_norm": 1.94094695871172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 183990 + }, + { + "epoch": 0.8923667481840458, + "grad_norm": 1.7890599224301695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184000 + }, + { + "epoch": 0.8924152463768819, + "grad_norm": 1.8041038885030503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184010 + }, + { + "epoch": 0.892463744569718, + "grad_norm": 2.2794690579530652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184020 + }, + { + "epoch": 0.892512242762554, + "grad_norm": 1.6042545070149572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184030 + }, + { + "epoch": 0.8925607409553902, + "grad_norm": 1.770387214605762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184040 + }, + { + "epoch": 0.8926092391482262, + "grad_norm": 1.6465808272414506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184050 + }, + { + "epoch": 0.8926577373410624, + "grad_norm": 1.635451951642608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184060 + }, + { + "epoch": 0.8927062355338984, + "grad_norm": 1.2957190875795277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184070 + }, + { + "epoch": 0.8927547337267345, + "grad_norm": 1.3417698951911916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184080 + }, + { + "epoch": 0.8928032319195706, + "grad_norm": 1.596015763993819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184090 + }, + { + "epoch": 0.8928517301124067, + "grad_norm": 1.518462511285179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184100 + }, + { + "epoch": 0.8929002283052427, + "grad_norm": 1.6409645198223188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184110 + }, + { + "epoch": 0.8929487264980789, + "grad_norm": 1.3214403793426754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184120 + }, + { + "epoch": 0.8929972246909149, + "grad_norm": 1.7410103581028125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184130 + }, + { + "epoch": 0.8930457228837511, + "grad_norm": 1.318509479375507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184140 + }, + { + "epoch": 0.8930942210765871, + "grad_norm": 2.3362307643992608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184150 + }, + { + "epoch": 0.8931427192694232, + "grad_norm": 1.6706012573308726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184160 + }, + { + "epoch": 0.8931912174622593, + "grad_norm": 1.3141744581446346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184170 + }, + { + "epoch": 0.8932397156550954, + "grad_norm": 1.322111131685233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184180 + }, + { + "epoch": 0.8932882138479314, + "grad_norm": 2.0524311139524798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184190 + }, + { + "epoch": 0.8933367120407676, + "grad_norm": 1.708846575354528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184200 + }, + { + "epoch": 0.8933852102336036, + "grad_norm": 1.4114683644095294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184210 + }, + { + "epoch": 0.8934337084264398, + "grad_norm": 1.4788254176778537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184220 + }, + { + "epoch": 0.8934822066192758, + "grad_norm": 2.0625131824658638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184230 + }, + { + "epoch": 0.8935307048121119, + "grad_norm": 1.568290031173092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184240 + }, + { + "epoch": 0.893579203004948, + "grad_norm": 1.5137318953861723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184250 + }, + { + "epoch": 0.8936277011977841, + "grad_norm": 1.6772833788536445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184260 + }, + { + "epoch": 0.8936761993906202, + "grad_norm": 2.048494529560685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184270 + }, + { + "epoch": 0.8937246975834563, + "grad_norm": 2.5304771611445176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184280 + }, + { + "epoch": 0.8937731957762924, + "grad_norm": 2.7032651672698194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184290 + }, + { + "epoch": 0.8938216939691285, + "grad_norm": 1.564320584179768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184300 + }, + { + "epoch": 0.8938701921619646, + "grad_norm": 1.9375077542349572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184310 + }, + { + "epoch": 0.8939186903548006, + "grad_norm": 1.0091909530274279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184320 + }, + { + "epoch": 0.8939671885476368, + "grad_norm": 1.9923716010339376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184330 + }, + { + "epoch": 0.8940156867404728, + "grad_norm": 1.5682397602745368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184340 + }, + { + "epoch": 0.894064184933309, + "grad_norm": 1.6084708676089576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184350 + }, + { + "epoch": 0.894112683126145, + "grad_norm": 1.3156519429458058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184360 + }, + { + "epoch": 0.8941611813189811, + "grad_norm": 1.5668584651962192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184370 + }, + { + "epoch": 0.8942096795118172, + "grad_norm": 1.5941806097430344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184380 + }, + { + "epoch": 0.8942581777046533, + "grad_norm": 1.2497074486361726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184390 + }, + { + "epoch": 0.8943066758974894, + "grad_norm": 1.935154081422752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184400 + }, + { + "epoch": 0.8943551740903255, + "grad_norm": 1.698225027269018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184410 + }, + { + "epoch": 0.8944036722831615, + "grad_norm": 2.7221966902857275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184420 + }, + { + "epoch": 0.8944521704759977, + "grad_norm": 1.8827869041615486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184430 + }, + { + "epoch": 0.8945006686688337, + "grad_norm": 1.7517233885655514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184440 + }, + { + "epoch": 0.8945491668616699, + "grad_norm": 1.4192311326155505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184450 + }, + { + "epoch": 0.8945976650545059, + "grad_norm": 1.7141076114057796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184460 + }, + { + "epoch": 0.894646163247342, + "grad_norm": 1.6097891020194766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184470 + }, + { + "epoch": 0.8946946614401781, + "grad_norm": 1.4133033410246298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184480 + }, + { + "epoch": 0.8947431596330142, + "grad_norm": 1.521051196107237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184490 + }, + { + "epoch": 0.8947916578258502, + "grad_norm": 1.4235099321524558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184500 + }, + { + "epoch": 0.8948401560186864, + "grad_norm": 1.5353350590885384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184510 + }, + { + "epoch": 0.8948886542115224, + "grad_norm": 2.433668733203831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184520 + }, + { + "epoch": 0.8949371524043586, + "grad_norm": 1.0501446823241167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184530 + }, + { + "epoch": 0.8949856505971946, + "grad_norm": 1.6339424036004857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184540 + }, + { + "epoch": 0.8950341487900307, + "grad_norm": 1.6593194374081577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184550 + }, + { + "epoch": 0.8950826469828668, + "grad_norm": 1.443913344445491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184560 + }, + { + "epoch": 0.8951311451757029, + "grad_norm": 2.159900525100511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184570 + }, + { + "epoch": 0.895179643368539, + "grad_norm": 1.4935524816905854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184580 + }, + { + "epoch": 0.8952281415613751, + "grad_norm": 2.069612392574527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184590 + }, + { + "epoch": 0.8952766397542111, + "grad_norm": 1.9317875299407206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184600 + }, + { + "epoch": 0.8953251379470473, + "grad_norm": 2.1057900312371203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184610 + }, + { + "epoch": 0.8953736361398833, + "grad_norm": 2.284925670892335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184620 + }, + { + "epoch": 0.8954221343327194, + "grad_norm": 2.1524495963376467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184630 + }, + { + "epoch": 0.8954706325255555, + "grad_norm": 1.9719166743925598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184640 + }, + { + "epoch": 0.8955191307183916, + "grad_norm": 1.6966559712727758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184650 + }, + { + "epoch": 0.8955676289112277, + "grad_norm": 1.1732469218372898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184660 + }, + { + "epoch": 0.8956161271040638, + "grad_norm": 1.3416212141237338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184670 + }, + { + "epoch": 0.8956646252968998, + "grad_norm": 1.9643294990601134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184680 + }, + { + "epoch": 0.895713123489736, + "grad_norm": 1.6026305615923775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184690 + }, + { + "epoch": 0.895761621682572, + "grad_norm": 1.4929607772273812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184700 + }, + { + "epoch": 0.8958101198754082, + "grad_norm": 9.674410605953199e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184710 + }, + { + "epoch": 0.8958586180682442, + "grad_norm": 1.3008414789794642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184720 + }, + { + "epoch": 0.8959071162610803, + "grad_norm": 1.5424554078435904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184730 + }, + { + "epoch": 0.8959556144539164, + "grad_norm": 1.313545450187803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184740 + }, + { + "epoch": 0.8960041126467525, + "grad_norm": 1.873944732722066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184750 + }, + { + "epoch": 0.8960526108395885, + "grad_norm": 1.5784328510903833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184760 + }, + { + "epoch": 0.8961011090324247, + "grad_norm": 1.4744368392882734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184770 + }, + { + "epoch": 0.8961496072252607, + "grad_norm": 1.7811261798783562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184780 + }, + { + "epoch": 0.8961981054180969, + "grad_norm": 1.6494247745413304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184790 + }, + { + "epoch": 0.896246603610933, + "grad_norm": 1.4732538744510748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184800 + }, + { + "epoch": 0.896295101803769, + "grad_norm": 1.640057512020121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184810 + }, + { + "epoch": 0.8963435999966052, + "grad_norm": 2.3054342435102626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184820 + }, + { + "epoch": 0.8963920981894412, + "grad_norm": 1.3559873224266994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184830 + }, + { + "epoch": 0.8964405963822774, + "grad_norm": 1.5961830968080903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184840 + }, + { + "epoch": 0.8964890945751134, + "grad_norm": 2.2124103438159182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184850 + }, + { + "epoch": 0.8965375927679495, + "grad_norm": 1.2628294854266642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184860 + }, + { + "epoch": 0.8965860909607856, + "grad_norm": 1.0111794068734525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184870 + }, + { + "epoch": 0.8966345891536217, + "grad_norm": 1.5461624869317347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184880 + }, + { + "epoch": 0.8966830873464577, + "grad_norm": 1.9654224914233964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184890 + }, + { + "epoch": 0.8967315855392939, + "grad_norm": 1.3181140623430565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184900 + }, + { + "epoch": 0.8967800837321299, + "grad_norm": 1.391414716778172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184910 + }, + { + "epoch": 0.8968285819249661, + "grad_norm": 3.905556056338355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184920 + }, + { + "epoch": 0.8968770801178021, + "grad_norm": 1.6467469166059345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184930 + }, + { + "epoch": 0.8969255783106382, + "grad_norm": 2.1663064231347562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184940 + }, + { + "epoch": 0.8969740765034743, + "grad_norm": 1.7253428907793023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184950 + }, + { + "epoch": 0.8970225746963104, + "grad_norm": 1.2743565314110583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184960 + }, + { + "epoch": 0.8970710728891464, + "grad_norm": 1.557659601303385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184970 + }, + { + "epoch": 0.8971195710819826, + "grad_norm": 1.670404436993067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184980 + }, + { + "epoch": 0.8971680692748186, + "grad_norm": 2.2014651435142696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 184990 + }, + { + "epoch": 0.8972165674676548, + "grad_norm": 1.7687973752344988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185000 + }, + { + "epoch": 0.8972650656604908, + "grad_norm": 1.668544946653583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185010 + }, + { + "epoch": 0.897313563853327, + "grad_norm": 1.1243598052601556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185020 + }, + { + "epoch": 0.897362062046163, + "grad_norm": 1.1667292021400044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185030 + }, + { + "epoch": 0.8974105602389991, + "grad_norm": 1.3824660527461674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185040 + }, + { + "epoch": 0.8974590584318352, + "grad_norm": 1.2402621152318716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185050 + }, + { + "epoch": 0.8975075566246713, + "grad_norm": 1.362525381409796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185060 + }, + { + "epoch": 0.8975560548175073, + "grad_norm": 1.931798365717441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185070 + }, + { + "epoch": 0.8976045530103435, + "grad_norm": 1.4654036206707133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185080 + }, + { + "epoch": 0.8976530512031795, + "grad_norm": 1.1966312385425226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185090 + }, + { + "epoch": 0.8977015493960157, + "grad_norm": 1.559185314192746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185100 + }, + { + "epoch": 0.8977500475888517, + "grad_norm": 1.3954621458367455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185110 + }, + { + "epoch": 0.8977985457816878, + "grad_norm": 1.7012053987741638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185120 + }, + { + "epoch": 0.8978470439745239, + "grad_norm": 1.2657795700476981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185130 + }, + { + "epoch": 0.89789554216736, + "grad_norm": 1.864653675909267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185140 + }, + { + "epoch": 0.897944040360196, + "grad_norm": 1.4875389808821637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185150 + }, + { + "epoch": 0.8979925385530322, + "grad_norm": 1.8864504625071277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185160 + }, + { + "epoch": 0.8980410367458682, + "grad_norm": 2.5441318385333034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185170 + }, + { + "epoch": 0.8980895349387044, + "grad_norm": 1.4502119505266364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185180 + }, + { + "epoch": 0.8981380331315404, + "grad_norm": 1.3185861291731271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185190 + }, + { + "epoch": 0.8981865313243765, + "grad_norm": 2.4157309042038833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185200 + }, + { + "epoch": 0.8982350295172126, + "grad_norm": 2.1346989953485718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185210 + }, + { + "epoch": 0.8982835277100487, + "grad_norm": 1.4927584501833735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185220 + }, + { + "epoch": 0.8983320259028847, + "grad_norm": 1.8044385541315933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185230 + }, + { + "epoch": 0.8983805240957209, + "grad_norm": 1.9240435022993552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185240 + }, + { + "epoch": 0.8984290222885569, + "grad_norm": 2.3452749076113832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185250 + }, + { + "epoch": 0.8984775204813931, + "grad_norm": 2.0157113311825015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185260 + }, + { + "epoch": 0.8985260186742291, + "grad_norm": 1.329944865346988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185270 + }, + { + "epoch": 0.8985745168670652, + "grad_norm": 2.0347174611856644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185280 + }, + { + "epoch": 0.8986230150599013, + "grad_norm": 2.1880927292272645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185290 + }, + { + "epoch": 0.8986715132527374, + "grad_norm": 1.6500372623795556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185300 + }, + { + "epoch": 0.8987200114455736, + "grad_norm": 1.840025376509402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185310 + }, + { + "epoch": 0.8987685096384096, + "grad_norm": 1.6180878859017866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185320 + }, + { + "epoch": 0.8988170078312457, + "grad_norm": 1.2817238825846289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185330 + }, + { + "epoch": 0.8988655060240818, + "grad_norm": 1.8787240207984723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185340 + }, + { + "epoch": 0.8989140042169179, + "grad_norm": 1.622707124226963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185350 + }, + { + "epoch": 0.898962502409754, + "grad_norm": 2.2483838790776645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185360 + }, + { + "epoch": 0.8990110006025901, + "grad_norm": 1.6818082926306488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185370 + }, + { + "epoch": 0.8990594987954261, + "grad_norm": 3.148637262029297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185380 + }, + { + "epoch": 0.8991079969882623, + "grad_norm": 1.4665650915901551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185390 + }, + { + "epoch": 0.8991564951810983, + "grad_norm": 1.613226174868032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185400 + }, + { + "epoch": 0.8992049933739344, + "grad_norm": 1.4879739218542909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185410 + }, + { + "epoch": 0.8992534915667705, + "grad_norm": 1.1790112885989856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185420 + }, + { + "epoch": 0.8993019897596066, + "grad_norm": 2.6882517545345763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185430 + }, + { + "epoch": 0.8993504879524427, + "grad_norm": 2.1191434385059438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185440 + }, + { + "epoch": 0.8993989861452788, + "grad_norm": 1.643061331435547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185450 + }, + { + "epoch": 0.8994474843381148, + "grad_norm": 1.0759671376092683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185460 + }, + { + "epoch": 0.899495982530951, + "grad_norm": 1.924117931650926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185470 + }, + { + "epoch": 0.899544480723787, + "grad_norm": 1.8458196748838418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185480 + }, + { + "epoch": 0.8995929789166232, + "grad_norm": 1.831832996401772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185490 + }, + { + "epoch": 0.8996414771094592, + "grad_norm": 1.5726087099210417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185500 + }, + { + "epoch": 0.8996899753022953, + "grad_norm": 1.7551979425434183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185510 + }, + { + "epoch": 0.8997384734951314, + "grad_norm": 1.4245824075942437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185520 + }, + { + "epoch": 0.8997869716879675, + "grad_norm": 2.7388571410824625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185530 + }, + { + "epoch": 0.8998354698808035, + "grad_norm": 1.340800359628247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185540 + }, + { + "epoch": 0.8998839680736397, + "grad_norm": 1.618341194387085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185550 + }, + { + "epoch": 0.8999324662664757, + "grad_norm": 1.2431623730435604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185560 + }, + { + "epoch": 0.8999809644593119, + "grad_norm": 1.659291548605779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185570 + }, + { + "epoch": 0.9000294626521479, + "grad_norm": 1.9861174038737772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185580 + }, + { + "epoch": 0.900077960844984, + "grad_norm": 1.580746555873702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185590 + }, + { + "epoch": 0.9001264590378201, + "grad_norm": 1.6131149749298856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185600 + }, + { + "epoch": 0.9001749572306562, + "grad_norm": 2.2169619029455134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185610 + }, + { + "epoch": 0.9002234554234922, + "grad_norm": 1.775997304775956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185620 + }, + { + "epoch": 0.9002719536163284, + "grad_norm": 1.2038525731838945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185630 + }, + { + "epoch": 0.9003204518091644, + "grad_norm": 2.000809118385405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185640 + }, + { + "epoch": 0.9003689500020006, + "grad_norm": 1.318841214015265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185650 + }, + { + "epoch": 0.9004174481948366, + "grad_norm": 1.4099923895116717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185660 + }, + { + "epoch": 0.9004659463876727, + "grad_norm": 1.783404712796255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185670 + }, + { + "epoch": 0.9005144445805088, + "grad_norm": 2.2912034935984593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185680 + }, + { + "epoch": 0.9005629427733449, + "grad_norm": 2.0450427129503623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185690 + }, + { + "epoch": 0.900611440966181, + "grad_norm": 1.664927751221512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185700 + }, + { + "epoch": 0.9006599391590171, + "grad_norm": 1.3559882106051191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185710 + }, + { + "epoch": 0.9007084373518531, + "grad_norm": 1.4031153128257756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185720 + }, + { + "epoch": 0.9007569355446893, + "grad_norm": 1.4371245526945131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185730 + }, + { + "epoch": 0.9008054337375253, + "grad_norm": 2.09477892809673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185740 + }, + { + "epoch": 0.9008539319303615, + "grad_norm": 1.623699041886084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185750 + }, + { + "epoch": 0.9009024301231975, + "grad_norm": 1.11683533532414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185760 + }, + { + "epoch": 0.9009509283160336, + "grad_norm": 1.3600446102657315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185770 + }, + { + "epoch": 0.9009994265088697, + "grad_norm": 2.0180142001891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185780 + }, + { + "epoch": 0.9010479247017058, + "grad_norm": 2.124826181670869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185790 + }, + { + "epoch": 0.9010964228945418, + "grad_norm": 1.4066902309650686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185800 + }, + { + "epoch": 0.901144921087378, + "grad_norm": 1.906797209016986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185810 + }, + { + "epoch": 0.9011934192802141, + "grad_norm": 1.5851641776976066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185820 + }, + { + "epoch": 0.9012419174730502, + "grad_norm": 1.952575345853802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185830 + }, + { + "epoch": 0.9012904156658863, + "grad_norm": 1.548181316479713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185840 + }, + { + "epoch": 0.9013389138587223, + "grad_norm": 1.4259447844722217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185850 + }, + { + "epoch": 0.9013874120515585, + "grad_norm": 1.2447584296637615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185860 + }, + { + "epoch": 0.9014359102443945, + "grad_norm": 1.4658521507726618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185870 + }, + { + "epoch": 0.9014844084372307, + "grad_norm": 9.861665262178576e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185880 + }, + { + "epoch": 0.9015329066300667, + "grad_norm": 1.6557192950017452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185890 + }, + { + "epoch": 0.9015814048229028, + "grad_norm": 1.364542256965251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185900 + }, + { + "epoch": 0.9016299030157389, + "grad_norm": 1.0591429955297826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185910 + }, + { + "epoch": 0.901678401208575, + "grad_norm": 2.5770974687588932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185920 + }, + { + "epoch": 0.901726899401411, + "grad_norm": 1.6150947246273972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185930 + }, + { + "epoch": 0.9017753975942472, + "grad_norm": 1.5001539566128486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185940 + }, + { + "epoch": 0.9018238957870832, + "grad_norm": 1.9973663611949632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185950 + }, + { + "epoch": 0.9018723939799194, + "grad_norm": 1.2361490497880823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185960 + }, + { + "epoch": 0.9019208921727554, + "grad_norm": 1.1460680404695722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185970 + }, + { + "epoch": 0.9019693903655915, + "grad_norm": 1.6910950861870333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185980 + }, + { + "epoch": 0.9020178885584276, + "grad_norm": 1.8445298621827533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 185990 + }, + { + "epoch": 0.9020663867512637, + "grad_norm": 1.5706907774415413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186000 + }, + { + "epoch": 0.9021148849440997, + "grad_norm": 1.863380205691101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186010 + }, + { + "epoch": 0.9021633831369359, + "grad_norm": 1.1973813940358013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186020 + }, + { + "epoch": 0.9022118813297719, + "grad_norm": 1.813576488984836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186030 + }, + { + "epoch": 0.9022603795226081, + "grad_norm": 2.231295681554002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186040 + }, + { + "epoch": 0.9023088777154441, + "grad_norm": 1.3676303645127064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186050 + }, + { + "epoch": 0.9023573759082802, + "grad_norm": 1.1314733150413758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186060 + }, + { + "epoch": 0.9024058741011163, + "grad_norm": 1.4975528372929148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186070 + }, + { + "epoch": 0.9024543722939524, + "grad_norm": 1.2198119847539601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186080 + }, + { + "epoch": 0.9025028704867885, + "grad_norm": 1.3810023347105016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186090 + }, + { + "epoch": 0.9025513686796246, + "grad_norm": 1.9735143297339164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186100 + }, + { + "epoch": 0.9025998668724606, + "grad_norm": 1.1661245302718726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186110 + }, + { + "epoch": 0.9026483650652968, + "grad_norm": 1.9606865464538714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186120 + }, + { + "epoch": 0.9026968632581328, + "grad_norm": 1.6214420028859422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186130 + }, + { + "epoch": 0.902745361450969, + "grad_norm": 1.498278123790442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186140 + }, + { + "epoch": 0.902793859643805, + "grad_norm": 1.3504544149611775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186150 + }, + { + "epoch": 0.9028423578366411, + "grad_norm": 1.4971242023875675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186160 + }, + { + "epoch": 0.9028908560294772, + "grad_norm": 1.1519619924627023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186170 + }, + { + "epoch": 0.9029393542223133, + "grad_norm": 1.8003762036755688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186180 + }, + { + "epoch": 0.9029878524151493, + "grad_norm": 1.9374827076035217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186190 + }, + { + "epoch": 0.9030363506079855, + "grad_norm": 1.806533234116614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186200 + }, + { + "epoch": 0.9030848488008215, + "grad_norm": 1.1323680659813817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186210 + }, + { + "epoch": 0.9031333469936577, + "grad_norm": 2.7080979236870917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186220 + }, + { + "epoch": 0.9031818451864937, + "grad_norm": 1.9877552048797043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186230 + }, + { + "epoch": 0.9032303433793298, + "grad_norm": 1.8231013143577002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186240 + }, + { + "epoch": 0.9032788415721659, + "grad_norm": 1.676210104051279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186250 + }, + { + "epoch": 0.903327339765002, + "grad_norm": 1.5226847338567495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186260 + }, + { + "epoch": 0.903375837957838, + "grad_norm": 1.3130543763395508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186270 + }, + { + "epoch": 0.9034243361506742, + "grad_norm": 1.326231124920696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186280 + }, + { + "epoch": 0.9034728343435102, + "grad_norm": 1.8327435569176487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186290 + }, + { + "epoch": 0.9035213325363464, + "grad_norm": 2.2966977653027243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186300 + }, + { + "epoch": 0.9035698307291824, + "grad_norm": 1.6204806385644588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186310 + }, + { + "epoch": 0.9036183289220185, + "grad_norm": 1.914033198602283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186320 + }, + { + "epoch": 0.9036668271148547, + "grad_norm": 1.4348386478957309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186330 + }, + { + "epoch": 0.9037153253076907, + "grad_norm": 2.4096117101635173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186340 + }, + { + "epoch": 0.9037638235005269, + "grad_norm": 1.651618219966622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186350 + }, + { + "epoch": 0.9038123216933629, + "grad_norm": 2.1200452948733073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186360 + }, + { + "epoch": 0.903860819886199, + "grad_norm": 1.7626643256107855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186370 + }, + { + "epoch": 0.9039093180790351, + "grad_norm": 1.2526506942833748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186380 + }, + { + "epoch": 0.9039578162718712, + "grad_norm": 1.629030776939544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186390 + }, + { + "epoch": 0.9040063144647073, + "grad_norm": 1.802197502343006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186400 + }, + { + "epoch": 0.9040548126575434, + "grad_norm": 1.227202250930759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186410 + }, + { + "epoch": 0.9041033108503794, + "grad_norm": 1.54118264816816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186420 + }, + { + "epoch": 0.9041518090432156, + "grad_norm": 1.699501517293811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186430 + }, + { + "epoch": 0.9042003072360516, + "grad_norm": 1.4033575190808278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186440 + }, + { + "epoch": 0.9042488054288877, + "grad_norm": 1.663748960822886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186450 + }, + { + "epoch": 0.9042973036217238, + "grad_norm": 1.4922392210792168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186460 + }, + { + "epoch": 0.9043458018145599, + "grad_norm": 2.2473285454793768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186470 + }, + { + "epoch": 0.904394300007396, + "grad_norm": 1.3216745919919504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186480 + }, + { + "epoch": 0.9044427982002321, + "grad_norm": 2.0303106751384803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186490 + }, + { + "epoch": 0.9044912963930681, + "grad_norm": 1.555710937850563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186500 + }, + { + "epoch": 0.9045397945859043, + "grad_norm": 1.3041331570207149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186510 + }, + { + "epoch": 0.9045882927787403, + "grad_norm": 1.7565335852509634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186520 + }, + { + "epoch": 0.9046367909715765, + "grad_norm": 2.2652249853649664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186530 + }, + { + "epoch": 0.9046852891644125, + "grad_norm": 1.864440513088539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186540 + }, + { + "epoch": 0.9047337873572486, + "grad_norm": 1.6148559822681818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186550 + }, + { + "epoch": 0.9047822855500847, + "grad_norm": 3.5778050744283973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186560 + }, + { + "epoch": 0.9048307837429208, + "grad_norm": 1.2943393024045236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186570 + }, + { + "epoch": 0.9048792819357568, + "grad_norm": 1.3580681468283728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186580 + }, + { + "epoch": 0.904927780128593, + "grad_norm": 1.773748969924327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186590 + }, + { + "epoch": 0.904976278321429, + "grad_norm": 1.3300747170319482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186600 + }, + { + "epoch": 0.9050247765142652, + "grad_norm": 1.4545452842185114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186610 + }, + { + "epoch": 0.9050732747071012, + "grad_norm": 1.0943963069109941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186620 + }, + { + "epoch": 0.9051217728999373, + "grad_norm": 2.0298569936016975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186630 + }, + { + "epoch": 0.9051702710927734, + "grad_norm": 2.0081321494558324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186640 + }, + { + "epoch": 0.9052187692856095, + "grad_norm": 1.2857726439108319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186650 + }, + { + "epoch": 0.9052672674784455, + "grad_norm": 2.03807744014739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186660 + }, + { + "epoch": 0.9053157656712817, + "grad_norm": 1.4102072398713972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186670 + }, + { + "epoch": 0.9053642638641177, + "grad_norm": 1.2386620618087818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186680 + }, + { + "epoch": 0.9054127620569539, + "grad_norm": 1.5113268858613083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186690 + }, + { + "epoch": 0.9054612602497899, + "grad_norm": 1.2925145398412496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186700 + }, + { + "epoch": 0.905509758442626, + "grad_norm": 1.6101772359888855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186710 + }, + { + "epoch": 0.9055582566354621, + "grad_norm": 2.0656377941463688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186720 + }, + { + "epoch": 0.9056067548282982, + "grad_norm": 1.3905465223729152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186730 + }, + { + "epoch": 0.9056552530211343, + "grad_norm": 1.785869052639555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186740 + }, + { + "epoch": 0.9057037512139704, + "grad_norm": 1.6406396241563925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186750 + }, + { + "epoch": 0.9057522494068064, + "grad_norm": 1.8210027263876327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186760 + }, + { + "epoch": 0.9058007475996426, + "grad_norm": 1.542596095305271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186770 + }, + { + "epoch": 0.9058492457924786, + "grad_norm": 1.048615505538919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186780 + }, + { + "epoch": 0.9058977439853148, + "grad_norm": 1.4902497014190885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186790 + }, + { + "epoch": 0.9059462421781508, + "grad_norm": 1.4366206890770172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186800 + }, + { + "epoch": 0.9059947403709869, + "grad_norm": 1.242482383645438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186810 + }, + { + "epoch": 0.906043238563823, + "grad_norm": 1.6280164771842465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186820 + }, + { + "epoch": 0.9060917367566591, + "grad_norm": 1.5996459268308172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186830 + }, + { + "epoch": 0.9061402349494952, + "grad_norm": 2.2067617067023093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186840 + }, + { + "epoch": 0.9061887331423313, + "grad_norm": 1.6111522782580323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186850 + }, + { + "epoch": 0.9062372313351674, + "grad_norm": 2.1237060110479433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186860 + }, + { + "epoch": 0.9062857295280035, + "grad_norm": 1.7276134300914237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186870 + }, + { + "epoch": 0.9063342277208396, + "grad_norm": 2.0859946658902118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186880 + }, + { + "epoch": 0.9063827259136756, + "grad_norm": 1.8723577355217458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186890 + }, + { + "epoch": 0.9064312241065118, + "grad_norm": 1.5490210003576976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186900 + }, + { + "epoch": 0.9064797222993478, + "grad_norm": 2.1931080951276272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186910 + }, + { + "epoch": 0.906528220492184, + "grad_norm": 1.4784093949060662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186920 + }, + { + "epoch": 0.90657671868502, + "grad_norm": 1.68566671732151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186930 + }, + { + "epoch": 0.9066252168778561, + "grad_norm": 1.8137459534273148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186940 + }, + { + "epoch": 0.9066737150706922, + "grad_norm": 1.3980452351347594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186950 + }, + { + "epoch": 0.9067222132635283, + "grad_norm": 2.0612500151173663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186960 + }, + { + "epoch": 0.9067707114563643, + "grad_norm": 1.6749142517369364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186970 + }, + { + "epoch": 0.9068192096492005, + "grad_norm": 1.4195273401185204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186980 + }, + { + "epoch": 0.9068677078420365, + "grad_norm": 2.8376463845347644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 186990 + }, + { + "epoch": 0.9069162060348727, + "grad_norm": 1.7243559469193315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187000 + }, + { + "epoch": 0.9069647042277087, + "grad_norm": 2.1245369907774148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187010 + }, + { + "epoch": 0.9070132024205448, + "grad_norm": 1.674788308037023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187020 + }, + { + "epoch": 0.9070617006133809, + "grad_norm": 1.789031855992107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187030 + }, + { + "epoch": 0.907110198806217, + "grad_norm": 2.1334562561037274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187040 + }, + { + "epoch": 0.907158696999053, + "grad_norm": 1.528870363642909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187050 + }, + { + "epoch": 0.9072071951918892, + "grad_norm": 1.2933676352133716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187060 + }, + { + "epoch": 0.9072556933847252, + "grad_norm": 1.4013394888934272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187070 + }, + { + "epoch": 0.9073041915775614, + "grad_norm": 1.478990352410392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187080 + }, + { + "epoch": 0.9073526897703974, + "grad_norm": 1.196401822056714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187090 + }, + { + "epoch": 0.9074011879632335, + "grad_norm": 1.8663428136278526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187100 + }, + { + "epoch": 0.9074496861560696, + "grad_norm": 1.7275283425988164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187110 + }, + { + "epoch": 0.9074981843489057, + "grad_norm": 2.5977932693876937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187120 + }, + { + "epoch": 0.9075466825417418, + "grad_norm": 1.2710852814734608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187130 + }, + { + "epoch": 0.9075951807345779, + "grad_norm": 1.3957388134144821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187140 + }, + { + "epoch": 0.9076436789274139, + "grad_norm": 2.4675109955296648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187150 + }, + { + "epoch": 0.9076921771202501, + "grad_norm": 1.510039382424111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187160 + }, + { + "epoch": 0.9077406753130861, + "grad_norm": 1.1676864808407572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187170 + }, + { + "epoch": 0.9077891735059223, + "grad_norm": 1.3085734273943217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187180 + }, + { + "epoch": 0.9078376716987583, + "grad_norm": 1.7588321910011473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187190 + }, + { + "epoch": 0.9078861698915944, + "grad_norm": 1.7209682567909113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187200 + }, + { + "epoch": 0.9079346680844305, + "grad_norm": 1.2248684733151549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187210 + }, + { + "epoch": 0.9079831662772666, + "grad_norm": 1.9533317185960186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187220 + }, + { + "epoch": 0.9080316644701026, + "grad_norm": 1.5184141943791474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187230 + }, + { + "epoch": 0.9080801626629388, + "grad_norm": 1.4480770360592032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187240 + }, + { + "epoch": 0.9081286608557748, + "grad_norm": 2.274529720125429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187250 + }, + { + "epoch": 0.908177159048611, + "grad_norm": 1.940884786222341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187260 + }, + { + "epoch": 0.908225657241447, + "grad_norm": 2.2124902798736912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187270 + }, + { + "epoch": 0.9082741554342831, + "grad_norm": 1.9447339738576375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187280 + }, + { + "epoch": 0.9083226536271192, + "grad_norm": 1.5110883211377768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187290 + }, + { + "epoch": 0.9083711518199553, + "grad_norm": 2.1242717807012923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187300 + }, + { + "epoch": 0.9084196500127913, + "grad_norm": 1.6340761632704925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187310 + }, + { + "epoch": 0.9084681482056275, + "grad_norm": 1.6631133803457487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187320 + }, + { + "epoch": 0.9085166463984635, + "grad_norm": 1.115321968114813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187330 + }, + { + "epoch": 0.9085651445912997, + "grad_norm": 2.3526485648517337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187340 + }, + { + "epoch": 0.9086136427841358, + "grad_norm": 2.0539719258749756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187350 + }, + { + "epoch": 0.9086621409769718, + "grad_norm": 1.1399129640210504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187360 + }, + { + "epoch": 0.908710639169808, + "grad_norm": 1.031647212101916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187370 + }, + { + "epoch": 0.908759137362644, + "grad_norm": 1.796368742645882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187380 + }, + { + "epoch": 0.9088076355554802, + "grad_norm": 1.2710324348574886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187390 + }, + { + "epoch": 0.9088561337483162, + "grad_norm": 1.890998646558728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187400 + }, + { + "epoch": 0.9089046319411523, + "grad_norm": 1.2678037286661947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187410 + }, + { + "epoch": 0.9089531301339884, + "grad_norm": 1.7599690593783635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187420 + }, + { + "epoch": 0.9090016283268245, + "grad_norm": 2.6788235629737756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187430 + }, + { + "epoch": 0.9090501265196606, + "grad_norm": 1.5939821906840734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187440 + }, + { + "epoch": 0.9090986247124967, + "grad_norm": 1.478024902468178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187450 + }, + { + "epoch": 0.9091471229053327, + "grad_norm": 1.8541165047736285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187460 + }, + { + "epoch": 0.9091956210981689, + "grad_norm": 2.059443993118748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187470 + }, + { + "epoch": 0.9092441192910049, + "grad_norm": 1.7347717928828388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187480 + }, + { + "epoch": 0.909292617483841, + "grad_norm": 1.621279821506505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187490 + }, + { + "epoch": 0.9093411156766771, + "grad_norm": 2.5888340360324946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187500 + }, + { + "epoch": 0.9093896138695132, + "grad_norm": 1.4315378216167574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187510 + }, + { + "epoch": 0.9094381120623493, + "grad_norm": 1.8402049661858655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187520 + }, + { + "epoch": 0.9094866102551854, + "grad_norm": 1.606949950883063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187530 + }, + { + "epoch": 0.9095351084480214, + "grad_norm": 1.5393862184964746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187540 + }, + { + "epoch": 0.9095836066408576, + "grad_norm": 1.6557509141534865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187550 + }, + { + "epoch": 0.9096321048336936, + "grad_norm": 1.6171345151860805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187560 + }, + { + "epoch": 0.9096806030265298, + "grad_norm": 1.790426651382404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187570 + }, + { + "epoch": 0.9097291012193658, + "grad_norm": 1.80257053727928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187580 + }, + { + "epoch": 0.9097775994122019, + "grad_norm": 1.4485786792306499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187590 + }, + { + "epoch": 0.909826097605038, + "grad_norm": 1.512651159885081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187600 + }, + { + "epoch": 0.9098745957978741, + "grad_norm": 1.2665196891248343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187610 + }, + { + "epoch": 0.9099230939907101, + "grad_norm": 1.6832302662805887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187620 + }, + { + "epoch": 0.9099715921835463, + "grad_norm": 2.691706058044474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187630 + }, + { + "epoch": 0.9100200903763823, + "grad_norm": 2.8397437290550442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187640 + }, + { + "epoch": 0.9100685885692185, + "grad_norm": 2.169890933600982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187650 + }, + { + "epoch": 0.9101170867620545, + "grad_norm": 1.5142752829433448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187660 + }, + { + "epoch": 0.9101655849548906, + "grad_norm": 1.695760865061402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187670 + }, + { + "epoch": 0.9102140831477267, + "grad_norm": 1.6124912960435722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187680 + }, + { + "epoch": 0.9102625813405628, + "grad_norm": 2.2815171973888937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187690 + }, + { + "epoch": 0.9103110795333988, + "grad_norm": 1.4926834879247508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187700 + }, + { + "epoch": 0.910359577726235, + "grad_norm": 1.6381397571763046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187710 + }, + { + "epoch": 0.910408075919071, + "grad_norm": 1.5837882116898072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187720 + }, + { + "epoch": 0.9104565741119072, + "grad_norm": 1.4362795397460104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187730 + }, + { + "epoch": 0.9105050723047432, + "grad_norm": 1.7747634473153084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187740 + }, + { + "epoch": 0.9105535704975793, + "grad_norm": 1.341202970905897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187750 + }, + { + "epoch": 0.9106020686904154, + "grad_norm": 1.386282200144251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187760 + }, + { + "epoch": 0.9106505668832515, + "grad_norm": 3.108269908125294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187770 + }, + { + "epoch": 0.9106990650760876, + "grad_norm": 1.5032963318617476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187780 + }, + { + "epoch": 0.9107475632689237, + "grad_norm": 1.2021502904246972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187790 + }, + { + "epoch": 0.9107960614617597, + "grad_norm": 2.0448620574597953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187800 + }, + { + "epoch": 0.9108445596545959, + "grad_norm": 1.4575066487054755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187810 + }, + { + "epoch": 0.9108930578474319, + "grad_norm": 2.736047655105267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187820 + }, + { + "epoch": 0.910941556040268, + "grad_norm": 2.595674963856709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187830 + }, + { + "epoch": 0.9109900542331041, + "grad_norm": 1.4615502585968443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187840 + }, + { + "epoch": 0.9110385524259402, + "grad_norm": 1.4232733214214477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187850 + }, + { + "epoch": 0.9110870506187764, + "grad_norm": 2.8314914857219264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187860 + }, + { + "epoch": 0.9111355488116124, + "grad_norm": 9.687432189764422e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187870 + }, + { + "epoch": 0.9111840470044485, + "grad_norm": 1.743748434535064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187880 + }, + { + "epoch": 0.9112325451972846, + "grad_norm": 1.7746675240459808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187890 + }, + { + "epoch": 0.9112810433901207, + "grad_norm": 1.0726135535321646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187900 + }, + { + "epoch": 0.9113295415829568, + "grad_norm": 1.0399527461402158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187910 + }, + { + "epoch": 0.9113780397757929, + "grad_norm": 1.0730016875015735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187920 + }, + { + "epoch": 0.9114265379686289, + "grad_norm": 1.0502131608802756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187930 + }, + { + "epoch": 0.9114750361614651, + "grad_norm": 1.885901745879437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187940 + }, + { + "epoch": 0.9115235343543011, + "grad_norm": 1.4747353560551346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187950 + }, + { + "epoch": 0.9115720325471373, + "grad_norm": 3.0693724966113223e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 187960 + }, + { + "epoch": 0.9116205307399733, + "grad_norm": 0.02091110125184059, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 187970 + }, + { + "epoch": 0.9116690289328094, + "grad_norm": 1.4612901395594236e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 187980 + }, + { + "epoch": 0.9117175271256455, + "grad_norm": 0.003951618913561106, + "learning_rate": 0.0002, + "loss": 0.0045, + "step": 187990 + }, + { + "epoch": 0.9117660253184816, + "grad_norm": 0.07177754491567612, + "learning_rate": 0.0002, + "loss": 0.004, + "step": 188000 + }, + { + "epoch": 0.9118145235113176, + "grad_norm": 3.780804399866611e-05, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 188010 + }, + { + "epoch": 0.9118630217041538, + "grad_norm": 0.001478625345043838, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188020 + }, + { + "epoch": 0.9119115198969898, + "grad_norm": 4.769066799781285e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188030 + }, + { + "epoch": 0.911960018089826, + "grad_norm": 3.50093214365188e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188040 + }, + { + "epoch": 0.912008516282662, + "grad_norm": 1.7510817997390404e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188050 + }, + { + "epoch": 0.9120570144754981, + "grad_norm": 1.8995822756551206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188060 + }, + { + "epoch": 0.9121055126683342, + "grad_norm": 1.663451075728517e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188070 + }, + { + "epoch": 0.9121540108611703, + "grad_norm": 1.5211922800517641e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188080 + }, + { + "epoch": 0.9122025090540063, + "grad_norm": 1.3754138308286201e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188090 + }, + { + "epoch": 0.9122510072468425, + "grad_norm": 1.170504492620239e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188100 + }, + { + "epoch": 0.9122995054396785, + "grad_norm": 2.3321514163399115e-05, + "learning_rate": 0.0002, + "loss": 0.0051, + "step": 188110 + }, + { + "epoch": 0.9123480036325147, + "grad_norm": 0.001803169259801507, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188120 + }, + { + "epoch": 0.9123965018253507, + "grad_norm": 0.0005978455883450806, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 188130 + }, + { + "epoch": 0.9124450000181868, + "grad_norm": 0.00017074451898224652, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188140 + }, + { + "epoch": 0.9124934982110229, + "grad_norm": 5.715606675948948e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188150 + }, + { + "epoch": 0.912541996403859, + "grad_norm": 2.8403808755683713e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188160 + }, + { + "epoch": 0.9125904945966951, + "grad_norm": 2.380219302722253e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188170 + }, + { + "epoch": 0.9126389927895312, + "grad_norm": 2.210991078754887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188180 + }, + { + "epoch": 0.9126874909823672, + "grad_norm": 3.072721665375866e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188190 + }, + { + "epoch": 0.9127359891752034, + "grad_norm": 2.978263728437014e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188200 + }, + { + "epoch": 0.9127844873680394, + "grad_norm": 1.8439772247802466e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188210 + }, + { + "epoch": 0.9128329855608756, + "grad_norm": 1.7513717466499656e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188220 + }, + { + "epoch": 0.9128814837537116, + "grad_norm": 1.62164906214457e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188230 + }, + { + "epoch": 0.9129299819465477, + "grad_norm": 2.3496624635299668e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188240 + }, + { + "epoch": 0.9129784801393838, + "grad_norm": 2.19764970097458e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188250 + }, + { + "epoch": 0.9130269783322199, + "grad_norm": 1.3725282769883052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188260 + }, + { + "epoch": 0.9130754765250559, + "grad_norm": 1.3780572771793231e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188270 + }, + { + "epoch": 0.9131239747178921, + "grad_norm": 1.3501896319212392e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188280 + }, + { + "epoch": 0.9131724729107281, + "grad_norm": 1.906536090245936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188290 + }, + { + "epoch": 0.9132209711035643, + "grad_norm": 1.831992449297104e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188300 + }, + { + "epoch": 0.9132694692964003, + "grad_norm": 1.1098329196101986e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188310 + }, + { + "epoch": 0.9133179674892364, + "grad_norm": 1.136735318141291e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188320 + }, + { + "epoch": 0.9133664656820725, + "grad_norm": 1.0727945664257277e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188330 + }, + { + "epoch": 0.9134149638749086, + "grad_norm": 1.661172427702695e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188340 + }, + { + "epoch": 0.9134634620677446, + "grad_norm": 1.4738956451765262e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188350 + }, + { + "epoch": 0.9135119602605808, + "grad_norm": 9.292224603996146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188360 + }, + { + "epoch": 0.9135604584534169, + "grad_norm": 9.16987846721895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188370 + }, + { + "epoch": 0.913608956646253, + "grad_norm": 9.045015758601949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188380 + }, + { + "epoch": 0.9136574548390891, + "grad_norm": 1.2928769137943164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188390 + }, + { + "epoch": 0.9137059530319251, + "grad_norm": 1.2888458513771184e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188400 + }, + { + "epoch": 0.9137544512247613, + "grad_norm": 8.342633009306155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188410 + }, + { + "epoch": 0.9138029494175973, + "grad_norm": 7.70620954426704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188420 + }, + { + "epoch": 0.9138514476104335, + "grad_norm": 8.017422260309104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188430 + }, + { + "epoch": 0.9138999458032695, + "grad_norm": 1.1122309842903633e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188440 + }, + { + "epoch": 0.9139484439961056, + "grad_norm": 1.1453524166427087e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188450 + }, + { + "epoch": 0.9139969421889417, + "grad_norm": 7.274077688634861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188460 + }, + { + "epoch": 0.9140454403817778, + "grad_norm": 6.6370666900184005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188470 + }, + { + "epoch": 0.9140939385746139, + "grad_norm": 6.581033630936872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188480 + }, + { + "epoch": 0.91414243676745, + "grad_norm": 9.302671969635412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188490 + }, + { + "epoch": 0.914190934960286, + "grad_norm": 9.684828910394572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188500 + }, + { + "epoch": 0.9142394331531222, + "grad_norm": 6.301896064542234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188510 + }, + { + "epoch": 0.9142879313459582, + "grad_norm": 6.0329248299240135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188520 + }, + { + "epoch": 0.9143364295387943, + "grad_norm": 5.898425115447026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188530 + }, + { + "epoch": 0.9143849277316304, + "grad_norm": 9.190610398945864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188540 + }, + { + "epoch": 0.9144334259244665, + "grad_norm": 8.461295692541171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188550 + }, + { + "epoch": 0.9144819241173026, + "grad_norm": 5.374796728574438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188560 + }, + { + "epoch": 0.9145304223101387, + "grad_norm": 3.896722409990616e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188570 + }, + { + "epoch": 0.9145789205029747, + "grad_norm": 6.518047030112939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188580 + }, + { + "epoch": 0.9146274186958109, + "grad_norm": 7.676099812670145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188590 + }, + { + "epoch": 0.9146759168886469, + "grad_norm": 7.498926606785972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188600 + }, + { + "epoch": 0.914724415081483, + "grad_norm": 5.85981433687266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188610 + }, + { + "epoch": 0.9147729132743191, + "grad_norm": 5.298103587847436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188620 + }, + { + "epoch": 0.9148214114671552, + "grad_norm": 5.390556452766759e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188630 + }, + { + "epoch": 0.9148699096599913, + "grad_norm": 7.327825187530834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188640 + }, + { + "epoch": 0.9149184078528274, + "grad_norm": 6.872394351375988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188650 + }, + { + "epoch": 0.9149669060456634, + "grad_norm": 4.893277491646586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188660 + }, + { + "epoch": 0.9150154042384996, + "grad_norm": 4.8685988076613285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188670 + }, + { + "epoch": 0.9150639024313356, + "grad_norm": 4.973691829945892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188680 + }, + { + "epoch": 0.9151124006241718, + "grad_norm": 6.05668492426048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188690 + }, + { + "epoch": 0.9151608988170078, + "grad_norm": 6.170918823045213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188700 + }, + { + "epoch": 0.9152093970098439, + "grad_norm": 4.740504209621577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188710 + }, + { + "epoch": 0.91525789520268, + "grad_norm": 4.451395398064051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188720 + }, + { + "epoch": 0.9153063933955161, + "grad_norm": 4.291256573196733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188730 + }, + { + "epoch": 0.9153548915883521, + "grad_norm": 6.062409283913439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188740 + }, + { + "epoch": 0.9154033897811883, + "grad_norm": 5.896487891732249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188750 + }, + { + "epoch": 0.9154518879740243, + "grad_norm": 4.162618097325321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188760 + }, + { + "epoch": 0.9155003861668605, + "grad_norm": 3.994803591922391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188770 + }, + { + "epoch": 0.9155488843596965, + "grad_norm": 3.877467406709911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188780 + }, + { + "epoch": 0.9155973825525326, + "grad_norm": 5.657163910655072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188790 + }, + { + "epoch": 0.9156458807453687, + "grad_norm": 5.590739419858437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188800 + }, + { + "epoch": 0.9156943789382048, + "grad_norm": 3.8233706618484575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188810 + }, + { + "epoch": 0.9157428771310409, + "grad_norm": 3.561152880138252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188820 + }, + { + "epoch": 0.915791375323877, + "grad_norm": 3.7984409573255107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188830 + }, + { + "epoch": 0.915839873516713, + "grad_norm": 4.534765594144119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188840 + }, + { + "epoch": 0.9158883717095492, + "grad_norm": 5.13740133101237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188850 + }, + { + "epoch": 0.9159368699023852, + "grad_norm": 3.6026165162184043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188860 + }, + { + "epoch": 0.9159853680952214, + "grad_norm": 3.491305278657819e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188870 + }, + { + "epoch": 0.9160338662880575, + "grad_norm": 4.712277586804703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188880 + }, + { + "epoch": 0.9160823644808935, + "grad_norm": 4.6504933379765134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188890 + }, + { + "epoch": 0.9161308626737297, + "grad_norm": 4.606802121998044e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 188900 + }, + { + "epoch": 0.9161793608665657, + "grad_norm": 2.2165782866068184e-05, + "learning_rate": 0.0002, + "loss": 0.0029, + "step": 188910 + }, + { + "epoch": 0.9162278590594019, + "grad_norm": 0.0024238135665655136, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 188920 + }, + { + "epoch": 0.9162763572522379, + "grad_norm": 0.0036124438047409058, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 188930 + }, + { + "epoch": 0.916324855445074, + "grad_norm": 0.00011354849993949756, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 188940 + }, + { + "epoch": 0.9163733536379101, + "grad_norm": 0.0003884669567923993, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188950 + }, + { + "epoch": 0.9164218518307462, + "grad_norm": 0.027946939691901207, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 188960 + }, + { + "epoch": 0.9164703500235822, + "grad_norm": 0.00040703071863390505, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188970 + }, + { + "epoch": 0.9165188482164184, + "grad_norm": 7.063292287057266e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188980 + }, + { + "epoch": 0.9165673464092544, + "grad_norm": 2.9939421438029967e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 188990 + }, + { + "epoch": 0.9166158446020906, + "grad_norm": 2.680141005839687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189000 + }, + { + "epoch": 0.9166643427949266, + "grad_norm": 3.55743286490906e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189010 + }, + { + "epoch": 0.9167128409877627, + "grad_norm": 0.00018112355610355735, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189020 + }, + { + "epoch": 0.9167613391805988, + "grad_norm": 2.8188414944452234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189030 + }, + { + "epoch": 0.9168098373734349, + "grad_norm": 1.9298926417832263e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189040 + }, + { + "epoch": 0.9168583355662709, + "grad_norm": 1.759588485583663e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189050 + }, + { + "epoch": 0.9169068337591071, + "grad_norm": 2.458030212437734e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189060 + }, + { + "epoch": 0.9169553319519431, + "grad_norm": 2.4381219191127457e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189070 + }, + { + "epoch": 0.9170038301447793, + "grad_norm": 2.1708356143790297e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189080 + }, + { + "epoch": 0.9170523283376153, + "grad_norm": 1.4344406736199744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189090 + }, + { + "epoch": 0.9171008265304514, + "grad_norm": 0.0302122849971056, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189100 + }, + { + "epoch": 0.9171493247232875, + "grad_norm": 1.4897168512106873e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189110 + }, + { + "epoch": 0.9171978229161236, + "grad_norm": 7.094941975083202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189120 + }, + { + "epoch": 0.9172463211089597, + "grad_norm": 1.3122753443894908e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189130 + }, + { + "epoch": 0.9172948193017958, + "grad_norm": 0.00014079058018978685, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189140 + }, + { + "epoch": 0.9173433174946318, + "grad_norm": 1.2069700460415334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189150 + }, + { + "epoch": 0.917391815687468, + "grad_norm": 1.251743105967762e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189160 + }, + { + "epoch": 0.917440313880304, + "grad_norm": 1.1723726856871508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189170 + }, + { + "epoch": 0.9174888120731401, + "grad_norm": 1.1169496247021016e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189180 + }, + { + "epoch": 0.9175373102659762, + "grad_norm": 9.193103323923424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189190 + }, + { + "epoch": 0.9175858084588123, + "grad_norm": 9.455712643102743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189200 + }, + { + "epoch": 0.9176343066516484, + "grad_norm": 1.0424813808640465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189210 + }, + { + "epoch": 0.9176828048444845, + "grad_norm": 1.034773413266521e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189220 + }, + { + "epoch": 0.9177313030373205, + "grad_norm": 1.0271136488881893e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189230 + }, + { + "epoch": 0.9177798012301567, + "grad_norm": 7.857019227230921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189240 + }, + { + "epoch": 0.9178282994229927, + "grad_norm": 7.578551503684139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189250 + }, + { + "epoch": 0.9178767976158289, + "grad_norm": 9.145532203547191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189260 + }, + { + "epoch": 0.9179252958086649, + "grad_norm": 8.719285688130185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189270 + }, + { + "epoch": 0.917973794001501, + "grad_norm": 8.599321517976932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189280 + }, + { + "epoch": 0.9180222921943371, + "grad_norm": 7.08566585672088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189290 + }, + { + "epoch": 0.9180707903871732, + "grad_norm": 6.618391125812195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189300 + }, + { + "epoch": 0.9181192885800092, + "grad_norm": 8.130135029205121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189310 + }, + { + "epoch": 0.9181677867728454, + "grad_norm": 5.862783291377127e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189320 + }, + { + "epoch": 0.9182162849656814, + "grad_norm": 7.5109992394573055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189330 + }, + { + "epoch": 0.9182647831585176, + "grad_norm": 6.269828645599773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189340 + }, + { + "epoch": 0.9183132813513536, + "grad_norm": 8.08460117696086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189350 + }, + { + "epoch": 0.9183617795441897, + "grad_norm": 7.137588454497745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189360 + }, + { + "epoch": 0.9184102777370258, + "grad_norm": 7.485543846996734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189370 + }, + { + "epoch": 0.9184587759298619, + "grad_norm": 6.7569339989859145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189380 + }, + { + "epoch": 0.918507274122698, + "grad_norm": 5.358209364203503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189390 + }, + { + "epoch": 0.9185557723155341, + "grad_norm": 5.580608558375388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189400 + }, + { + "epoch": 0.9186042705083702, + "grad_norm": 6.5339017965015955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189410 + }, + { + "epoch": 0.9186527687012063, + "grad_norm": 6.282333288254449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189420 + }, + { + "epoch": 0.9187012668940424, + "grad_norm": 6.368509730236838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189430 + }, + { + "epoch": 0.9187497650868784, + "grad_norm": 4.962876573699759e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189440 + }, + { + "epoch": 0.9187982632797146, + "grad_norm": 5.021759079681942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189450 + }, + { + "epoch": 0.9188467614725506, + "grad_norm": 2.8099084374844097e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 189460 + }, + { + "epoch": 0.9188952596653868, + "grad_norm": 7.761011511320248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189470 + }, + { + "epoch": 0.9189437578582228, + "grad_norm": 8.033836638787761e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189480 + }, + { + "epoch": 0.9189922560510589, + "grad_norm": 7.011050183791667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189490 + }, + { + "epoch": 0.919040754243895, + "grad_norm": 4.4770709791919217e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189500 + }, + { + "epoch": 0.9190892524367311, + "grad_norm": 3.0729399441042915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189510 + }, + { + "epoch": 0.9191377506295672, + "grad_norm": 2.4887614927138202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189520 + }, + { + "epoch": 0.9191862488224033, + "grad_norm": 1.982011235668324e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189530 + }, + { + "epoch": 0.9192347470152393, + "grad_norm": 1.7718513845466077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189540 + }, + { + "epoch": 0.9192832452080755, + "grad_norm": 1.6109723219415173e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189550 + }, + { + "epoch": 0.9193317434009115, + "grad_norm": 2.3098886231309734e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189560 + }, + { + "epoch": 0.9193802415937476, + "grad_norm": 1.3671635315404274e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189570 + }, + { + "epoch": 0.9194287397865837, + "grad_norm": 1.2025173418805934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189580 + }, + { + "epoch": 0.9194772379794198, + "grad_norm": 0.037977177649736404, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189590 + }, + { + "epoch": 0.9195257361722559, + "grad_norm": 7.787489266775083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189600 + }, + { + "epoch": 0.919574234365092, + "grad_norm": 8.88538397703087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189610 + }, + { + "epoch": 0.919622732557928, + "grad_norm": 7.116000688256463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189620 + }, + { + "epoch": 0.9196712307507642, + "grad_norm": 7.125309366529109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189630 + }, + { + "epoch": 0.9197197289436002, + "grad_norm": 6.5982430896838196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189640 + }, + { + "epoch": 0.9197682271364364, + "grad_norm": 6.3011602833285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189650 + }, + { + "epoch": 0.9198167253292724, + "grad_norm": 6.612248853343772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189660 + }, + { + "epoch": 0.9198652235221085, + "grad_norm": 6.305681836238364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189670 + }, + { + "epoch": 0.9199137217149446, + "grad_norm": 6.047694569133455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189680 + }, + { + "epoch": 0.9199622199077807, + "grad_norm": 5.986035375826759e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189690 + }, + { + "epoch": 0.9200107181006167, + "grad_norm": 5.432301804830786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189700 + }, + { + "epoch": 0.9200592162934529, + "grad_norm": 5.500286533788312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189710 + }, + { + "epoch": 0.9201077144862889, + "grad_norm": 5.396428150561405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189720 + }, + { + "epoch": 0.9201562126791251, + "grad_norm": 5.369385235098889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189730 + }, + { + "epoch": 0.9202047108719611, + "grad_norm": 5.003336809750181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189740 + }, + { + "epoch": 0.9202532090647972, + "grad_norm": 4.924027507513529e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189750 + }, + { + "epoch": 0.9203017072576333, + "grad_norm": 5.23033440913423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189760 + }, + { + "epoch": 0.9203502054504694, + "grad_norm": 4.991458354197675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189770 + }, + { + "epoch": 0.9203987036433054, + "grad_norm": 5.003150818083668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189780 + }, + { + "epoch": 0.9204472018361416, + "grad_norm": 4.598421583068557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189790 + }, + { + "epoch": 0.9204957000289776, + "grad_norm": 4.4228027036297135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189800 + }, + { + "epoch": 0.9205441982218138, + "grad_norm": 4.471397460292792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189810 + }, + { + "epoch": 0.9205926964146498, + "grad_norm": 4.593273843056522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189820 + }, + { + "epoch": 0.920641194607486, + "grad_norm": 4.628689111996209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189830 + }, + { + "epoch": 0.920689692800322, + "grad_norm": 4.345037268649321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189840 + }, + { + "epoch": 0.9207381909931581, + "grad_norm": 4.0332897697226144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189850 + }, + { + "epoch": 0.9207866891859942, + "grad_norm": 4.313447789172642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189860 + }, + { + "epoch": 0.9208351873788303, + "grad_norm": 4.137090400035959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189870 + }, + { + "epoch": 0.9208836855716663, + "grad_norm": 4.010307748103514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189880 + }, + { + "epoch": 0.9209321837645025, + "grad_norm": 3.636439259935287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189890 + }, + { + "epoch": 0.9209806819573385, + "grad_norm": 3.7309539493435295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189900 + }, + { + "epoch": 0.9210291801501747, + "grad_norm": 3.981272129749414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189910 + }, + { + "epoch": 0.9210776783430108, + "grad_norm": 3.7363547562563326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189920 + }, + { + "epoch": 0.9211261765358468, + "grad_norm": 3.816334356088191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189930 + }, + { + "epoch": 0.921174674728683, + "grad_norm": 3.37861456500832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189940 + }, + { + "epoch": 0.921223172921519, + "grad_norm": 3.538234523148276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189950 + }, + { + "epoch": 0.9212716711143552, + "grad_norm": 3.5925620522903046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189960 + }, + { + "epoch": 0.9213201693071912, + "grad_norm": 3.4641343518160284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189970 + }, + { + "epoch": 0.9213686675000273, + "grad_norm": 3.4644151583052007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189980 + }, + { + "epoch": 0.9214171656928634, + "grad_norm": 3.1810973268875387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 189990 + }, + { + "epoch": 0.9214656638856995, + "grad_norm": 3.167649538227124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190000 + }, + { + "epoch": 0.9215141620785355, + "grad_norm": 3.471232503216015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190010 + }, + { + "epoch": 0.9215626602713717, + "grad_norm": 3.3353635444655083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190020 + }, + { + "epoch": 0.9216111584642077, + "grad_norm": 3.195505769326701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190030 + }, + { + "epoch": 0.9216596566570439, + "grad_norm": 2.8931790438946337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190040 + }, + { + "epoch": 0.9217081548498799, + "grad_norm": 2.9896787054894958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190050 + }, + { + "epoch": 0.921756653042716, + "grad_norm": 3.1253207453119103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190060 + }, + { + "epoch": 0.9218051512355521, + "grad_norm": 3.0607216103817336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190070 + }, + { + "epoch": 0.9218536494283882, + "grad_norm": 2.98811619359185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190080 + }, + { + "epoch": 0.9219021476212242, + "grad_norm": 2.858027528418461e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190090 + }, + { + "epoch": 0.9219506458140604, + "grad_norm": 2.8242793632671237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190100 + }, + { + "epoch": 0.9219991440068964, + "grad_norm": 2.998300033141277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190110 + }, + { + "epoch": 0.9220476421997326, + "grad_norm": 2.7924845653615193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190120 + }, + { + "epoch": 0.9220961403925686, + "grad_norm": 2.824942384904716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190130 + }, + { + "epoch": 0.9221446385854047, + "grad_norm": 2.7282192149868933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190140 + }, + { + "epoch": 0.9221931367782408, + "grad_norm": 2.668645038284012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190150 + }, + { + "epoch": 0.9222416349710769, + "grad_norm": 2.6552027065918082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190160 + }, + { + "epoch": 0.922290133163913, + "grad_norm": 2.720716565818293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190170 + }, + { + "epoch": 0.9223386313567491, + "grad_norm": 2.643184870976256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190180 + }, + { + "epoch": 0.9223871295495851, + "grad_norm": 2.5339688818348804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190190 + }, + { + "epoch": 0.9224356277424213, + "grad_norm": 2.4820506041578483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190200 + }, + { + "epoch": 0.9224841259352573, + "grad_norm": 2.44484954237123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190210 + }, + { + "epoch": 0.9225326241280934, + "grad_norm": 2.5166102659568423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190220 + }, + { + "epoch": 0.9225811223209295, + "grad_norm": 2.5725278192112455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190230 + }, + { + "epoch": 0.9226296205137656, + "grad_norm": 2.451635282341158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190240 + }, + { + "epoch": 0.9226781187066017, + "grad_norm": 2.431426082694088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190250 + }, + { + "epoch": 0.9227266168994378, + "grad_norm": 2.3673244413657812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190260 + }, + { + "epoch": 0.9227751150922738, + "grad_norm": 2.336512579859118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190270 + }, + { + "epoch": 0.92282361328511, + "grad_norm": 2.3152369976742193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190280 + }, + { + "epoch": 0.922872111477946, + "grad_norm": 2.325219156773528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190290 + }, + { + "epoch": 0.9229206096707822, + "grad_norm": 2.2748695300833788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190300 + }, + { + "epoch": 0.9229691078636182, + "grad_norm": 2.2602953322348185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190310 + }, + { + "epoch": 0.9230176060564543, + "grad_norm": 2.2111489670351148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190320 + }, + { + "epoch": 0.9230661042492904, + "grad_norm": 2.088723476845189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190330 + }, + { + "epoch": 0.9231146024421265, + "grad_norm": 2.2012948193150805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190340 + }, + { + "epoch": 0.9231631006349625, + "grad_norm": 2.1639507394866087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190350 + }, + { + "epoch": 0.9232115988277987, + "grad_norm": 2.0369620870042127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190360 + }, + { + "epoch": 0.9232600970206347, + "grad_norm": 1.989650172617985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190370 + }, + { + "epoch": 0.9233085952134709, + "grad_norm": 2.146843826267286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190380 + }, + { + "epoch": 0.9233570934063069, + "grad_norm": 2.1253154045552947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190390 + }, + { + "epoch": 0.923405591599143, + "grad_norm": 2.2088840978540247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190400 + }, + { + "epoch": 0.9234540897919791, + "grad_norm": 1.8547071931607206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190410 + }, + { + "epoch": 0.9235025879848152, + "grad_norm": 1.8922056597148185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190420 + }, + { + "epoch": 0.9235510861776514, + "grad_norm": 1.958315806405153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190430 + }, + { + "epoch": 0.9235995843704874, + "grad_norm": 2.0326651792856865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190440 + }, + { + "epoch": 0.9236480825633235, + "grad_norm": 2.012338654822088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190450 + }, + { + "epoch": 0.9236965807561596, + "grad_norm": 1.7985910290008178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190460 + }, + { + "epoch": 0.9237450789489957, + "grad_norm": 1.804129851734615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190470 + }, + { + "epoch": 0.9237935771418317, + "grad_norm": 1.768605557117553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190480 + }, + { + "epoch": 0.9238420753346679, + "grad_norm": 1.7909160305862315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190490 + }, + { + "epoch": 0.9238905735275039, + "grad_norm": 1.7629922695050482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190500 + }, + { + "epoch": 0.9239390717203401, + "grad_norm": 1.6344757796105114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190510 + }, + { + "epoch": 0.9239875699131761, + "grad_norm": 1.5928739003356895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190520 + }, + { + "epoch": 0.9240360681060122, + "grad_norm": 1.6289482118736487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190530 + }, + { + "epoch": 0.9240845662988483, + "grad_norm": 1.754959043864801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190540 + }, + { + "epoch": 0.9241330644916844, + "grad_norm": 1.656580025155563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190550 + }, + { + "epoch": 0.9241815626845205, + "grad_norm": 1.6272171023956616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190560 + }, + { + "epoch": 0.9242300608773566, + "grad_norm": 1.5344561461461126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190570 + }, + { + "epoch": 0.9242785590701926, + "grad_norm": 1.4838959714325028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190580 + }, + { + "epoch": 0.9243270572630288, + "grad_norm": 1.6134148381752311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190590 + }, + { + "epoch": 0.9243755554558648, + "grad_norm": 1.6063133898569504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190600 + }, + { + "epoch": 0.924424053648701, + "grad_norm": 1.4112266626398196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190610 + }, + { + "epoch": 0.924472551841537, + "grad_norm": 1.4468715789917042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190620 + }, + { + "epoch": 0.9245210500343731, + "grad_norm": 1.4209382470653509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190630 + }, + { + "epoch": 0.9245695482272092, + "grad_norm": 1.5279788385669235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190640 + }, + { + "epoch": 0.9246180464200453, + "grad_norm": 1.551523496345908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190650 + }, + { + "epoch": 0.9246665446128813, + "grad_norm": 1.3900889825890772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190660 + }, + { + "epoch": 0.9247150428057175, + "grad_norm": 1.354112328044721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190670 + }, + { + "epoch": 0.9247635409985535, + "grad_norm": 1.4297202142188326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190680 + }, + { + "epoch": 0.9248120391913897, + "grad_norm": 1.4521273214995745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190690 + }, + { + "epoch": 0.9248605373842257, + "grad_norm": 1.4974655186961172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190700 + }, + { + "epoch": 0.9249090355770618, + "grad_norm": 1.2574016636790475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190710 + }, + { + "epoch": 0.9249575337698979, + "grad_norm": 1.2754200042763841e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190720 + }, + { + "epoch": 0.925006031962734, + "grad_norm": 1.235573449775984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190730 + }, + { + "epoch": 0.92505453015557, + "grad_norm": 1.365296611766098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190740 + }, + { + "epoch": 0.9251030283484062, + "grad_norm": 1.362709554086905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190750 + }, + { + "epoch": 0.9251515265412422, + "grad_norm": 1.1694983186316676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190760 + }, + { + "epoch": 0.9252000247340784, + "grad_norm": 1.2136722489231033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190770 + }, + { + "epoch": 0.9252485229269144, + "grad_norm": 1.1741947218979476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190780 + }, + { + "epoch": 0.9252970211197505, + "grad_norm": 1.2354378213785822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190790 + }, + { + "epoch": 0.9253455193125866, + "grad_norm": 1.3386214732236112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190800 + }, + { + "epoch": 0.9253940175054227, + "grad_norm": 1.1480771036076476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190810 + }, + { + "epoch": 0.9254425156982587, + "grad_norm": 1.1029386541849817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190820 + }, + { + "epoch": 0.9254910138910949, + "grad_norm": 1.0777628176583676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190830 + }, + { + "epoch": 0.9255395120839309, + "grad_norm": 1.1920654969799216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190840 + }, + { + "epoch": 0.9255880102767671, + "grad_norm": 1.124951495512505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190850 + }, + { + "epoch": 0.9256365084696031, + "grad_norm": 1.124795517171151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190860 + }, + { + "epoch": 0.9256850066624392, + "grad_norm": 1.033730995914084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190870 + }, + { + "epoch": 0.9257335048552753, + "grad_norm": 1.1017723409167957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190880 + }, + { + "epoch": 0.9257820030481114, + "grad_norm": 1.1052424042645725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190890 + }, + { + "epoch": 0.9258305012409475, + "grad_norm": 1.2011053058813559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190900 + }, + { + "epoch": 0.9258789994337836, + "grad_norm": 1.0622027275530854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190910 + }, + { + "epoch": 0.9259274976266196, + "grad_norm": 9.671679208622663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190920 + }, + { + "epoch": 0.9259759958194558, + "grad_norm": 4.02813702748972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190930 + }, + { + "epoch": 0.9260244940122919, + "grad_norm": 1.0764775879579247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190940 + }, + { + "epoch": 0.926072992205128, + "grad_norm": 0.007695420645177364, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190950 + }, + { + "epoch": 0.9261214903979641, + "grad_norm": 1.1399695267755305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190960 + }, + { + "epoch": 0.9261699885908001, + "grad_norm": 1.915927441586973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190970 + }, + { + "epoch": 0.9262184867836363, + "grad_norm": 1.0417417115604621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190980 + }, + { + "epoch": 0.9262669849764723, + "grad_norm": 1.2360734444882837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 190990 + }, + { + "epoch": 0.9263154831693085, + "grad_norm": 1.2762747019223752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191000 + }, + { + "epoch": 0.9263639813621445, + "grad_norm": 1.00476040643116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191010 + }, + { + "epoch": 0.9264124795549806, + "grad_norm": 1.0623751904859091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191020 + }, + { + "epoch": 0.9264609777478167, + "grad_norm": 9.96740141090413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191030 + }, + { + "epoch": 0.9265094759406528, + "grad_norm": 1.1897174090336193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191040 + }, + { + "epoch": 0.9265579741334888, + "grad_norm": 1.2960006188222906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191050 + }, + { + "epoch": 0.926606472326325, + "grad_norm": 8.824179644761898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191060 + }, + { + "epoch": 0.926654970519161, + "grad_norm": 9.679447430244181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191070 + }, + { + "epoch": 0.9267034687119972, + "grad_norm": 8.545471814613848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191080 + }, + { + "epoch": 0.9267519669048332, + "grad_norm": 1.1221020486118505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191090 + }, + { + "epoch": 0.9268004650976693, + "grad_norm": 1.1710772014339454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191100 + }, + { + "epoch": 0.9268489632905054, + "grad_norm": 8.5509429936792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191110 + }, + { + "epoch": 0.9268974614833415, + "grad_norm": 8.796994848125905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191120 + }, + { + "epoch": 0.9269459596761775, + "grad_norm": 9.088034857995808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191130 + }, + { + "epoch": 0.9269944578690137, + "grad_norm": 8.420558401667222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191140 + }, + { + "epoch": 0.9270429560618497, + "grad_norm": 6.961121243875823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191150 + }, + { + "epoch": 0.9270914542546859, + "grad_norm": 6.961934104765533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191160 + }, + { + "epoch": 0.9271399524475219, + "grad_norm": 7.28650434211886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191170 + }, + { + "epoch": 0.927188450640358, + "grad_norm": 6.572078632416378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191180 + }, + { + "epoch": 0.9272369488331941, + "grad_norm": 6.368990170813049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191190 + }, + { + "epoch": 0.9272854470260302, + "grad_norm": 7.055085688989493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191200 + }, + { + "epoch": 0.9273339452188663, + "grad_norm": 6.813183404119627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191210 + }, + { + "epoch": 0.9273824434117024, + "grad_norm": 6.960647738196712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191220 + }, + { + "epoch": 0.9274309416045384, + "grad_norm": 6.579984983545728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191230 + }, + { + "epoch": 0.9274794397973746, + "grad_norm": 6.428481924558582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191240 + }, + { + "epoch": 0.9275279379902106, + "grad_norm": 6.204996907399618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191250 + }, + { + "epoch": 0.9275764361830467, + "grad_norm": 6.336676392493246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191260 + }, + { + "epoch": 0.9276249343758828, + "grad_norm": 6.05369905315456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191270 + }, + { + "epoch": 0.9276734325687189, + "grad_norm": 6.410601258721726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191280 + }, + { + "epoch": 0.927721930761555, + "grad_norm": 5.816548309667269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191290 + }, + { + "epoch": 0.9277704289543911, + "grad_norm": 5.709328547709447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191300 + }, + { + "epoch": 0.9278189271472271, + "grad_norm": 6.232340297174233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191310 + }, + { + "epoch": 0.9278674253400633, + "grad_norm": 6.125767413323047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191320 + }, + { + "epoch": 0.9279159235328993, + "grad_norm": 6.63115429233585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191330 + }, + { + "epoch": 0.9279644217257355, + "grad_norm": 5.641318239213433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191340 + }, + { + "epoch": 0.9280129199185715, + "grad_norm": 5.459067438096099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191350 + }, + { + "epoch": 0.9280614181114076, + "grad_norm": 5.972781877972011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191360 + }, + { + "epoch": 0.9281099163042437, + "grad_norm": 6.069807341191336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191370 + }, + { + "epoch": 0.9281584144970798, + "grad_norm": 6.331661666081345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191380 + }, + { + "epoch": 0.9282069126899158, + "grad_norm": 5.21870617831155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191390 + }, + { + "epoch": 0.928255410882752, + "grad_norm": 5.545699650610914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191400 + }, + { + "epoch": 0.928303909075588, + "grad_norm": 5.839913796990004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191410 + }, + { + "epoch": 0.9283524072684242, + "grad_norm": 6.016730367264245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191420 + }, + { + "epoch": 0.9284009054612602, + "grad_norm": 5.810312586618238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191430 + }, + { + "epoch": 0.9284494036540963, + "grad_norm": 7.623401074852154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191440 + }, + { + "epoch": 0.9284979018469325, + "grad_norm": 5.695615072909277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191450 + }, + { + "epoch": 0.9285464000397685, + "grad_norm": 5.681376933353022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191460 + }, + { + "epoch": 0.9285948982326047, + "grad_norm": 6.705465693812585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191470 + }, + { + "epoch": 0.9286433964254407, + "grad_norm": 5.542206054087728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191480 + }, + { + "epoch": 0.9286918946182768, + "grad_norm": 4.4826538214692846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191490 + }, + { + "epoch": 0.9287403928111129, + "grad_norm": 4.996180109628767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191500 + }, + { + "epoch": 0.928788891003949, + "grad_norm": 5.653947710015927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191510 + }, + { + "epoch": 0.928837389196785, + "grad_norm": 5.107553420202748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191520 + }, + { + "epoch": 0.9288858873896212, + "grad_norm": 4.970381155544601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191530 + }, + { + "epoch": 0.9289343855824572, + "grad_norm": 6.390859539351368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191540 + }, + { + "epoch": 0.9289828837752934, + "grad_norm": 4.6289295596579905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191550 + }, + { + "epoch": 0.9290313819681294, + "grad_norm": 5.229360340308631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191560 + }, + { + "epoch": 0.9290798801609655, + "grad_norm": 5.419339004220092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191570 + }, + { + "epoch": 0.9291283783538016, + "grad_norm": 5.115604153616005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191580 + }, + { + "epoch": 0.9291768765466377, + "grad_norm": 4.6447127033388824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191590 + }, + { + "epoch": 0.9292253747394738, + "grad_norm": 4.94702192099794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191600 + }, + { + "epoch": 0.9292738729323099, + "grad_norm": 5.461610612655932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191610 + }, + { + "epoch": 0.9293223711251459, + "grad_norm": 4.998973963665776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191620 + }, + { + "epoch": 0.9293708693179821, + "grad_norm": 4.702958733560081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191630 + }, + { + "epoch": 0.9294193675108181, + "grad_norm": 4.41411913243428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191640 + }, + { + "epoch": 0.9294678657036543, + "grad_norm": 4.6878957959961554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191650 + }, + { + "epoch": 0.9295163638964903, + "grad_norm": 5.089781893730105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191660 + }, + { + "epoch": 0.9295648620893264, + "grad_norm": 4.950308039042284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191670 + }, + { + "epoch": 0.9296133602821625, + "grad_norm": 4.739698624689481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191680 + }, + { + "epoch": 0.9296618584749986, + "grad_norm": 5.069881012786936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191690 + }, + { + "epoch": 0.9297103566678346, + "grad_norm": 4.4072891114410595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191700 + }, + { + "epoch": 0.9297588548606708, + "grad_norm": 4.375247613097599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191710 + }, + { + "epoch": 0.9298073530535068, + "grad_norm": 4.299479883229651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191720 + }, + { + "epoch": 0.929855851246343, + "grad_norm": 4.469445684662787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191730 + }, + { + "epoch": 0.929904349439179, + "grad_norm": 4.367996950804809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191740 + }, + { + "epoch": 0.9299528476320151, + "grad_norm": 6.269400500968914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191750 + }, + { + "epoch": 0.9300013458248512, + "grad_norm": 4.555514294679597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191760 + }, + { + "epoch": 0.9300498440176873, + "grad_norm": 4.906547133032291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191770 + }, + { + "epoch": 0.9300983422105233, + "grad_norm": 4.219501761326683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191780 + }, + { + "epoch": 0.9301468404033595, + "grad_norm": 4.154879889028962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191790 + }, + { + "epoch": 0.9301953385961955, + "grad_norm": 4.388456318338285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191800 + }, + { + "epoch": 0.9302438367890317, + "grad_norm": 4.145749699091539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191810 + }, + { + "epoch": 0.9302923349818677, + "grad_norm": 4.193375389149878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191820 + }, + { + "epoch": 0.9303408331747038, + "grad_norm": 4.0730498085395084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191830 + }, + { + "epoch": 0.9303893313675399, + "grad_norm": 3.9159277775979717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191840 + }, + { + "epoch": 0.930437829560376, + "grad_norm": 4.331280649694236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191850 + }, + { + "epoch": 0.930486327753212, + "grad_norm": 4.6756909455325513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191860 + }, + { + "epoch": 0.9305348259460482, + "grad_norm": 4.292231210456521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191870 + }, + { + "epoch": 0.9305833241388842, + "grad_norm": 4.54369029512236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191880 + }, + { + "epoch": 0.9306318223317204, + "grad_norm": 3.756088347017794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191890 + }, + { + "epoch": 0.9306803205245564, + "grad_norm": 1.4774893770663766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191900 + }, + { + "epoch": 0.9307288187173925, + "grad_norm": 4.1924354832190147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191910 + }, + { + "epoch": 0.9307773169102286, + "grad_norm": 4.2077400053130987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191920 + }, + { + "epoch": 0.9308258151030647, + "grad_norm": 3.8982625483185984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191930 + }, + { + "epoch": 0.9308743132959008, + "grad_norm": 3.535624841788376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191940 + }, + { + "epoch": 0.9309228114887369, + "grad_norm": 3.707222617777006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191950 + }, + { + "epoch": 0.930971309681573, + "grad_norm": 4.3261184146103915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191960 + }, + { + "epoch": 0.9310198078744091, + "grad_norm": 3.9396920215040154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191970 + }, + { + "epoch": 0.9310683060672452, + "grad_norm": 4.052077997585002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191980 + }, + { + "epoch": 0.9311168042600813, + "grad_norm": 3.462402560217015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 191990 + }, + { + "epoch": 0.9311653024529174, + "grad_norm": 3.5167673217983975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192000 + }, + { + "epoch": 0.9312138006457534, + "grad_norm": 3.671686101824889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192010 + }, + { + "epoch": 0.9312622988385896, + "grad_norm": 3.8637037391708873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192020 + }, + { + "epoch": 0.9313107970314256, + "grad_norm": 3.8841443483761395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192030 + }, + { + "epoch": 0.9313592952242618, + "grad_norm": 4.4832623302681895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192040 + }, + { + "epoch": 0.9314077934170978, + "grad_norm": 3.65625908216316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192050 + }, + { + "epoch": 0.9314562916099339, + "grad_norm": 3.5832374578603776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192060 + }, + { + "epoch": 0.93150478980277, + "grad_norm": 3.8862503970449325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192070 + }, + { + "epoch": 0.9315532879956061, + "grad_norm": 3.642281853899476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192080 + }, + { + "epoch": 0.9316017861884421, + "grad_norm": 3.4239482715747727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192090 + }, + { + "epoch": 0.9316502843812783, + "grad_norm": 3.308573752747179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192100 + }, + { + "epoch": 0.9316987825741143, + "grad_norm": 3.6984317830501823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192110 + }, + { + "epoch": 0.9317472807669505, + "grad_norm": 4.0305874904333905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192120 + }, + { + "epoch": 0.9317957789597865, + "grad_norm": 3.4260682468811865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192130 + }, + { + "epoch": 0.9318442771526226, + "grad_norm": 3.2193847232520056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192140 + }, + { + "epoch": 0.9318927753454587, + "grad_norm": 3.122864029592165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192150 + }, + { + "epoch": 0.9319412735382948, + "grad_norm": 3.376038932856318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192160 + }, + { + "epoch": 0.9319897717311308, + "grad_norm": 3.668300223580445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192170 + }, + { + "epoch": 0.932038269923967, + "grad_norm": 3.5961411981588753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192180 + }, + { + "epoch": 0.932086768116803, + "grad_norm": 3.2068470545709715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192190 + }, + { + "epoch": 0.9321352663096392, + "grad_norm": 3.4033405427180696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192200 + }, + { + "epoch": 0.9321837645024752, + "grad_norm": 3.467197302597924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192210 + }, + { + "epoch": 0.9322322626953113, + "grad_norm": 3.116977609352034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192220 + }, + { + "epoch": 0.9322807608881474, + "grad_norm": 3.2865349908206554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192230 + }, + { + "epoch": 0.9323292590809835, + "grad_norm": 3.177455596414802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192240 + }, + { + "epoch": 0.9323777572738196, + "grad_norm": 3.586303876090824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192250 + }, + { + "epoch": 0.9324262554666557, + "grad_norm": 3.901140530615521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192260 + }, + { + "epoch": 0.9324747536594917, + "grad_norm": 3.537861630320549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192270 + }, + { + "epoch": 0.9325232518523279, + "grad_norm": 3.3648322528279095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192280 + }, + { + "epoch": 0.9325717500451639, + "grad_norm": 3.082076887039875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192290 + }, + { + "epoch": 0.932620248238, + "grad_norm": 2.8726105938403634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192300 + }, + { + "epoch": 0.9326687464308361, + "grad_norm": 4.1361593616784376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192310 + }, + { + "epoch": 0.9327172446236722, + "grad_norm": 3.3557290635144454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192320 + }, + { + "epoch": 0.9327657428165083, + "grad_norm": 3.0963778385739715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192330 + }, + { + "epoch": 0.9328142410093444, + "grad_norm": 3.321934514133318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192340 + }, + { + "epoch": 0.9328627392021804, + "grad_norm": 3.0857444244247745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192350 + }, + { + "epoch": 0.9329112373950166, + "grad_norm": 3.2528916449336975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192360 + }, + { + "epoch": 0.9329597355878526, + "grad_norm": 3.169795945723308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192370 + }, + { + "epoch": 0.9330082337806888, + "grad_norm": 3.0202392053979565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192380 + }, + { + "epoch": 0.9330567319735248, + "grad_norm": 2.9865668693673797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192390 + }, + { + "epoch": 0.9331052301663609, + "grad_norm": 3.3774728080970817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192400 + }, + { + "epoch": 0.933153728359197, + "grad_norm": 3.110336024292337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192410 + }, + { + "epoch": 0.9332022265520331, + "grad_norm": 2.961147913538298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192420 + }, + { + "epoch": 0.9332507247448691, + "grad_norm": 3.1752722406963585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192430 + }, + { + "epoch": 0.9332992229377053, + "grad_norm": 2.6424007160130714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192440 + }, + { + "epoch": 0.9333477211305413, + "grad_norm": 2.9052969807707996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192450 + }, + { + "epoch": 0.9333962193233775, + "grad_norm": 3.082531065956573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192460 + }, + { + "epoch": 0.9334447175162136, + "grad_norm": 2.8921070338583377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192470 + }, + { + "epoch": 0.9334932157090496, + "grad_norm": 3.178151075644564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192480 + }, + { + "epoch": 0.9335417139018858, + "grad_norm": 2.654637683008332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192490 + }, + { + "epoch": 0.9335902120947218, + "grad_norm": 2.9897728381911293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192500 + }, + { + "epoch": 0.933638710287558, + "grad_norm": 2.972685138047382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192510 + }, + { + "epoch": 0.933687208480394, + "grad_norm": 3.3133704846477485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192520 + }, + { + "epoch": 0.9337357066732301, + "grad_norm": 2.868922592824674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192530 + }, + { + "epoch": 0.9337842048660662, + "grad_norm": 2.611195952795242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192540 + }, + { + "epoch": 0.9338327030589023, + "grad_norm": 2.886793879497418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192550 + }, + { + "epoch": 0.9338812012517383, + "grad_norm": 2.905973133238149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192560 + }, + { + "epoch": 0.9339296994445745, + "grad_norm": 2.8564758736138174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192570 + }, + { + "epoch": 0.9339781976374105, + "grad_norm": 2.753677961209178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192580 + }, + { + "epoch": 0.9340266958302467, + "grad_norm": 2.6886425530392444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192590 + }, + { + "epoch": 0.9340751940230827, + "grad_norm": 2.7333027219356154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192600 + }, + { + "epoch": 0.9341236922159188, + "grad_norm": 2.84535047967438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192610 + }, + { + "epoch": 0.9341721904087549, + "grad_norm": 2.8042998678756703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192620 + }, + { + "epoch": 0.934220688601591, + "grad_norm": 2.851923852631444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192630 + }, + { + "epoch": 0.934269186794427, + "grad_norm": 2.5508836642984534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192640 + }, + { + "epoch": 0.9343176849872632, + "grad_norm": 2.7097971155853884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192650 + }, + { + "epoch": 0.9343661831800992, + "grad_norm": 2.7933808155466977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192660 + }, + { + "epoch": 0.9344146813729354, + "grad_norm": 2.6181811563219526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192670 + }, + { + "epoch": 0.9344631795657714, + "grad_norm": 2.639374372392922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192680 + }, + { + "epoch": 0.9345116777586076, + "grad_norm": 2.643855907535908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192690 + }, + { + "epoch": 0.9345601759514436, + "grad_norm": 2.403559733465954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192700 + }, + { + "epoch": 0.9346086741442797, + "grad_norm": 2.9156760206205945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192710 + }, + { + "epoch": 0.9346571723371158, + "grad_norm": 2.921700286151463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192720 + }, + { + "epoch": 0.9347056705299519, + "grad_norm": 2.978024156163883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192730 + }, + { + "epoch": 0.9347541687227879, + "grad_norm": 2.5300235506620083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192740 + }, + { + "epoch": 0.9348026669156241, + "grad_norm": 2.520427244689927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192750 + }, + { + "epoch": 0.9348511651084601, + "grad_norm": 2.7693775450643443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192760 + }, + { + "epoch": 0.9348996633012963, + "grad_norm": 2.6521357199271733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192770 + }, + { + "epoch": 0.9349481614941323, + "grad_norm": 2.77558456218685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192780 + }, + { + "epoch": 0.9349966596869684, + "grad_norm": 2.451935756653256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192790 + }, + { + "epoch": 0.9350451578798045, + "grad_norm": 2.3889975864221924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192800 + }, + { + "epoch": 0.9350936560726406, + "grad_norm": 2.720349527862709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192810 + }, + { + "epoch": 0.9351421542654766, + "grad_norm": 2.5906996370395063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192820 + }, + { + "epoch": 0.9351906524583128, + "grad_norm": 2.588457732599636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192830 + }, + { + "epoch": 0.9352391506511488, + "grad_norm": 2.564284500294889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192840 + }, + { + "epoch": 0.935287648843985, + "grad_norm": 2.857913159459713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192850 + }, + { + "epoch": 0.935336147036821, + "grad_norm": 2.5136131398539874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192860 + }, + { + "epoch": 0.9353846452296571, + "grad_norm": 2.7753989684242697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192870 + }, + { + "epoch": 0.9354331434224932, + "grad_norm": 2.3948766170178715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192880 + }, + { + "epoch": 0.9354816416153293, + "grad_norm": 2.1318635390343843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192890 + }, + { + "epoch": 0.9355301398081654, + "grad_norm": 4.958482691108657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192900 + }, + { + "epoch": 0.9355786380010015, + "grad_norm": 2.5231517497559253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192910 + }, + { + "epoch": 0.9356271361938375, + "grad_norm": 2.437312218717125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192920 + }, + { + "epoch": 0.9356756343866737, + "grad_norm": 2.5334114184261125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192930 + }, + { + "epoch": 0.9357241325795097, + "grad_norm": 2.2260465470935742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192940 + }, + { + "epoch": 0.9357726307723458, + "grad_norm": 2.529350240365602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192950 + }, + { + "epoch": 0.9358211289651819, + "grad_norm": 2.5797811531447223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192960 + }, + { + "epoch": 0.935869627158018, + "grad_norm": 2.2622060669164057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192970 + }, + { + "epoch": 0.9359181253508542, + "grad_norm": 2.5040370132956014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192980 + }, + { + "epoch": 0.9359666235436902, + "grad_norm": 2.0637401121348375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 192990 + }, + { + "epoch": 0.9360151217365263, + "grad_norm": 2.2260955745423416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193000 + }, + { + "epoch": 0.9360636199293624, + "grad_norm": 2.540163848152588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193010 + }, + { + "epoch": 0.9361121181221985, + "grad_norm": 2.3231929446865252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193020 + }, + { + "epoch": 0.9361606163150346, + "grad_norm": 2.439893478367594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193030 + }, + { + "epoch": 0.9362091145078707, + "grad_norm": 1.9705461795638257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193040 + }, + { + "epoch": 0.9362576127007067, + "grad_norm": 2.2483894213110034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193050 + }, + { + "epoch": 0.9363061108935429, + "grad_norm": 2.3468915344437846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193060 + }, + { + "epoch": 0.9363546090863789, + "grad_norm": 2.2634314689184976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193070 + }, + { + "epoch": 0.936403107279215, + "grad_norm": 2.347901499888394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193080 + }, + { + "epoch": 0.9364516054720511, + "grad_norm": 1.9613725044109742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193090 + }, + { + "epoch": 0.9365001036648872, + "grad_norm": 2.2207683514352539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193100 + }, + { + "epoch": 0.9365486018577233, + "grad_norm": 2.3357188183581457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193110 + }, + { + "epoch": 0.9365971000505594, + "grad_norm": 2.2137083988127415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193120 + }, + { + "epoch": 0.9366455982433954, + "grad_norm": 2.4975946644190117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193130 + }, + { + "epoch": 0.9366940964362316, + "grad_norm": 1.9730988753963175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193140 + }, + { + "epoch": 0.9367425946290676, + "grad_norm": 1.9433461773132876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193150 + }, + { + "epoch": 0.9367910928219038, + "grad_norm": 2.2169346891587338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193160 + }, + { + "epoch": 0.9368395910147398, + "grad_norm": 2.1960970286727388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193170 + }, + { + "epoch": 0.9368880892075759, + "grad_norm": 2.1513993431199196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193180 + }, + { + "epoch": 0.936936587400412, + "grad_norm": 2.0229950337125047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193190 + }, + { + "epoch": 0.9369850855932481, + "grad_norm": 1.9519474392382108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193200 + }, + { + "epoch": 0.9370335837860841, + "grad_norm": 2.2693693324526976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193210 + }, + { + "epoch": 0.9370820819789203, + "grad_norm": 2.2737360438895848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193220 + }, + { + "epoch": 0.9371305801717563, + "grad_norm": 2.1193756083448534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193230 + }, + { + "epoch": 0.9371790783645925, + "grad_norm": 1.8710706228830531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193240 + }, + { + "epoch": 0.9372275765574285, + "grad_norm": 1.9582320476274617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193250 + }, + { + "epoch": 0.9372760747502646, + "grad_norm": 2.1667260341473593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193260 + }, + { + "epoch": 0.9373245729431007, + "grad_norm": 2.1956434181902296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193270 + }, + { + "epoch": 0.9373730711359368, + "grad_norm": 2.1955571583021083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193280 + }, + { + "epoch": 0.9374215693287729, + "grad_norm": 1.9309041476844868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193290 + }, + { + "epoch": 0.937470067521609, + "grad_norm": 1.7087745618482586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193300 + }, + { + "epoch": 0.937518565714445, + "grad_norm": 2.146181685702686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193310 + }, + { + "epoch": 0.9375670639072812, + "grad_norm": 2.284602231839017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193320 + }, + { + "epoch": 0.9376155621001172, + "grad_norm": 2.0561913061101222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193330 + }, + { + "epoch": 0.9376640602929533, + "grad_norm": 1.9045243959681102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193340 + }, + { + "epoch": 0.9377125584857894, + "grad_norm": 1.7615646186186495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193350 + }, + { + "epoch": 0.9377610566786255, + "grad_norm": 2.1772707725631335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193360 + }, + { + "epoch": 0.9378095548714616, + "grad_norm": 2.0603063433100033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193370 + }, + { + "epoch": 0.9378580530642977, + "grad_norm": 1.9565278819300147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193380 + }, + { + "epoch": 0.9379065512571337, + "grad_norm": 1.8421144432068104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193390 + }, + { + "epoch": 0.9379550494499699, + "grad_norm": 1.652262540119409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193400 + }, + { + "epoch": 0.9380035476428059, + "grad_norm": 2.0395405897488672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193410 + }, + { + "epoch": 0.9380520458356421, + "grad_norm": 1.989333213714417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193420 + }, + { + "epoch": 0.9381005440284781, + "grad_norm": 2.1630205537803704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193430 + }, + { + "epoch": 0.9381490422213142, + "grad_norm": 1.7447165134854004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193440 + }, + { + "epoch": 0.9381975404141503, + "grad_norm": 1.810276586411419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193450 + }, + { + "epoch": 0.9382460386069864, + "grad_norm": 1.930458495280618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193460 + }, + { + "epoch": 0.9382945367998224, + "grad_norm": 1.9046704835545825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193470 + }, + { + "epoch": 0.9383430349926586, + "grad_norm": 1.9128712835936312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193480 + }, + { + "epoch": 0.9383915331854947, + "grad_norm": 1.641253248862995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193490 + }, + { + "epoch": 0.9384400313783308, + "grad_norm": 1.8008917379574996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193500 + }, + { + "epoch": 0.9384885295711669, + "grad_norm": 1.935532623065228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193510 + }, + { + "epoch": 0.9385370277640029, + "grad_norm": 2.3287505257485464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193520 + }, + { + "epoch": 0.9385855259568391, + "grad_norm": 1.950270700490364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193530 + }, + { + "epoch": 0.9386340241496751, + "grad_norm": 1.6573017092014197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193540 + }, + { + "epoch": 0.9386825223425113, + "grad_norm": 1.6885483944406587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193550 + }, + { + "epoch": 0.9387310205353473, + "grad_norm": 1.94604254488695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193560 + }, + { + "epoch": 0.9387795187281834, + "grad_norm": 1.7979201061280037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193570 + }, + { + "epoch": 0.9388280169210195, + "grad_norm": 1.8113435373834363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193580 + }, + { + "epoch": 0.9388765151138556, + "grad_norm": 1.558419597813554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193590 + }, + { + "epoch": 0.9389250133066916, + "grad_norm": 1.576619297338766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193600 + }, + { + "epoch": 0.9389735114995278, + "grad_norm": 1.7569992394328438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193610 + }, + { + "epoch": 0.9390220096923638, + "grad_norm": 1.9580676280384068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193620 + }, + { + "epoch": 0.9390705078852, + "grad_norm": 1.7835785115494218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193630 + }, + { + "epoch": 0.939119006078036, + "grad_norm": 1.5500478411922813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193640 + }, + { + "epoch": 0.9391675042708721, + "grad_norm": 1.5206175874027394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193650 + }, + { + "epoch": 0.9392160024637082, + "grad_norm": 1.7192341772442887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193660 + }, + { + "epoch": 0.9392645006565443, + "grad_norm": 1.9932178929593647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193670 + }, + { + "epoch": 0.9393129988493804, + "grad_norm": 1.785747514304603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193680 + }, + { + "epoch": 0.9393614970422165, + "grad_norm": 1.708041139636407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193690 + }, + { + "epoch": 0.9394099952350525, + "grad_norm": 1.485430232150975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193700 + }, + { + "epoch": 0.9394584934278887, + "grad_norm": 3.950409279696032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193710 + }, + { + "epoch": 0.9395069916207247, + "grad_norm": 1.7379456096477952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193720 + }, + { + "epoch": 0.9395554898135609, + "grad_norm": 1.8239497023841977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193730 + }, + { + "epoch": 0.9396039880063969, + "grad_norm": 1.479661619896433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193740 + }, + { + "epoch": 0.939652486199233, + "grad_norm": 1.4522125013627374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193750 + }, + { + "epoch": 0.9397009843920691, + "grad_norm": 1.853929916251218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193760 + }, + { + "epoch": 0.9397494825849052, + "grad_norm": 1.7902004856296116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193770 + }, + { + "epoch": 0.9397979807777412, + "grad_norm": 2.0519028964827157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193780 + }, + { + "epoch": 0.9398464789705774, + "grad_norm": 1.4527466873914818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193790 + }, + { + "epoch": 0.9398949771634134, + "grad_norm": 1.4571922690720385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193800 + }, + { + "epoch": 0.9399434753562496, + "grad_norm": 1.7240741101431922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193810 + }, + { + "epoch": 0.9399919735490856, + "grad_norm": 1.7347329617223295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193820 + }, + { + "epoch": 0.9400404717419217, + "grad_norm": 1.6605665109636902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193830 + }, + { + "epoch": 0.9400889699347578, + "grad_norm": 1.4847709906007367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193840 + }, + { + "epoch": 0.9401374681275939, + "grad_norm": 2.303257105040757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193850 + }, + { + "epoch": 0.9401859663204299, + "grad_norm": 1.838715206758934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193860 + }, + { + "epoch": 0.9402344645132661, + "grad_norm": 1.6168834804375365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193870 + }, + { + "epoch": 0.9402829627061021, + "grad_norm": 1.6294259808091738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193880 + }, + { + "epoch": 0.9403314608989383, + "grad_norm": 1.395785602653632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193890 + }, + { + "epoch": 0.9403799590917743, + "grad_norm": 1.3880854510261997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193900 + }, + { + "epoch": 0.9404284572846104, + "grad_norm": 1.6776940015006403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193910 + }, + { + "epoch": 0.9404769554774465, + "grad_norm": 1.7155842613192362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193920 + }, + { + "epoch": 0.9405254536702826, + "grad_norm": 1.6346967868230422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193930 + }, + { + "epoch": 0.9405739518631187, + "grad_norm": 1.489944452259806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193940 + }, + { + "epoch": 0.9406224500559548, + "grad_norm": 1.3946412025234167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193950 + }, + { + "epoch": 0.9406709482487908, + "grad_norm": 1.5443822576344246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193960 + }, + { + "epoch": 0.940719446441627, + "grad_norm": 1.5461141344985663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193970 + }, + { + "epoch": 0.940767944634463, + "grad_norm": 1.731615952849097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193980 + }, + { + "epoch": 0.9408164428272991, + "grad_norm": 1.3883563099170715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 193990 + }, + { + "epoch": 0.9408649410201353, + "grad_norm": 1.50932862652553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194000 + }, + { + "epoch": 0.9409134392129713, + "grad_norm": 1.618322613694545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194010 + }, + { + "epoch": 0.9409619374058075, + "grad_norm": 1.575942576437228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194020 + }, + { + "epoch": 0.9410104355986435, + "grad_norm": 1.568009651009561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194030 + }, + { + "epoch": 0.9410589337914796, + "grad_norm": 1.4069281917272747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194040 + }, + { + "epoch": 0.9411074319843157, + "grad_norm": 1.4303576278962282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194050 + }, + { + "epoch": 0.9411559301771518, + "grad_norm": 1.5890994120582036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194060 + }, + { + "epoch": 0.9412044283699879, + "grad_norm": 1.5701789379818365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194070 + }, + { + "epoch": 0.941252926562824, + "grad_norm": 1.670404969900119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194080 + }, + { + "epoch": 0.94130142475566, + "grad_norm": 2.1808325811889517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194090 + }, + { + "epoch": 0.9413499229484962, + "grad_norm": 1.433803049621929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194100 + }, + { + "epoch": 0.9413984211413322, + "grad_norm": 1.6429305560450302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194110 + }, + { + "epoch": 0.9414469193341684, + "grad_norm": 1.4983490359554708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194120 + }, + { + "epoch": 0.9414954175270044, + "grad_norm": 1.555357300730975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194130 + }, + { + "epoch": 0.9415439157198405, + "grad_norm": 1.4959003635794943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194140 + }, + { + "epoch": 0.9415924139126766, + "grad_norm": 2.0047566806624673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194150 + }, + { + "epoch": 0.9416409121055127, + "grad_norm": 1.5602519454205321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194160 + }, + { + "epoch": 0.9416894102983487, + "grad_norm": 1.626140431199019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194170 + }, + { + "epoch": 0.9417379084911849, + "grad_norm": 1.4411585880225175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194180 + }, + { + "epoch": 0.9417864066840209, + "grad_norm": 1.36593371280469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194190 + }, + { + "epoch": 0.9418349048768571, + "grad_norm": 1.3333264803350175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194200 + }, + { + "epoch": 0.9418834030696931, + "grad_norm": 1.5043755752230936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194210 + }, + { + "epoch": 0.9419319012625292, + "grad_norm": 1.4593275921015447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194220 + }, + { + "epoch": 0.9419803994553653, + "grad_norm": 1.4738365905486717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194230 + }, + { + "epoch": 0.9420288976482014, + "grad_norm": 1.3053643499461032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194240 + }, + { + "epoch": 0.9420773958410374, + "grad_norm": 1.4340876930418744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194250 + }, + { + "epoch": 0.9421258940338736, + "grad_norm": 1.409506751315348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194260 + }, + { + "epoch": 0.9421743922267096, + "grad_norm": 1.473329263035339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194270 + }, + { + "epoch": 0.9422228904195458, + "grad_norm": 1.4108189816397498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194280 + }, + { + "epoch": 0.9422713886123818, + "grad_norm": 1.2887122124993766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194290 + }, + { + "epoch": 0.9423198868052179, + "grad_norm": 1.3645374963289214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194300 + }, + { + "epoch": 0.942368384998054, + "grad_norm": 1.4894764888140344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194310 + }, + { + "epoch": 0.9424168831908901, + "grad_norm": 1.4145348359306809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194320 + }, + { + "epoch": 0.9424653813837262, + "grad_norm": 1.402234772740485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194330 + }, + { + "epoch": 0.9425138795765623, + "grad_norm": 1.2855862507876736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194340 + }, + { + "epoch": 0.9425623777693983, + "grad_norm": 1.2906116353406105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194350 + }, + { + "epoch": 0.9426108759622345, + "grad_norm": 1.460997367530581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194360 + }, + { + "epoch": 0.9426593741550705, + "grad_norm": 1.435522136716827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194370 + }, + { + "epoch": 0.9427078723479067, + "grad_norm": 1.424917854819796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194380 + }, + { + "epoch": 0.9427563705407427, + "grad_norm": 1.217892133809073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194390 + }, + { + "epoch": 0.9428048687335788, + "grad_norm": 1.4282869642556761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194400 + }, + { + "epoch": 0.9428533669264149, + "grad_norm": 1.4584324503630341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194410 + }, + { + "epoch": 0.942901865119251, + "grad_norm": 1.4027391159743274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194420 + }, + { + "epoch": 0.942950363312087, + "grad_norm": 1.3429715295387723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194430 + }, + { + "epoch": 0.9429988615049232, + "grad_norm": 1.2896843770704436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194440 + }, + { + "epoch": 0.9430473596977592, + "grad_norm": 1.331366661361244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194450 + }, + { + "epoch": 0.9430958578905954, + "grad_norm": 1.402247846726823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194460 + }, + { + "epoch": 0.9431443560834314, + "grad_norm": 1.4081929577969277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194470 + }, + { + "epoch": 0.9431928542762675, + "grad_norm": 1.3856045200100198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194480 + }, + { + "epoch": 0.9432413524691036, + "grad_norm": 1.2309412511513074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194490 + }, + { + "epoch": 0.9432898506619397, + "grad_norm": 1.1973735070114344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194500 + }, + { + "epoch": 0.9433383488547759, + "grad_norm": 1.3322231495749293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194510 + }, + { + "epoch": 0.9433868470476119, + "grad_norm": 1.4474021270416415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194520 + }, + { + "epoch": 0.943435345240448, + "grad_norm": 1.4143178361791797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194530 + }, + { + "epoch": 0.9434838434332841, + "grad_norm": 1.2248297309724876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194540 + }, + { + "epoch": 0.9435323416261202, + "grad_norm": 1.277554702028283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194550 + }, + { + "epoch": 0.9435808398189562, + "grad_norm": 1.3439736790132883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194560 + }, + { + "epoch": 0.9436293380117924, + "grad_norm": 1.3146444644007715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194570 + }, + { + "epoch": 0.9436778362046284, + "grad_norm": 1.3641793827900983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194580 + }, + { + "epoch": 0.9437263343974646, + "grad_norm": 1.1971658864240453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194590 + }, + { + "epoch": 0.9437748325903006, + "grad_norm": 1.2147542349794094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194600 + }, + { + "epoch": 0.9438233307831367, + "grad_norm": 1.2964665074832737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194610 + }, + { + "epoch": 0.9438718289759728, + "grad_norm": 1.333897046151833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194620 + }, + { + "epoch": 0.9439203271688089, + "grad_norm": 1.4940016512809962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194630 + }, + { + "epoch": 0.943968825361645, + "grad_norm": 1.1651264486545188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194640 + }, + { + "epoch": 0.9440173235544811, + "grad_norm": 1.1552295831052106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194650 + }, + { + "epoch": 0.9440658217473171, + "grad_norm": 1.355397643010292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194660 + }, + { + "epoch": 0.9441143199401533, + "grad_norm": 1.315822686365209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194670 + }, + { + "epoch": 0.9441628181329893, + "grad_norm": 1.44890563547051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194680 + }, + { + "epoch": 0.9442113163258254, + "grad_norm": 1.264822770963292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194690 + }, + { + "epoch": 0.9442598145186615, + "grad_norm": 1.1727088633506355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194700 + }, + { + "epoch": 0.9443083127114976, + "grad_norm": 1.233790243304611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194710 + }, + { + "epoch": 0.9443568109043337, + "grad_norm": 1.3213148974955402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194720 + }, + { + "epoch": 0.9444053090971698, + "grad_norm": 1.2643940294765343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194730 + }, + { + "epoch": 0.9444538072900058, + "grad_norm": 1.1707476232913905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194740 + }, + { + "epoch": 0.944502305482842, + "grad_norm": 1.2237536850534525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194750 + }, + { + "epoch": 0.944550803675678, + "grad_norm": 1.358786221317132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194760 + }, + { + "epoch": 0.9445993018685142, + "grad_norm": 1.3309841051523108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194770 + }, + { + "epoch": 0.9446478000613502, + "grad_norm": 1.2851330666308058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194780 + }, + { + "epoch": 0.9446962982541863, + "grad_norm": 1.1490268292391193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194790 + }, + { + "epoch": 0.9447447964470224, + "grad_norm": 1.1351356477007357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194800 + }, + { + "epoch": 0.9447932946398585, + "grad_norm": 1.211302702586181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194810 + }, + { + "epoch": 0.9448417928326945, + "grad_norm": 1.2501106994022848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194820 + }, + { + "epoch": 0.9448902910255307, + "grad_norm": 1.2767064561103325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194830 + }, + { + "epoch": 0.9449387892183667, + "grad_norm": 1.1342513772660823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194840 + }, + { + "epoch": 0.9449872874112029, + "grad_norm": 1.116189807248702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194850 + }, + { + "epoch": 0.9450357856040389, + "grad_norm": 1.3473693627474859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194860 + }, + { + "epoch": 0.945084283796875, + "grad_norm": 1.2126623971653316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194870 + }, + { + "epoch": 0.9451327819897111, + "grad_norm": 1.257154593758969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194880 + }, + { + "epoch": 0.9451812801825472, + "grad_norm": 1.1715799530520599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194890 + }, + { + "epoch": 0.9452297783753832, + "grad_norm": 1.1129532140330411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194900 + }, + { + "epoch": 0.9452782765682194, + "grad_norm": 1.4703860529152735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194910 + }, + { + "epoch": 0.9453267747610554, + "grad_norm": 1.3101333706799778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194920 + }, + { + "epoch": 0.9453752729538916, + "grad_norm": 1.2797138992937107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194930 + }, + { + "epoch": 0.9454237711467276, + "grad_norm": 1.1842713121268389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194940 + }, + { + "epoch": 0.9454722693395637, + "grad_norm": 1.1533099808502811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194950 + }, + { + "epoch": 0.9455207675323998, + "grad_norm": 1.2405698157635925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194960 + }, + { + "epoch": 0.9455692657252359, + "grad_norm": 1.1716244330273184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194970 + }, + { + "epoch": 0.945617763918072, + "grad_norm": 1.2659120329772122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194980 + }, + { + "epoch": 0.9456662621109081, + "grad_norm": 1.2845201524669392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 194990 + }, + { + "epoch": 0.9457147603037441, + "grad_norm": 1.1040711456189456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195000 + }, + { + "epoch": 0.9457632584965803, + "grad_norm": 1.1628439722244366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195010 + }, + { + "epoch": 0.9458117566894163, + "grad_norm": 1.1956254297729174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195020 + }, + { + "epoch": 0.9458602548822524, + "grad_norm": 1.216686200677941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195030 + }, + { + "epoch": 0.9459087530750886, + "grad_norm": 1.0849294085346628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195040 + }, + { + "epoch": 0.9459572512679246, + "grad_norm": 1.1257203169634522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195050 + }, + { + "epoch": 0.9460057494607608, + "grad_norm": 1.266113827114168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195060 + }, + { + "epoch": 0.9460542476535968, + "grad_norm": 1.1868522875602139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195070 + }, + { + "epoch": 0.946102745846433, + "grad_norm": 1.1913802921981187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195080 + }, + { + "epoch": 0.946151244039269, + "grad_norm": 1.1087877282989211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195090 + }, + { + "epoch": 0.9461997422321051, + "grad_norm": 1.0861189281285988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195100 + }, + { + "epoch": 0.9462482404249412, + "grad_norm": 1.1979167879871966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195110 + }, + { + "epoch": 0.9462967386177773, + "grad_norm": 1.1797581578321115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195120 + }, + { + "epoch": 0.9463452368106133, + "grad_norm": 1.208233584293339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195130 + }, + { + "epoch": 0.9463937350034495, + "grad_norm": 1.0893057833527564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195140 + }, + { + "epoch": 0.9464422331962855, + "grad_norm": 1.0951305284834234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195150 + }, + { + "epoch": 0.9464907313891217, + "grad_norm": 1.1468809191228502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195160 + }, + { + "epoch": 0.9465392295819577, + "grad_norm": 1.1923901865884545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195170 + }, + { + "epoch": 0.9465877277747938, + "grad_norm": 1.1703949809316327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195180 + }, + { + "epoch": 0.9466362259676299, + "grad_norm": 1.0821656815096503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195190 + }, + { + "epoch": 0.946684724160466, + "grad_norm": 1.0950533635423199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195200 + }, + { + "epoch": 0.946733222353302, + "grad_norm": 1.1948671385653142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195210 + }, + { + "epoch": 0.9467817205461382, + "grad_norm": 1.1897935081606192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195220 + }, + { + "epoch": 0.9468302187389742, + "grad_norm": 1.1748859662930045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195230 + }, + { + "epoch": 0.9468787169318104, + "grad_norm": 1.0716553333622869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195240 + }, + { + "epoch": 0.9469272151246464, + "grad_norm": 1.0375354975167284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195250 + }, + { + "epoch": 0.9469757133174825, + "grad_norm": 1.1307198377608074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195260 + }, + { + "epoch": 0.9470242115103186, + "grad_norm": 1.096558577273754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195270 + }, + { + "epoch": 0.9470727097031547, + "grad_norm": 1.4334428044548986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195280 + }, + { + "epoch": 0.9471212078959907, + "grad_norm": 1.0457635823968303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195290 + }, + { + "epoch": 0.9471697060888269, + "grad_norm": 1.0393974037015141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195300 + }, + { + "epoch": 0.9472182042816629, + "grad_norm": 1.0816241768907275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195310 + }, + { + "epoch": 0.9472667024744991, + "grad_norm": 1.0955916707189317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195320 + }, + { + "epoch": 0.9473152006673351, + "grad_norm": 1.0661322846772237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195330 + }, + { + "epoch": 0.9473636988601712, + "grad_norm": 1.0935877270412675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195340 + }, + { + "epoch": 0.9474121970530073, + "grad_norm": 9.879968843051756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195350 + }, + { + "epoch": 0.9474606952458434, + "grad_norm": 1.2018936956792459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195360 + }, + { + "epoch": 0.9475091934386795, + "grad_norm": 1.0912163617149417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195370 + }, + { + "epoch": 0.9475576916315156, + "grad_norm": 1.0770159519779554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195380 + }, + { + "epoch": 0.9476061898243516, + "grad_norm": 1.0010266038307236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195390 + }, + { + "epoch": 0.9476546880171878, + "grad_norm": 1.0063860855780149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195400 + }, + { + "epoch": 0.9477031862100238, + "grad_norm": 1.0269557293440812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195410 + }, + { + "epoch": 0.94775168440286, + "grad_norm": 1.0233778624524348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195420 + }, + { + "epoch": 0.947800182595696, + "grad_norm": 1.0708009057225354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195430 + }, + { + "epoch": 0.9478486807885321, + "grad_norm": 1.1054969917267954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195440 + }, + { + "epoch": 0.9478971789813682, + "grad_norm": 1.1523994203344046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195450 + }, + { + "epoch": 0.9479456771742043, + "grad_norm": 1.0315667964277964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195460 + }, + { + "epoch": 0.9479941753670403, + "grad_norm": 1.0617945633839554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195470 + }, + { + "epoch": 0.9480426735598765, + "grad_norm": 9.700865888362387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195480 + }, + { + "epoch": 0.9480911717527125, + "grad_norm": 1.000431453235251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195490 + }, + { + "epoch": 0.9481396699455487, + "grad_norm": 9.798934996751996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195500 + }, + { + "epoch": 0.9481881681383847, + "grad_norm": 1.0078802858970448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195510 + }, + { + "epoch": 0.9482366663312208, + "grad_norm": 1.0400074756944377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195520 + }, + { + "epoch": 0.9482851645240569, + "grad_norm": 1.0220443869002338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195530 + }, + { + "epoch": 0.948333662716893, + "grad_norm": 9.833752301346976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195540 + }, + { + "epoch": 0.9483821609097292, + "grad_norm": 9.963334690610282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195550 + }, + { + "epoch": 0.9484306591025652, + "grad_norm": 9.897419062099289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195560 + }, + { + "epoch": 0.9484791572954013, + "grad_norm": 9.913183873777598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195570 + }, + { + "epoch": 0.9485276554882374, + "grad_norm": 1.0043323328545739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195580 + }, + { + "epoch": 0.9485761536810735, + "grad_norm": 9.843166282053062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195590 + }, + { + "epoch": 0.9486246518739095, + "grad_norm": 1.0110829151699363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195600 + }, + { + "epoch": 0.9486731500667457, + "grad_norm": 9.719590821077873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195610 + }, + { + "epoch": 0.9487216482595817, + "grad_norm": 1.0355522306326748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195620 + }, + { + "epoch": 0.9487701464524179, + "grad_norm": 9.310259940775722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195630 + }, + { + "epoch": 0.9488186446452539, + "grad_norm": 9.194240391252606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195640 + }, + { + "epoch": 0.94886714283809, + "grad_norm": 9.085331242886241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195650 + }, + { + "epoch": 0.9489156410309261, + "grad_norm": 9.505125575515194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195660 + }, + { + "epoch": 0.9489641392237622, + "grad_norm": 1.0401634398249371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195670 + }, + { + "epoch": 0.9490126374165982, + "grad_norm": 9.92778694808294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195680 + }, + { + "epoch": 0.9490611356094344, + "grad_norm": 1.008066448093814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195690 + }, + { + "epoch": 0.9491096338022704, + "grad_norm": 8.720554944829928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195700 + }, + { + "epoch": 0.9491581319951066, + "grad_norm": 9.681949819650981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195710 + }, + { + "epoch": 0.9492066301879426, + "grad_norm": 1.271573921712843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195720 + }, + { + "epoch": 0.9492551283807787, + "grad_norm": 1.0472865596966585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195730 + }, + { + "epoch": 0.9493036265736148, + "grad_norm": 8.889573166470655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195740 + }, + { + "epoch": 0.9493521247664509, + "grad_norm": 8.7306382567931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195750 + }, + { + "epoch": 0.949400622959287, + "grad_norm": 1.0772486547239168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195760 + }, + { + "epoch": 0.9494491211521231, + "grad_norm": 1.0029062735839034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195770 + }, + { + "epoch": 0.9494976193449591, + "grad_norm": 9.751930463153258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195780 + }, + { + "epoch": 0.9495461175377953, + "grad_norm": 8.247535276950657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195790 + }, + { + "epoch": 0.9495946157306313, + "grad_norm": 8.689752917234728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195800 + }, + { + "epoch": 0.9496431139234675, + "grad_norm": 1.0364989577738015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195810 + }, + { + "epoch": 0.9496916121163035, + "grad_norm": 1.3843940394053789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195820 + }, + { + "epoch": 0.9497401103091396, + "grad_norm": 1.0679492135068358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195830 + }, + { + "epoch": 0.9497886085019757, + "grad_norm": 1.1280891243359292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195840 + }, + { + "epoch": 0.9498371066948118, + "grad_norm": 8.542977525394235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195850 + }, + { + "epoch": 0.9498856048876478, + "grad_norm": 9.707422776727981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195860 + }, + { + "epoch": 0.949934103080484, + "grad_norm": 2.975970971874631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195870 + }, + { + "epoch": 0.94998260127332, + "grad_norm": 1.0637909753086205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195880 + }, + { + "epoch": 0.9500310994661562, + "grad_norm": 8.594945910544993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195890 + }, + { + "epoch": 0.9500795976589922, + "grad_norm": 8.360309777799557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195900 + }, + { + "epoch": 0.9501280958518283, + "grad_norm": 9.043571935762884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195910 + }, + { + "epoch": 0.9501765940446644, + "grad_norm": 9.319942506635925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195920 + }, + { + "epoch": 0.9502250922375005, + "grad_norm": 9.767250475078981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195930 + }, + { + "epoch": 0.9502735904303365, + "grad_norm": 8.323532085796614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195940 + }, + { + "epoch": 0.9503220886231727, + "grad_norm": 8.481237045998569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195950 + }, + { + "epoch": 0.9503705868160087, + "grad_norm": 1.1819042811112013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195960 + }, + { + "epoch": 0.9504190850088449, + "grad_norm": 1.0603671540820869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195970 + }, + { + "epoch": 0.9504675832016809, + "grad_norm": 9.064724793006462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195980 + }, + { + "epoch": 0.950516081394517, + "grad_norm": 8.310711052672559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 195990 + }, + { + "epoch": 0.9505645795873531, + "grad_norm": 7.883195962676837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196000 + }, + { + "epoch": 0.9506130777801892, + "grad_norm": 9.106077669684964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196010 + }, + { + "epoch": 0.9506615759730253, + "grad_norm": 9.03966963505809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196020 + }, + { + "epoch": 0.9507100741658614, + "grad_norm": 8.901915293790807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196030 + }, + { + "epoch": 0.9507585723586974, + "grad_norm": 8.182090738273473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196040 + }, + { + "epoch": 0.9508070705515336, + "grad_norm": 8.198041712148552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196050 + }, + { + "epoch": 0.9508555687443697, + "grad_norm": 8.802180673228577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196060 + }, + { + "epoch": 0.9509040669372057, + "grad_norm": 8.995055367222449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196070 + }, + { + "epoch": 0.9509525651300419, + "grad_norm": 8.795675654482693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196080 + }, + { + "epoch": 0.9510010633228779, + "grad_norm": 7.933039114504936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196090 + }, + { + "epoch": 0.9510495615157141, + "grad_norm": 7.913580901686146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196100 + }, + { + "epoch": 0.9510980597085501, + "grad_norm": 8.588377653495627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196110 + }, + { + "epoch": 0.9511465579013862, + "grad_norm": 1.2207073041281546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196120 + }, + { + "epoch": 0.9511950560942223, + "grad_norm": 8.834346942876437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196130 + }, + { + "epoch": 0.9512435542870584, + "grad_norm": 8.037035570396256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196140 + }, + { + "epoch": 0.9512920524798945, + "grad_norm": 7.763398457427684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196150 + }, + { + "epoch": 0.9513405506727306, + "grad_norm": 9.080868323962932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196160 + }, + { + "epoch": 0.9513890488655666, + "grad_norm": 8.908708082344674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196170 + }, + { + "epoch": 0.9514375470584028, + "grad_norm": 1.1390491039264816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196180 + }, + { + "epoch": 0.9514860452512388, + "grad_norm": 7.640670673936256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196190 + }, + { + "epoch": 0.951534543444075, + "grad_norm": 7.987544847765093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196200 + }, + { + "epoch": 0.951583041636911, + "grad_norm": 8.595925748977606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196210 + }, + { + "epoch": 0.9516315398297471, + "grad_norm": 8.714267352161187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196220 + }, + { + "epoch": 0.9516800380225832, + "grad_norm": 8.658540195938258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196230 + }, + { + "epoch": 0.9517285362154193, + "grad_norm": 7.988370498424047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196240 + }, + { + "epoch": 0.9517770344082553, + "grad_norm": 8.088247938076165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196250 + }, + { + "epoch": 0.9518255326010915, + "grad_norm": 8.688630259712227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196260 + }, + { + "epoch": 0.9518740307939275, + "grad_norm": 1.0229230440472747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196270 + }, + { + "epoch": 0.9519225289867637, + "grad_norm": 8.601509193795209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196280 + }, + { + "epoch": 0.9519710271795997, + "grad_norm": 7.568696247517437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196290 + }, + { + "epoch": 0.9520195253724358, + "grad_norm": 8.1271217311496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196300 + }, + { + "epoch": 0.9520680235652719, + "grad_norm": 8.715256427649365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196310 + }, + { + "epoch": 0.952116521758108, + "grad_norm": 8.416922270271243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196320 + }, + { + "epoch": 0.952165019950944, + "grad_norm": 8.080046143277286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196330 + }, + { + "epoch": 0.9522135181437802, + "grad_norm": 7.683696878757473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196340 + }, + { + "epoch": 0.9522620163366162, + "grad_norm": 7.642639587857047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196350 + }, + { + "epoch": 0.9523105145294524, + "grad_norm": 8.879840862618948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196360 + }, + { + "epoch": 0.9523590127222884, + "grad_norm": 8.492630598766482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196370 + }, + { + "epoch": 0.9524075109151245, + "grad_norm": 8.29308604011203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196380 + }, + { + "epoch": 0.9524560091079606, + "grad_norm": 7.470475082982375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196390 + }, + { + "epoch": 0.9525045073007967, + "grad_norm": 7.985281769151698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196400 + }, + { + "epoch": 0.9525530054936328, + "grad_norm": 8.794113171006757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196410 + }, + { + "epoch": 0.9526015036864689, + "grad_norm": 7.887081920898709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196420 + }, + { + "epoch": 0.9526500018793049, + "grad_norm": 8.356527558817106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196430 + }, + { + "epoch": 0.9526985000721411, + "grad_norm": 7.624143449902476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196440 + }, + { + "epoch": 0.9527469982649771, + "grad_norm": 7.928277767632608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196450 + }, + { + "epoch": 0.9527954964578133, + "grad_norm": 1.147494970155094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196460 + }, + { + "epoch": 0.9528439946506493, + "grad_norm": 9.156855895753324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196470 + }, + { + "epoch": 0.9528924928434854, + "grad_norm": 1.0066901978689202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196480 + }, + { + "epoch": 0.9529409910363215, + "grad_norm": 7.795046741421174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196490 + }, + { + "epoch": 0.9529894892291576, + "grad_norm": 8.011325292045512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196500 + }, + { + "epoch": 0.9530379874219936, + "grad_norm": 1.052117610811365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196510 + }, + { + "epoch": 0.9530864856148298, + "grad_norm": 1.012532067079519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196520 + }, + { + "epoch": 0.9531349838076658, + "grad_norm": 9.577128423643444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196530 + }, + { + "epoch": 0.953183482000502, + "grad_norm": 8.364396109072914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196540 + }, + { + "epoch": 0.953231980193338, + "grad_norm": 7.698403692302236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196550 + }, + { + "epoch": 0.9532804783861741, + "grad_norm": 1.0019079610401604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196560 + }, + { + "epoch": 0.9533289765790103, + "grad_norm": 1.047677784526968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196570 + }, + { + "epoch": 0.9533774747718463, + "grad_norm": 1.000242235704718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196580 + }, + { + "epoch": 0.9534259729646825, + "grad_norm": 7.515131272839426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196590 + }, + { + "epoch": 0.9534744711575185, + "grad_norm": 7.510762145557237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196600 + }, + { + "epoch": 0.9535229693503546, + "grad_norm": 8.806040341369226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196610 + }, + { + "epoch": 0.9535714675431907, + "grad_norm": 9.524857347287252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196620 + }, + { + "epoch": 0.9536199657360268, + "grad_norm": 9.574733184081197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196630 + }, + { + "epoch": 0.9536684639288628, + "grad_norm": 7.5244834363275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196640 + }, + { + "epoch": 0.953716962121699, + "grad_norm": 7.851124905755569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196650 + }, + { + "epoch": 0.953765460314535, + "grad_norm": 9.583398963286527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196660 + }, + { + "epoch": 0.9538139585073712, + "grad_norm": 9.250992150100501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196670 + }, + { + "epoch": 0.9538624567002072, + "grad_norm": 9.57431041115342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196680 + }, + { + "epoch": 0.9539109548930433, + "grad_norm": 7.349019170987958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196690 + }, + { + "epoch": 0.9539594530858794, + "grad_norm": 7.245191113725014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196700 + }, + { + "epoch": 0.9540079512787155, + "grad_norm": 9.182432592069745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196710 + }, + { + "epoch": 0.9540564494715515, + "grad_norm": 9.565392389276894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196720 + }, + { + "epoch": 0.9541049476643877, + "grad_norm": 9.276137546976315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196730 + }, + { + "epoch": 0.9541534458572237, + "grad_norm": 7.124345557940615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196740 + }, + { + "epoch": 0.9542019440500599, + "grad_norm": 7.5348872030645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196750 + }, + { + "epoch": 0.9542504422428959, + "grad_norm": 8.96902747626882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196760 + }, + { + "epoch": 0.954298940435732, + "grad_norm": 9.082964425033424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196770 + }, + { + "epoch": 0.9543474386285681, + "grad_norm": 8.378205507142411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196780 + }, + { + "epoch": 0.9543959368214042, + "grad_norm": 7.448136329912813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196790 + }, + { + "epoch": 0.9544444350142403, + "grad_norm": 7.31292288946861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196800 + }, + { + "epoch": 0.9544929332070764, + "grad_norm": 8.591661071477574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196810 + }, + { + "epoch": 0.9545414313999124, + "grad_norm": 8.179988242318359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196820 + }, + { + "epoch": 0.9545899295927486, + "grad_norm": 8.763262115962789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196830 + }, + { + "epoch": 0.9546384277855846, + "grad_norm": 7.278654834408371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196840 + }, + { + "epoch": 0.9546869259784208, + "grad_norm": 7.321725803421941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196850 + }, + { + "epoch": 0.9547354241712568, + "grad_norm": 8.03036002139379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196860 + }, + { + "epoch": 0.9547839223640929, + "grad_norm": 8.668108364418003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196870 + }, + { + "epoch": 0.954832420556929, + "grad_norm": 8.29023534265616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196880 + }, + { + "epoch": 0.9548809187497651, + "grad_norm": 6.874516600419156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196890 + }, + { + "epoch": 0.9549294169426011, + "grad_norm": 7.271912494388744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196900 + }, + { + "epoch": 0.9549779151354373, + "grad_norm": 8.577632115702727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196910 + }, + { + "epoch": 0.9550264133282733, + "grad_norm": 7.928765910492075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196920 + }, + { + "epoch": 0.9550749115211095, + "grad_norm": 8.064157697162955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196930 + }, + { + "epoch": 0.9551234097139455, + "grad_norm": 6.902212845716349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196940 + }, + { + "epoch": 0.9551719079067816, + "grad_norm": 7.354516640134534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196950 + }, + { + "epoch": 0.9552204060996177, + "grad_norm": 7.869903839718972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196960 + }, + { + "epoch": 0.9552689042924538, + "grad_norm": 7.682649538764963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196970 + }, + { + "epoch": 0.9553174024852898, + "grad_norm": 7.642369581617459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196980 + }, + { + "epoch": 0.955365900678126, + "grad_norm": 6.895224657910148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 196990 + }, + { + "epoch": 0.955414398870962, + "grad_norm": 7.537560264836429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197000 + }, + { + "epoch": 0.9554628970637982, + "grad_norm": 7.967440041056761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197010 + }, + { + "epoch": 0.9555113952566342, + "grad_norm": 7.771835441872099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197020 + }, + { + "epoch": 0.9555598934494703, + "grad_norm": 8.015400965177832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197030 + }, + { + "epoch": 0.9556083916423064, + "grad_norm": 6.902720883772417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197040 + }, + { + "epoch": 0.9556568898351425, + "grad_norm": 6.829540666331013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197050 + }, + { + "epoch": 0.9557053880279786, + "grad_norm": 8.141638119241179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197060 + }, + { + "epoch": 0.9557538862208147, + "grad_norm": 7.635017595930549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197070 + }, + { + "epoch": 0.9558023844136508, + "grad_norm": 7.132204871140857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197080 + }, + { + "epoch": 0.9558508826064869, + "grad_norm": 6.984702594081682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197090 + }, + { + "epoch": 0.955899380799323, + "grad_norm": 1.1805003197196129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197100 + }, + { + "epoch": 0.955947878992159, + "grad_norm": 7.66689041142854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197110 + }, + { + "epoch": 0.9559963771849952, + "grad_norm": 7.20027557576941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197120 + }, + { + "epoch": 0.9560448753778312, + "grad_norm": 7.804294455127092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197130 + }, + { + "epoch": 0.9560933735706674, + "grad_norm": 6.649269579384054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197140 + }, + { + "epoch": 0.9561418717635034, + "grad_norm": 7.034425664187438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197150 + }, + { + "epoch": 0.9561903699563395, + "grad_norm": 8.20006818003094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197160 + }, + { + "epoch": 0.9562388681491756, + "grad_norm": 7.15888717195412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197170 + }, + { + "epoch": 0.9562873663420117, + "grad_norm": 7.097389698174084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197180 + }, + { + "epoch": 0.9563358645348478, + "grad_norm": 6.660949480874478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197190 + }, + { + "epoch": 0.9563843627276839, + "grad_norm": 6.62568595544144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197200 + }, + { + "epoch": 0.9564328609205199, + "grad_norm": 7.304609539460216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197210 + }, + { + "epoch": 0.9564813591133561, + "grad_norm": 6.798045859568447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197220 + }, + { + "epoch": 0.9565298573061921, + "grad_norm": 7.322689299371632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197230 + }, + { + "epoch": 0.9565783554990283, + "grad_norm": 6.6375712037825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197240 + }, + { + "epoch": 0.9566268536918643, + "grad_norm": 6.565473853470394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197250 + }, + { + "epoch": 0.9566753518847004, + "grad_norm": 6.999225377057883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197260 + }, + { + "epoch": 0.9567238500775365, + "grad_norm": 7.288555536888452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197270 + }, + { + "epoch": 0.9567723482703726, + "grad_norm": 7.143547975374531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197280 + }, + { + "epoch": 0.9568208464632086, + "grad_norm": 6.844233979563796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197290 + }, + { + "epoch": 0.9568693446560448, + "grad_norm": 6.61923067468706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197300 + }, + { + "epoch": 0.9569178428488808, + "grad_norm": 6.89862460490076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197310 + }, + { + "epoch": 0.956966341041717, + "grad_norm": 7.504387156131997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197320 + }, + { + "epoch": 0.957014839234553, + "grad_norm": 6.706476085582835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197330 + }, + { + "epoch": 0.9570633374273891, + "grad_norm": 6.911321293046058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197340 + }, + { + "epoch": 0.9571118356202252, + "grad_norm": 6.55252350156843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197350 + }, + { + "epoch": 0.9571603338130613, + "grad_norm": 6.737243296583983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197360 + }, + { + "epoch": 0.9572088320058973, + "grad_norm": 6.632031812614514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197370 + }, + { + "epoch": 0.9572573301987335, + "grad_norm": 6.69954900445191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197380 + }, + { + "epoch": 0.9573058283915695, + "grad_norm": 6.636162197537487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197390 + }, + { + "epoch": 0.9573543265844057, + "grad_norm": 6.388131623680238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197400 + }, + { + "epoch": 0.9574028247772417, + "grad_norm": 6.811454511534976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197410 + }, + { + "epoch": 0.9574513229700778, + "grad_norm": 7.378407218538996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197420 + }, + { + "epoch": 0.9574998211629139, + "grad_norm": 6.852696543546699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197430 + }, + { + "epoch": 0.95754831935575, + "grad_norm": 6.331210045118496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197440 + }, + { + "epoch": 0.957596817548586, + "grad_norm": 6.373068117682124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197450 + }, + { + "epoch": 0.9576453157414222, + "grad_norm": 6.880271996578813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197460 + }, + { + "epoch": 0.9576938139342582, + "grad_norm": 6.958299536563572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197470 + }, + { + "epoch": 0.9577423121270944, + "grad_norm": 6.791342599399286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197480 + }, + { + "epoch": 0.9577908103199304, + "grad_norm": 6.258120066604533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197490 + }, + { + "epoch": 0.9578393085127666, + "grad_norm": 6.36049648505832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197500 + }, + { + "epoch": 0.9578878067056026, + "grad_norm": 6.462573054477616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197510 + }, + { + "epoch": 0.9579363048984387, + "grad_norm": 7.162410042838019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197520 + }, + { + "epoch": 0.9579848030912748, + "grad_norm": 6.967272270230751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197530 + }, + { + "epoch": 0.9580333012841109, + "grad_norm": 6.211053005245049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197540 + }, + { + "epoch": 0.9580817994769469, + "grad_norm": 6.387497109017204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197550 + }, + { + "epoch": 0.9581302976697831, + "grad_norm": 7.122640965917526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197560 + }, + { + "epoch": 0.9581787958626191, + "grad_norm": 6.670890684290498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197570 + }, + { + "epoch": 0.9582272940554553, + "grad_norm": 6.512129147040469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197580 + }, + { + "epoch": 0.9582757922482914, + "grad_norm": 6.30269809676065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197590 + }, + { + "epoch": 0.9583242904411274, + "grad_norm": 6.351615411404055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197600 + }, + { + "epoch": 0.9583727886339636, + "grad_norm": 6.497574389641159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197610 + }, + { + "epoch": 0.9584212868267996, + "grad_norm": 6.458080292759405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197620 + }, + { + "epoch": 0.9584697850196358, + "grad_norm": 6.75698714758255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197630 + }, + { + "epoch": 0.9585182832124718, + "grad_norm": 6.510319394692488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197640 + }, + { + "epoch": 0.9585667814053079, + "grad_norm": 6.573412036914306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197650 + }, + { + "epoch": 0.958615279598144, + "grad_norm": 6.244748362860264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197660 + }, + { + "epoch": 0.9586637777909801, + "grad_norm": 6.988204148683508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197670 + }, + { + "epoch": 0.9587122759838161, + "grad_norm": 6.398263963092177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197680 + }, + { + "epoch": 0.9587607741766523, + "grad_norm": 6.304737354412282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197690 + }, + { + "epoch": 0.9588092723694883, + "grad_norm": 5.971092065237826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197700 + }, + { + "epoch": 0.9588577705623245, + "grad_norm": 6.481033665295399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197710 + }, + { + "epoch": 0.9589062687551605, + "grad_norm": 6.128280460870883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197720 + }, + { + "epoch": 0.9589547669479966, + "grad_norm": 6.378516559379932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197730 + }, + { + "epoch": 0.9590032651408327, + "grad_norm": 5.951521941938154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197740 + }, + { + "epoch": 0.9590517633336688, + "grad_norm": 6.104721705924021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197750 + }, + { + "epoch": 0.9591002615265048, + "grad_norm": 6.420797404871337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197760 + }, + { + "epoch": 0.959148759719341, + "grad_norm": 6.419336529006614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197770 + }, + { + "epoch": 0.959197257912177, + "grad_norm": 5.883683584784194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197780 + }, + { + "epoch": 0.9592457561050132, + "grad_norm": 6.439251620804498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197790 + }, + { + "epoch": 0.9592942542978492, + "grad_norm": 6.137116770332796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197800 + }, + { + "epoch": 0.9593427524906853, + "grad_norm": 5.988763973618916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197810 + }, + { + "epoch": 0.9593912506835214, + "grad_norm": 6.181698353202592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197820 + }, + { + "epoch": 0.9594397488763575, + "grad_norm": 6.216242098844305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197830 + }, + { + "epoch": 0.9594882470691936, + "grad_norm": 5.863416419060741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197840 + }, + { + "epoch": 0.9595367452620297, + "grad_norm": 6.202565572266394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197850 + }, + { + "epoch": 0.9595852434548657, + "grad_norm": 6.117215889389627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197860 + }, + { + "epoch": 0.9596337416477019, + "grad_norm": 6.263699248165722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197870 + }, + { + "epoch": 0.9596822398405379, + "grad_norm": 5.979929085242475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197880 + }, + { + "epoch": 0.959730738033374, + "grad_norm": 6.091763538051964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197890 + }, + { + "epoch": 0.9597792362262101, + "grad_norm": 5.969309313513804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197900 + }, + { + "epoch": 0.9598277344190462, + "grad_norm": 6.281878484060144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197910 + }, + { + "epoch": 0.9598762326118823, + "grad_norm": 6.466481750067032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197920 + }, + { + "epoch": 0.9599247308047184, + "grad_norm": 5.96180385059597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197930 + }, + { + "epoch": 0.9599732289975544, + "grad_norm": 6.178801470468898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197940 + }, + { + "epoch": 0.9600217271903906, + "grad_norm": 5.841366856884633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197950 + }, + { + "epoch": 0.9600702253832266, + "grad_norm": 6.1060625000664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197960 + }, + { + "epoch": 0.9601187235760628, + "grad_norm": 6.050991174788578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197970 + }, + { + "epoch": 0.9601672217688988, + "grad_norm": 5.7467030245561546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197980 + }, + { + "epoch": 0.9602157199617349, + "grad_norm": 5.984450268670116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 197990 + }, + { + "epoch": 0.960264218154571, + "grad_norm": 6.194035506723594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198000 + }, + { + "epoch": 0.9603127163474071, + "grad_norm": 6.058806434339203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198010 + }, + { + "epoch": 0.9603612145402431, + "grad_norm": 6.00068688072497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198020 + }, + { + "epoch": 0.9604097127330793, + "grad_norm": 5.866626651140905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198030 + }, + { + "epoch": 0.9604582109259153, + "grad_norm": 6.081020842430007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198040 + }, + { + "epoch": 0.9605067091187515, + "grad_norm": 5.5903626616782276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198050 + }, + { + "epoch": 0.9605552073115875, + "grad_norm": 5.671962810538389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198060 + }, + { + "epoch": 0.9606037055044236, + "grad_norm": 5.90518034471188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198070 + }, + { + "epoch": 0.9606522036972597, + "grad_norm": 5.968193050875925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198080 + }, + { + "epoch": 0.9607007018900958, + "grad_norm": 5.6542109660995266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198090 + }, + { + "epoch": 0.960749200082932, + "grad_norm": 5.5118359654215965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198100 + }, + { + "epoch": 0.960797698275768, + "grad_norm": 5.741290465266502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198110 + }, + { + "epoch": 0.9608461964686041, + "grad_norm": 5.7887699966840955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198120 + }, + { + "epoch": 0.9608946946614402, + "grad_norm": 5.7239827100374896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198130 + }, + { + "epoch": 0.9609431928542763, + "grad_norm": 6.050839829185861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198140 + }, + { + "epoch": 0.9609916910471124, + "grad_norm": 5.8822809734238035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198150 + }, + { + "epoch": 0.9610401892399485, + "grad_norm": 5.822381865527859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198160 + }, + { + "epoch": 0.9610886874327845, + "grad_norm": 5.9149940057068306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198170 + }, + { + "epoch": 0.9611371856256207, + "grad_norm": 5.556113080729119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198180 + }, + { + "epoch": 0.9611856838184567, + "grad_norm": 5.5636473206277515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198190 + }, + { + "epoch": 0.9612341820112928, + "grad_norm": 5.47420420105027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198200 + }, + { + "epoch": 0.9612826802041289, + "grad_norm": 5.699469340925134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198210 + }, + { + "epoch": 0.961331178396965, + "grad_norm": 5.513766154763289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198220 + }, + { + "epoch": 0.9613796765898011, + "grad_norm": 5.6014879845633914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198230 + }, + { + "epoch": 0.9614281747826372, + "grad_norm": 5.4665083126792524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198240 + }, + { + "epoch": 0.9614766729754732, + "grad_norm": 5.626341703646176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198250 + }, + { + "epoch": 0.9615251711683094, + "grad_norm": 4.007015093066002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198260 + }, + { + "epoch": 0.9615736693611454, + "grad_norm": 5.6456663344306435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198270 + }, + { + "epoch": 0.9616221675539816, + "grad_norm": 5.799261870720329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198280 + }, + { + "epoch": 0.9616706657468176, + "grad_norm": 5.5588611047596714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198290 + }, + { + "epoch": 0.9617191639396537, + "grad_norm": 5.555041937554961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198300 + }, + { + "epoch": 0.9617676621324898, + "grad_norm": 6.08020584991209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198310 + }, + { + "epoch": 0.9618161603253259, + "grad_norm": 5.68435893910646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198320 + }, + { + "epoch": 0.9618646585181619, + "grad_norm": 5.348618969946983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198330 + }, + { + "epoch": 0.9619131567109981, + "grad_norm": 5.513492240538653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198340 + }, + { + "epoch": 0.9619616549038341, + "grad_norm": 5.128538305143593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198350 + }, + { + "epoch": 0.9620101530966703, + "grad_norm": 5.360525889841483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198360 + }, + { + "epoch": 0.9620586512895063, + "grad_norm": 5.341793496427272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198370 + }, + { + "epoch": 0.9621071494823424, + "grad_norm": 5.187591511912615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198380 + }, + { + "epoch": 0.9621556476751785, + "grad_norm": 5.29695149964482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198390 + }, + { + "epoch": 0.9622041458680146, + "grad_norm": 5.4720171505096005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198400 + }, + { + "epoch": 0.9622526440608506, + "grad_norm": 5.383590107044256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198410 + }, + { + "epoch": 0.9623011422536868, + "grad_norm": 5.205311381928368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198420 + }, + { + "epoch": 0.9623496404465228, + "grad_norm": 5.1446463089632744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198430 + }, + { + "epoch": 0.962398138639359, + "grad_norm": 5.3793232979160166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198440 + }, + { + "epoch": 0.962446636832195, + "grad_norm": 5.558385041126712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198450 + }, + { + "epoch": 0.9624951350250311, + "grad_norm": 6.579684708185596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198460 + }, + { + "epoch": 0.9625436332178672, + "grad_norm": 5.3268308874976356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198470 + }, + { + "epoch": 0.9625921314107033, + "grad_norm": 5.2020968865917894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198480 + }, + { + "epoch": 0.9626406296035394, + "grad_norm": 5.65004256714019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198490 + }, + { + "epoch": 0.9626891277963755, + "grad_norm": 5.057225749283134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198500 + }, + { + "epoch": 0.9627376259892115, + "grad_norm": 5.491837029580893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198510 + }, + { + "epoch": 0.9627861241820477, + "grad_norm": 5.287842697043743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198520 + }, + { + "epoch": 0.9628346223748837, + "grad_norm": 5.2671801142878394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198530 + }, + { + "epoch": 0.9628831205677199, + "grad_norm": 5.433757976902598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198540 + }, + { + "epoch": 0.9629316187605559, + "grad_norm": 5.328298868789716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198550 + }, + { + "epoch": 0.962980116953392, + "grad_norm": 5.102265987488863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198560 + }, + { + "epoch": 0.9630286151462281, + "grad_norm": 4.932918074018744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198570 + }, + { + "epoch": 0.9630771133390642, + "grad_norm": 5.233510691482479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198580 + }, + { + "epoch": 0.9631256115319002, + "grad_norm": 5.112135781359939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198590 + }, + { + "epoch": 0.9631741097247364, + "grad_norm": 5.454090867829109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198600 + }, + { + "epoch": 0.9632226079175725, + "grad_norm": 5.098913646861547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198610 + }, + { + "epoch": 0.9632711061104086, + "grad_norm": 5.1156877844960036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198620 + }, + { + "epoch": 0.9633196043032447, + "grad_norm": 4.9304190952170757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198630 + }, + { + "epoch": 0.9633681024960807, + "grad_norm": 5.339004971460781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198640 + }, + { + "epoch": 0.9634166006889169, + "grad_norm": 9.394658917472043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198650 + }, + { + "epoch": 0.9634650988817529, + "grad_norm": 5.357336974043392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198660 + }, + { + "epoch": 0.9635135970745891, + "grad_norm": 5.124290680669219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198670 + }, + { + "epoch": 0.9635620952674251, + "grad_norm": 5.008753234392316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198680 + }, + { + "epoch": 0.9636105934602612, + "grad_norm": 5.12501117100328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198690 + }, + { + "epoch": 0.9636590916530973, + "grad_norm": 4.962523547646924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198700 + }, + { + "epoch": 0.9637075898459334, + "grad_norm": 5.112319456657133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198710 + }, + { + "epoch": 0.9637560880387694, + "grad_norm": 5.150884163640512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198720 + }, + { + "epoch": 0.9638045862316056, + "grad_norm": 4.8084807247050776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198730 + }, + { + "epoch": 0.9638530844244416, + "grad_norm": 5.618507614713053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198740 + }, + { + "epoch": 0.9639015826172778, + "grad_norm": 5.0301299125976584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198750 + }, + { + "epoch": 0.9639500808101138, + "grad_norm": 5.1910514997643986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198760 + }, + { + "epoch": 0.9639985790029499, + "grad_norm": 5.0047052724266905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198770 + }, + { + "epoch": 0.964047077195786, + "grad_norm": 4.942698339505114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198780 + }, + { + "epoch": 0.9640955753886221, + "grad_norm": 4.9708809513049346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198790 + }, + { + "epoch": 0.9641440735814581, + "grad_norm": 4.8684608344728986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198800 + }, + { + "epoch": 0.9641925717742943, + "grad_norm": 5.115850143511125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198810 + }, + { + "epoch": 0.9642410699671303, + "grad_norm": 4.832380895436472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198820 + }, + { + "epoch": 0.9642895681599665, + "grad_norm": 5.11822229043446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198830 + }, + { + "epoch": 0.9643380663528025, + "grad_norm": 5.196476493551927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198840 + }, + { + "epoch": 0.9643865645456386, + "grad_norm": 5.046780060524725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198850 + }, + { + "epoch": 0.9644350627384747, + "grad_norm": 6.047748257742569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198860 + }, + { + "epoch": 0.9644835609313108, + "grad_norm": 5.1874120998718354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198870 + }, + { + "epoch": 0.9645320591241469, + "grad_norm": 5.1748628493442084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198880 + }, + { + "epoch": 0.964580557316983, + "grad_norm": 5.0598483625208246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198890 + }, + { + "epoch": 0.964629055509819, + "grad_norm": 4.851150237072943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198900 + }, + { + "epoch": 0.9646775537026552, + "grad_norm": 4.840873657485645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198910 + }, + { + "epoch": 0.9647260518954912, + "grad_norm": 4.86839191182753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198920 + }, + { + "epoch": 0.9647745500883274, + "grad_norm": 4.5510923030178674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198930 + }, + { + "epoch": 0.9648230482811634, + "grad_norm": 4.849126966632866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198940 + }, + { + "epoch": 0.9648715464739995, + "grad_norm": 5.2339775180598735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198950 + }, + { + "epoch": 0.9649200446668356, + "grad_norm": 4.541447040651292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198960 + }, + { + "epoch": 0.9649685428596717, + "grad_norm": 4.5259135106334725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198970 + }, + { + "epoch": 0.9650170410525077, + "grad_norm": 4.5055152497752715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198980 + }, + { + "epoch": 0.9650655392453439, + "grad_norm": 4.8058836910058744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 198990 + }, + { + "epoch": 0.9651140374381799, + "grad_norm": 4.7029246275087644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199000 + }, + { + "epoch": 0.9651625356310161, + "grad_norm": 4.464252612024211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199010 + }, + { + "epoch": 0.9652110338238521, + "grad_norm": 4.6891994287534544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199020 + }, + { + "epoch": 0.9652595320166882, + "grad_norm": 4.655593599522945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199030 + }, + { + "epoch": 0.9653080302095243, + "grad_norm": 4.684398646759291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199040 + }, + { + "epoch": 0.9653565284023604, + "grad_norm": 4.658616603592236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199050 + }, + { + "epoch": 0.9654050265951964, + "grad_norm": 4.871041525689179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199060 + }, + { + "epoch": 0.9654535247880326, + "grad_norm": 4.4775767094051844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199070 + }, + { + "epoch": 0.9655020229808686, + "grad_norm": 4.4952646049978284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199080 + }, + { + "epoch": 0.9655505211737048, + "grad_norm": 4.523213803508952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199090 + }, + { + "epoch": 0.9655990193665408, + "grad_norm": 4.682317822357618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199100 + }, + { + "epoch": 0.9656475175593769, + "grad_norm": 4.355852567528018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199110 + }, + { + "epoch": 0.9656960157522131, + "grad_norm": 4.399412034672423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199120 + }, + { + "epoch": 0.9657445139450491, + "grad_norm": 4.585601587336896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199130 + }, + { + "epoch": 0.9657930121378853, + "grad_norm": 5.1113605792352246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199140 + }, + { + "epoch": 0.9658415103307213, + "grad_norm": 4.96608372202445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199150 + }, + { + "epoch": 0.9658900085235574, + "grad_norm": 4.714583923259852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199160 + }, + { + "epoch": 0.9659385067163935, + "grad_norm": 4.6409127207880374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199170 + }, + { + "epoch": 0.9659870049092296, + "grad_norm": 4.560021338306797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199180 + }, + { + "epoch": 0.9660355031020657, + "grad_norm": 4.2865437421824026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199190 + }, + { + "epoch": 0.9660840012949018, + "grad_norm": 4.451129598237458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199200 + }, + { + "epoch": 0.9661324994877378, + "grad_norm": 4.439897338670562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199210 + }, + { + "epoch": 0.966180997680574, + "grad_norm": 4.490923188882334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199220 + }, + { + "epoch": 0.96622949587341, + "grad_norm": 4.3318838294226225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199230 + }, + { + "epoch": 0.9662779940662461, + "grad_norm": 4.401515951713009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199240 + }, + { + "epoch": 0.9663264922590822, + "grad_norm": 4.437332634665836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199250 + }, + { + "epoch": 0.9663749904519183, + "grad_norm": 4.606737036283448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199260 + }, + { + "epoch": 0.9664234886447544, + "grad_norm": 4.437377043586821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199270 + }, + { + "epoch": 0.9664719868375905, + "grad_norm": 4.492386551646632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199280 + }, + { + "epoch": 0.9665204850304265, + "grad_norm": 4.390301100443139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199290 + }, + { + "epoch": 0.9665689832232627, + "grad_norm": 4.7023291926961974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199300 + }, + { + "epoch": 0.9666174814160987, + "grad_norm": 4.095513261859196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199310 + }, + { + "epoch": 0.9666659796089349, + "grad_norm": 4.4197374649002086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199320 + }, + { + "epoch": 0.9667144778017709, + "grad_norm": 4.2376029796287185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199330 + }, + { + "epoch": 0.966762975994607, + "grad_norm": 4.4734242976574023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199340 + }, + { + "epoch": 0.9668114741874431, + "grad_norm": 4.650780383030906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199350 + }, + { + "epoch": 0.9668599723802792, + "grad_norm": 5.933828006732256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199360 + }, + { + "epoch": 0.9669084705731152, + "grad_norm": 4.0388162148019546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199370 + }, + { + "epoch": 0.9669569687659514, + "grad_norm": 4.368019901335174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199380 + }, + { + "epoch": 0.9670054669587874, + "grad_norm": 4.468567027515746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199390 + }, + { + "epoch": 0.9670539651516236, + "grad_norm": 4.522398455719667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199400 + }, + { + "epoch": 0.9671024633444596, + "grad_norm": 4.453999125075825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199410 + }, + { + "epoch": 0.9671509615372957, + "grad_norm": 4.007383225257399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199420 + }, + { + "epoch": 0.9671994597301318, + "grad_norm": 4.479835169490798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199430 + }, + { + "epoch": 0.9672479579229679, + "grad_norm": 4.301781331150778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199440 + }, + { + "epoch": 0.967296456115804, + "grad_norm": 4.312505197390237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199450 + }, + { + "epoch": 0.9673449543086401, + "grad_norm": 4.186906465974971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199460 + }, + { + "epoch": 0.9673934525014761, + "grad_norm": 4.087188543167031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199470 + }, + { + "epoch": 0.9674419506943123, + "grad_norm": 3.972916573502516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199480 + }, + { + "epoch": 0.9674904488871483, + "grad_norm": 4.383128526797009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199490 + }, + { + "epoch": 0.9675389470799844, + "grad_norm": 4.532037678472989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199500 + }, + { + "epoch": 0.9675874452728205, + "grad_norm": 4.083601012894178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199510 + }, + { + "epoch": 0.9676359434656566, + "grad_norm": 4.150106036604484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199520 + }, + { + "epoch": 0.9676844416584927, + "grad_norm": 4.262560437950924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199530 + }, + { + "epoch": 0.9677329398513288, + "grad_norm": 4.360280669857275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199540 + }, + { + "epoch": 0.9677814380441648, + "grad_norm": 4.695947808386336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199550 + }, + { + "epoch": 0.967829936237001, + "grad_norm": 4.091386784921269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199560 + }, + { + "epoch": 0.967878434429837, + "grad_norm": 4.110343709839981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199570 + }, + { + "epoch": 0.9679269326226732, + "grad_norm": 4.312808243867039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199580 + }, + { + "epoch": 0.9679754308155092, + "grad_norm": 4.463265312892872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199590 + }, + { + "epoch": 0.9680239290083453, + "grad_norm": 4.288268939944828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199600 + }, + { + "epoch": 0.9680724272011814, + "grad_norm": 4.5583156804696046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199610 + }, + { + "epoch": 0.9681209253940175, + "grad_norm": 4.435697675830852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199620 + }, + { + "epoch": 0.9681694235868537, + "grad_norm": 3.8902957300024354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199630 + }, + { + "epoch": 0.9682179217796897, + "grad_norm": 4.73264982758792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199640 + }, + { + "epoch": 0.9682664199725258, + "grad_norm": 4.239715778453501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199650 + }, + { + "epoch": 0.9683149181653619, + "grad_norm": 3.6998201835558575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199660 + }, + { + "epoch": 0.968363416358198, + "grad_norm": 4.1264154759801386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199670 + }, + { + "epoch": 0.968411914551034, + "grad_norm": 4.275947418364012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199680 + }, + { + "epoch": 0.9684604127438702, + "grad_norm": 4.024956368198218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199690 + }, + { + "epoch": 0.9685089109367062, + "grad_norm": 3.967107176094942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199700 + }, + { + "epoch": 0.9685574091295424, + "grad_norm": 3.9690362996225304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199710 + }, + { + "epoch": 0.9686059073223784, + "grad_norm": 4.117324792218824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199720 + }, + { + "epoch": 0.9686544055152145, + "grad_norm": 3.634436041011213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199730 + }, + { + "epoch": 0.9687029037080506, + "grad_norm": 4.839344569518289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199740 + }, + { + "epoch": 0.9687514019008867, + "grad_norm": 4.183816670888518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199750 + }, + { + "epoch": 0.9687999000937227, + "grad_norm": 3.7452934975590324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199760 + }, + { + "epoch": 0.9688483982865589, + "grad_norm": 3.6662719082869444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199770 + }, + { + "epoch": 0.9688968964793949, + "grad_norm": 3.8449737616019775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199780 + }, + { + "epoch": 0.9689453946722311, + "grad_norm": 3.8405634228411145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199790 + }, + { + "epoch": 0.9689938928650671, + "grad_norm": 4.772164885480379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199800 + }, + { + "epoch": 0.9690423910579032, + "grad_norm": 3.892572664199179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199810 + }, + { + "epoch": 0.9690908892507393, + "grad_norm": 3.5494778671818494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199820 + }, + { + "epoch": 0.9691393874435754, + "grad_norm": 3.5907511630739464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199830 + }, + { + "epoch": 0.9691878856364115, + "grad_norm": 3.950113836026503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199840 + }, + { + "epoch": 0.9692363838292476, + "grad_norm": 7.751640396236326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199850 + }, + { + "epoch": 0.9692848820220836, + "grad_norm": 1.2176543350506108e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199860 + }, + { + "epoch": 0.9693333802149198, + "grad_norm": 0.0012365091824904084, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 199870 + }, + { + "epoch": 0.9693818784077558, + "grad_norm": 0.028948090970516205, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 199880 + }, + { + "epoch": 0.969430376600592, + "grad_norm": 0.00012766808504238725, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199890 + }, + { + "epoch": 0.969478874793428, + "grad_norm": 4.0554114093538374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199900 + }, + { + "epoch": 0.9695273729862641, + "grad_norm": 0.00021929576178081334, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199910 + }, + { + "epoch": 0.9695758711791002, + "grad_norm": 6.67264600906492e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 199920 + }, + { + "epoch": 0.9696243693719363, + "grad_norm": 0.000573284924030304, + "learning_rate": 0.0002, + "loss": 0.0016, + "step": 199930 + }, + { + "epoch": 0.9696728675647723, + "grad_norm": 2.4071901862043887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199940 + }, + { + "epoch": 0.9697213657576085, + "grad_norm": 1.036682260746602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199950 + }, + { + "epoch": 0.9697698639504445, + "grad_norm": 0.022228477522730827, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 199960 + }, + { + "epoch": 0.9698183621432807, + "grad_norm": 3.2087962154037086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199970 + }, + { + "epoch": 0.9698668603361167, + "grad_norm": 3.1809986467123963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199980 + }, + { + "epoch": 0.9699153585289528, + "grad_norm": 3.05430489788705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 199990 + }, + { + "epoch": 0.9699638567217889, + "grad_norm": 2.9062080102448817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200000 + }, + { + "epoch": 0.970012354914625, + "grad_norm": 3.287177605670877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200010 + }, + { + "epoch": 0.970060853107461, + "grad_norm": 3.056231662412756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200020 + }, + { + "epoch": 0.9701093513002972, + "grad_norm": 2.938755869763554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200030 + }, + { + "epoch": 0.9701578494931332, + "grad_norm": 2.789235622913111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200040 + }, + { + "epoch": 0.9702063476859694, + "grad_norm": 2.7828177735500503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200050 + }, + { + "epoch": 0.9702548458788054, + "grad_norm": 2.809983016049955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200060 + }, + { + "epoch": 0.9703033440716415, + "grad_norm": 2.8156434837001143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200070 + }, + { + "epoch": 0.9703518422644776, + "grad_norm": 2.6935672394756693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200080 + }, + { + "epoch": 0.9704003404573137, + "grad_norm": 2.457888058415847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200090 + }, + { + "epoch": 0.9704488386501497, + "grad_norm": 2.5544263735355344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200100 + }, + { + "epoch": 0.9704973368429859, + "grad_norm": 2.6281350073986687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200110 + }, + { + "epoch": 0.9705458350358219, + "grad_norm": 2.572036009951262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200120 + }, + { + "epoch": 0.9705943332286581, + "grad_norm": 2.5497161004750524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200130 + }, + { + "epoch": 0.9706428314214941, + "grad_norm": 2.3326024347625207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200140 + }, + { + "epoch": 0.9706913296143302, + "grad_norm": 2.343635969737079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200150 + }, + { + "epoch": 0.9707398278071664, + "grad_norm": 2.3628685994481202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200160 + }, + { + "epoch": 0.9707883260000024, + "grad_norm": 2.399995537416544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200170 + }, + { + "epoch": 0.9708368241928386, + "grad_norm": 2.337179012101842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200180 + }, + { + "epoch": 0.9708853223856746, + "grad_norm": 2.246373014713754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200190 + }, + { + "epoch": 0.9709338205785107, + "grad_norm": 2.2317085495160427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200200 + }, + { + "epoch": 0.9709823187713468, + "grad_norm": 2.256472271255916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200210 + }, + { + "epoch": 0.9710308169641829, + "grad_norm": 2.235166220998508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200220 + }, + { + "epoch": 0.971079315157019, + "grad_norm": 2.168014816561481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200230 + }, + { + "epoch": 0.9711278133498551, + "grad_norm": 2.128840833393042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200240 + }, + { + "epoch": 0.9711763115426911, + "grad_norm": 2.133987663910375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200250 + }, + { + "epoch": 0.9712248097355273, + "grad_norm": 2.1360174287110567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200260 + }, + { + "epoch": 0.9712733079283633, + "grad_norm": 2.111657295245095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200270 + }, + { + "epoch": 0.9713218061211994, + "grad_norm": 2.0995530576328747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200280 + }, + { + "epoch": 0.9713703043140355, + "grad_norm": 2.0079853584320517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200290 + }, + { + "epoch": 0.9714188025068716, + "grad_norm": 1.974847464225604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200300 + }, + { + "epoch": 0.9714673006997077, + "grad_norm": 1.9946157863159897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200310 + }, + { + "epoch": 0.9715157988925438, + "grad_norm": 2.017042788793333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200320 + }, + { + "epoch": 0.9715642970853798, + "grad_norm": 1.94873155123787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200330 + }, + { + "epoch": 0.971612795278216, + "grad_norm": 1.8388058151685982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200340 + }, + { + "epoch": 0.971661293471052, + "grad_norm": 1.8285875285073416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200350 + }, + { + "epoch": 0.9717097916638882, + "grad_norm": 1.9319384136906592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200360 + }, + { + "epoch": 0.9717582898567242, + "grad_norm": 1.8883223447119235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200370 + }, + { + "epoch": 0.9718067880495603, + "grad_norm": 1.872485540843627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200380 + }, + { + "epoch": 0.9718552862423964, + "grad_norm": 1.6788127368272399e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200390 + }, + { + "epoch": 0.9719037844352325, + "grad_norm": 1.6663038877595682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200400 + }, + { + "epoch": 0.9719522826280685, + "grad_norm": 1.8826707446351065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200410 + }, + { + "epoch": 0.9720007808209047, + "grad_norm": 1.6931261370700668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200420 + }, + { + "epoch": 0.9720492790137407, + "grad_norm": 1.7340795466225245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200430 + }, + { + "epoch": 0.9720977772065769, + "grad_norm": 1.5665050341340248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200440 + }, + { + "epoch": 0.9721462753994129, + "grad_norm": 1.4412048585654702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200450 + }, + { + "epoch": 0.972194773592249, + "grad_norm": 1.7349719882986392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200460 + }, + { + "epoch": 0.9722432717850851, + "grad_norm": 1.6123017303470988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200470 + }, + { + "epoch": 0.9722917699779212, + "grad_norm": 1.7223376289621228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200480 + }, + { + "epoch": 0.9723402681707572, + "grad_norm": 1.4830675354460254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200490 + }, + { + "epoch": 0.9723887663635934, + "grad_norm": 1.3387760873229126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200500 + }, + { + "epoch": 0.9724372645564294, + "grad_norm": 1.537563321107882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200510 + }, + { + "epoch": 0.9724857627492656, + "grad_norm": 1.4882145933370339e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200520 + }, + { + "epoch": 0.9725342609421016, + "grad_norm": 1.459069608245045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200530 + }, + { + "epoch": 0.9725827591349377, + "grad_norm": 1.2315663298068102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200540 + }, + { + "epoch": 0.9726312573277738, + "grad_norm": 1.2162878419985645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200550 + }, + { + "epoch": 0.9726797555206099, + "grad_norm": 1.6932458493101876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200560 + }, + { + "epoch": 0.972728253713446, + "grad_norm": 1.314313863076677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200570 + }, + { + "epoch": 0.9727767519062821, + "grad_norm": 1.3180341511542792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200580 + }, + { + "epoch": 0.9728252500991181, + "grad_norm": 1.0819312592502683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200590 + }, + { + "epoch": 0.9728737482919543, + "grad_norm": 1.0733837143561686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200600 + }, + { + "epoch": 0.9729222464847903, + "grad_norm": 1.2586524462676607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200610 + }, + { + "epoch": 0.9729707446776265, + "grad_norm": 1.216091618516657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200620 + }, + { + "epoch": 0.9730192428704625, + "grad_norm": 1.1941573347939993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200630 + }, + { + "epoch": 0.9730677410632986, + "grad_norm": 9.795535333978478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200640 + }, + { + "epoch": 0.9731162392561347, + "grad_norm": 9.126081863541913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200650 + }, + { + "epoch": 0.9731647374489708, + "grad_norm": 1.158876557383337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200660 + }, + { + "epoch": 0.973213235641807, + "grad_norm": 1.0769316531877848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200670 + }, + { + "epoch": 0.973261733834643, + "grad_norm": 1.1025115327356616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200680 + }, + { + "epoch": 0.9733102320274791, + "grad_norm": 1.0403816759207984e-06, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 200690 + }, + { + "epoch": 0.9733587302203152, + "grad_norm": 1.2883801900898106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200700 + }, + { + "epoch": 0.9734072284131513, + "grad_norm": 2.8937754450453212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200710 + }, + { + "epoch": 0.9734557266059873, + "grad_norm": 3.331962716401904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200720 + }, + { + "epoch": 0.9735042247988235, + "grad_norm": 3.231495156796882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200730 + }, + { + "epoch": 0.9735527229916595, + "grad_norm": 1.775699502104544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200740 + }, + { + "epoch": 0.9736012211844957, + "grad_norm": 1.9000322026840877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200750 + }, + { + "epoch": 0.9736497193773317, + "grad_norm": 2.8764031867467565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200760 + }, + { + "epoch": 0.9736982175701678, + "grad_norm": 2.633276380947791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200770 + }, + { + "epoch": 0.9737467157630039, + "grad_norm": 2.942067112599034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200780 + }, + { + "epoch": 0.97379521395584, + "grad_norm": 1.5219994793369551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200790 + }, + { + "epoch": 0.973843712148676, + "grad_norm": 2.232734232165967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200800 + }, + { + "epoch": 0.9738922103415122, + "grad_norm": 2.497574087101384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200810 + }, + { + "epoch": 0.9739407085343482, + "grad_norm": 2.4543426206946606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200820 + }, + { + "epoch": 0.9739892067271844, + "grad_norm": 2.068702542601386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200830 + }, + { + "epoch": 0.9740377049200204, + "grad_norm": 1.3637605889016413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200840 + }, + { + "epoch": 0.9740862031128565, + "grad_norm": 1.4528194469676237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200850 + }, + { + "epoch": 0.9741347013056926, + "grad_norm": 2.004499719987507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200860 + }, + { + "epoch": 0.9741831994985287, + "grad_norm": 1.974128053916502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200870 + }, + { + "epoch": 0.9742316976913648, + "grad_norm": 2.0357799712655833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200880 + }, + { + "epoch": 0.9742801958842009, + "grad_norm": 1.229479153153079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200890 + }, + { + "epoch": 0.9743286940770369, + "grad_norm": 1.0912876859947573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200900 + }, + { + "epoch": 0.9743771922698731, + "grad_norm": 1.9888368569809245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200910 + }, + { + "epoch": 0.9744256904627091, + "grad_norm": 1.9328665530338185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200920 + }, + { + "epoch": 0.9744741886555452, + "grad_norm": 1.8573633724372485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200930 + }, + { + "epoch": 0.9745226868483813, + "grad_norm": 1.1391293810447678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200940 + }, + { + "epoch": 0.9745711850412174, + "grad_norm": 1.034183469528216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200950 + }, + { + "epoch": 0.9746196832340535, + "grad_norm": 1.609724336049112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200960 + }, + { + "epoch": 0.9746681814268896, + "grad_norm": 1.8275095499120653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200970 + }, + { + "epoch": 0.9747166796197256, + "grad_norm": 1.7271083834202727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200980 + }, + { + "epoch": 0.9747651778125618, + "grad_norm": 1.0717764098444604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 200990 + }, + { + "epoch": 0.9748136760053978, + "grad_norm": 9.532164426673262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201000 + }, + { + "epoch": 0.974862174198234, + "grad_norm": 1.5326731954701245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201010 + }, + { + "epoch": 0.97491067239107, + "grad_norm": 1.6390379187214421e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201020 + }, + { + "epoch": 0.9749591705839061, + "grad_norm": 1.2411983334459364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201030 + }, + { + "epoch": 0.9750076687767422, + "grad_norm": 7.762827749502321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201040 + }, + { + "epoch": 0.9750561669695783, + "grad_norm": 8.427284114986833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201050 + }, + { + "epoch": 0.9751046651624143, + "grad_norm": 1.4787767668167362e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201060 + }, + { + "epoch": 0.9751531633552505, + "grad_norm": 1.2892502354588942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201070 + }, + { + "epoch": 0.9752016615480865, + "grad_norm": 1.3333451533981133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201080 + }, + { + "epoch": 0.9752501597409227, + "grad_norm": 7.767729357510689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201090 + }, + { + "epoch": 0.9752986579337587, + "grad_norm": 8.646873652651266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201100 + }, + { + "epoch": 0.9753471561265948, + "grad_norm": 1.5000540543042007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201110 + }, + { + "epoch": 0.9753956543194309, + "grad_norm": 1.3602597164208419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201120 + }, + { + "epoch": 0.975444152512267, + "grad_norm": 1.331196813225688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201130 + }, + { + "epoch": 0.975492650705103, + "grad_norm": 6.975330393288459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201140 + }, + { + "epoch": 0.9755411488979392, + "grad_norm": 7.137060720197042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201150 + }, + { + "epoch": 0.9755896470907752, + "grad_norm": 1.024122525450366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201160 + }, + { + "epoch": 0.9756381452836114, + "grad_norm": 1.1644568758129026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201170 + }, + { + "epoch": 0.9756866434764475, + "grad_norm": 1.2356496199572575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201180 + }, + { + "epoch": 0.9757351416692835, + "grad_norm": 6.066196078791108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201190 + }, + { + "epoch": 0.9757836398621197, + "grad_norm": 6.384967718986445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201200 + }, + { + "epoch": 0.9758321380549557, + "grad_norm": 1.157734345724748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201210 + }, + { + "epoch": 0.9758806362477919, + "grad_norm": 8.914779527913197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201220 + }, + { + "epoch": 0.9759291344406279, + "grad_norm": 1.2851620567744249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201230 + }, + { + "epoch": 0.975977632633464, + "grad_norm": 7.219014150905423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201240 + }, + { + "epoch": 0.9760261308263001, + "grad_norm": 8.865699783200398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201250 + }, + { + "epoch": 0.9760746290191362, + "grad_norm": 1.3850434470441542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201260 + }, + { + "epoch": 0.9761231272119723, + "grad_norm": 1.8466320170773542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201270 + }, + { + "epoch": 0.9761716254048084, + "grad_norm": 1.402572024744586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201280 + }, + { + "epoch": 0.9762201235976444, + "grad_norm": 8.452400379610481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201290 + }, + { + "epoch": 0.9762686217904806, + "grad_norm": 7.21779827017599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201300 + }, + { + "epoch": 0.9763171199833166, + "grad_norm": 1.2744817468046676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201310 + }, + { + "epoch": 0.9763656181761527, + "grad_norm": 1.2367648878353066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201320 + }, + { + "epoch": 0.9764141163689888, + "grad_norm": 2.637257239257451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201330 + }, + { + "epoch": 0.9764626145618249, + "grad_norm": 6.84585700128082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201340 + }, + { + "epoch": 0.976511112754661, + "grad_norm": 8.910811857276713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201350 + }, + { + "epoch": 0.9765596109474971, + "grad_norm": 1.211495259667572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201360 + }, + { + "epoch": 0.9766081091403331, + "grad_norm": 1.2804763400708907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201370 + }, + { + "epoch": 0.9766566073331693, + "grad_norm": 1.2457659295250778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201380 + }, + { + "epoch": 0.9767051055260053, + "grad_norm": 6.623376407333126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201390 + }, + { + "epoch": 0.9767536037188415, + "grad_norm": 5.785232701782661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201400 + }, + { + "epoch": 0.9768021019116775, + "grad_norm": 1.1743654795282055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201410 + }, + { + "epoch": 0.9768506001045136, + "grad_norm": 1.110185280595033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201420 + }, + { + "epoch": 0.9768990982973497, + "grad_norm": 1.1676536360027967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201430 + }, + { + "epoch": 0.9769475964901858, + "grad_norm": 6.374036161105323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201440 + }, + { + "epoch": 0.9769960946830218, + "grad_norm": 6.929956839485385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201450 + }, + { + "epoch": 0.977044592875858, + "grad_norm": 1.8392547644907609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201460 + }, + { + "epoch": 0.977093091068694, + "grad_norm": 0.00012809087638743222, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 201470 + }, + { + "epoch": 0.9771415892615302, + "grad_norm": 0.0003534356947056949, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 201480 + }, + { + "epoch": 0.9771900874543662, + "grad_norm": 0.00032598606776446104, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 201490 + }, + { + "epoch": 0.9772385856472023, + "grad_norm": 3.1543022487312555e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201500 + }, + { + "epoch": 0.9772870838400384, + "grad_norm": 0.0001858054893091321, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201510 + }, + { + "epoch": 0.9773355820328745, + "grad_norm": 1.861611963249743e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201520 + }, + { + "epoch": 0.9773840802257105, + "grad_norm": 9.29940797504969e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201530 + }, + { + "epoch": 0.9774325784185467, + "grad_norm": 7.0217538450378925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201540 + }, + { + "epoch": 0.9774810766113827, + "grad_norm": 6.52393146083341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201550 + }, + { + "epoch": 0.9775295748042189, + "grad_norm": 7.515835477533983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201560 + }, + { + "epoch": 0.9775780729970549, + "grad_norm": 7.579207704111468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201570 + }, + { + "epoch": 0.977626571189891, + "grad_norm": 8.01087480795104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201580 + }, + { + "epoch": 0.9776750693827271, + "grad_norm": 5.453834546642611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201590 + }, + { + "epoch": 0.9777235675755632, + "grad_norm": 1.8792308765114285e-05, + "learning_rate": 0.0002, + "loss": 0.0051, + "step": 201600 + }, + { + "epoch": 0.9777720657683993, + "grad_norm": 0.00024279307399410754, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 201610 + }, + { + "epoch": 0.9778205639612354, + "grad_norm": 0.003667350858449936, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 201620 + }, + { + "epoch": 0.9778690621540714, + "grad_norm": 0.0005447435542009771, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 201630 + }, + { + "epoch": 0.9779175603469076, + "grad_norm": 6.705531995976344e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201640 + }, + { + "epoch": 0.9779660585397436, + "grad_norm": 2.7474097805679776e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201650 + }, + { + "epoch": 0.9780145567325798, + "grad_norm": 3.075299900956452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201660 + }, + { + "epoch": 0.9780630549254158, + "grad_norm": 2.5470859327469952e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201670 + }, + { + "epoch": 0.9781115531182519, + "grad_norm": 2.3043117835186422e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201680 + }, + { + "epoch": 0.9781600513110881, + "grad_norm": 1.6411438991781324e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201690 + }, + { + "epoch": 0.9782085495039241, + "grad_norm": 1.6207417502300814e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201700 + }, + { + "epoch": 0.9782570476967603, + "grad_norm": 1.7125530575867742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201710 + }, + { + "epoch": 0.9783055458895963, + "grad_norm": 2.1468389604706317e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201720 + }, + { + "epoch": 0.9783540440824324, + "grad_norm": 0.1322820633649826, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 201730 + }, + { + "epoch": 0.9784025422752685, + "grad_norm": 1.4383695997821633e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201740 + }, + { + "epoch": 0.9784510404681046, + "grad_norm": 1.606687146704644e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201750 + }, + { + "epoch": 0.9784995386609406, + "grad_norm": 1.7840568034444004e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 201760 + }, + { + "epoch": 0.9785480368537768, + "grad_norm": 1.206223987537669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201770 + }, + { + "epoch": 0.9785965350466128, + "grad_norm": 1.1228741641389206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201780 + }, + { + "epoch": 0.978645033239449, + "grad_norm": 6.205265890457667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201790 + }, + { + "epoch": 0.978693531432285, + "grad_norm": 6.2623171288578305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201800 + }, + { + "epoch": 0.9787420296251211, + "grad_norm": 0.0006353824282996356, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201810 + }, + { + "epoch": 0.9787905278179572, + "grad_norm": 1.219892601511674e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201820 + }, + { + "epoch": 0.9788390260107933, + "grad_norm": 8.21285448182607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201830 + }, + { + "epoch": 0.9788875242036293, + "grad_norm": 5.6687090363993775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201840 + }, + { + "epoch": 0.9789360223964655, + "grad_norm": 5.190237061469816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201850 + }, + { + "epoch": 0.9789845205893015, + "grad_norm": 7.807070687704254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201860 + }, + { + "epoch": 0.9790330187821377, + "grad_norm": 6.14645832683891e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 201870 + }, + { + "epoch": 0.9790815169749737, + "grad_norm": 0.0002952046925202012, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201880 + }, + { + "epoch": 0.9791300151678098, + "grad_norm": 0.00012311046884860843, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201890 + }, + { + "epoch": 0.9791785133606459, + "grad_norm": 2.792750092339702e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201900 + }, + { + "epoch": 0.979227011553482, + "grad_norm": 2.8841441235272214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201910 + }, + { + "epoch": 0.979275509746318, + "grad_norm": 1.426666131010279e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201920 + }, + { + "epoch": 0.9793240079391542, + "grad_norm": 1.2785928447556216e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201930 + }, + { + "epoch": 0.9793725061319902, + "grad_norm": 1.289596639253432e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201940 + }, + { + "epoch": 0.9794210043248264, + "grad_norm": 1.267578591068741e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201950 + }, + { + "epoch": 0.9794695025176624, + "grad_norm": 1.1418526810302865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201960 + }, + { + "epoch": 0.9795180007104985, + "grad_norm": 9.739060260471888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201970 + }, + { + "epoch": 0.9795664989033346, + "grad_norm": 8.49953357828781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201980 + }, + { + "epoch": 0.9796149970961707, + "grad_norm": 9.727003089210484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 201990 + }, + { + "epoch": 0.9796634952890068, + "grad_norm": 8.652364158479031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202000 + }, + { + "epoch": 0.9797119934818429, + "grad_norm": 8.878670996637084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202010 + }, + { + "epoch": 0.9797604916746789, + "grad_norm": 7.80143636802677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202020 + }, + { + "epoch": 0.9798089898675151, + "grad_norm": 7.828889465599786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202030 + }, + { + "epoch": 0.9798574880603511, + "grad_norm": 6.795502940803999e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202040 + }, + { + "epoch": 0.9799059862531873, + "grad_norm": 6.825091531936778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202050 + }, + { + "epoch": 0.9799544844460233, + "grad_norm": 6.269684035942191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202060 + }, + { + "epoch": 0.9800029826388594, + "grad_norm": 1.0415939868835267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202070 + }, + { + "epoch": 0.9800514808316955, + "grad_norm": 6.2783788052911405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202080 + }, + { + "epoch": 0.9800999790245316, + "grad_norm": 6.046788257663138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202090 + }, + { + "epoch": 0.9801484772173676, + "grad_norm": 6.435649993363768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202100 + }, + { + "epoch": 0.9801969754102038, + "grad_norm": 6.236764420464169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202110 + }, + { + "epoch": 0.9802454736030398, + "grad_norm": 5.87896965953405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202120 + }, + { + "epoch": 0.980293971795876, + "grad_norm": 5.413921371655306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202130 + }, + { + "epoch": 0.980342469988712, + "grad_norm": 6.994448995101266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202140 + }, + { + "epoch": 0.9803909681815481, + "grad_norm": 3.853399448416894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202150 + }, + { + "epoch": 0.9804394663743842, + "grad_norm": 4.252302460372448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202160 + }, + { + "epoch": 0.9804879645672203, + "grad_norm": 4.226200871926267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202170 + }, + { + "epoch": 0.9805364627600563, + "grad_norm": 4.393444214656483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202180 + }, + { + "epoch": 0.9805849609528925, + "grad_norm": 3.2333869057765696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202190 + }, + { + "epoch": 0.9806334591457286, + "grad_norm": 3.2676371120032854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202200 + }, + { + "epoch": 0.9806819573385647, + "grad_norm": 4.1602729652368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202210 + }, + { + "epoch": 0.9807304555314008, + "grad_norm": 3.9257665775949135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202220 + }, + { + "epoch": 0.9807789537242368, + "grad_norm": 3.9103142626117915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202230 + }, + { + "epoch": 0.980827451917073, + "grad_norm": 3.2657935662427917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202240 + }, + { + "epoch": 0.980875950109909, + "grad_norm": 3.4924130432045786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202250 + }, + { + "epoch": 0.9809244483027452, + "grad_norm": 3.598839839469292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202260 + }, + { + "epoch": 0.9809729464955812, + "grad_norm": 3.6718743103847373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202270 + }, + { + "epoch": 0.9810214446884173, + "grad_norm": 3.8064772525103763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202280 + }, + { + "epoch": 0.9810699428812534, + "grad_norm": 3.1742724786454346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202290 + }, + { + "epoch": 0.9811184410740895, + "grad_norm": 3.5810444387607276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202300 + }, + { + "epoch": 0.9811669392669256, + "grad_norm": 3.386447360753664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202310 + }, + { + "epoch": 0.9812154374597617, + "grad_norm": 3.362871439094306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202320 + }, + { + "epoch": 0.9812639356525977, + "grad_norm": 3.1919798857416026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202330 + }, + { + "epoch": 0.9813124338454339, + "grad_norm": 2.891058102250099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202340 + }, + { + "epoch": 0.9813609320382699, + "grad_norm": 2.6774687285069376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202350 + }, + { + "epoch": 0.981409430231106, + "grad_norm": 3.2901143640629016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202360 + }, + { + "epoch": 0.9814579284239421, + "grad_norm": 3.0742871786060277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202370 + }, + { + "epoch": 0.9815064266167782, + "grad_norm": 3.325832039990928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202380 + }, + { + "epoch": 0.9815549248096143, + "grad_norm": 2.584030426078243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202390 + }, + { + "epoch": 0.9816034230024504, + "grad_norm": 2.4628791379655013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202400 + }, + { + "epoch": 0.9816519211952864, + "grad_norm": 3.0096928185230354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202410 + }, + { + "epoch": 0.9817004193881226, + "grad_norm": 3.0139578939269995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202420 + }, + { + "epoch": 0.9817489175809586, + "grad_norm": 2.9941106731712352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202430 + }, + { + "epoch": 0.9817974157737948, + "grad_norm": 7.915177775430493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202440 + }, + { + "epoch": 0.9818459139666308, + "grad_norm": 2.4426144591416232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202450 + }, + { + "epoch": 0.9818944121594669, + "grad_norm": 2.7866954042110592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202460 + }, + { + "epoch": 0.981942910352303, + "grad_norm": 2.76175683211477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202470 + }, + { + "epoch": 0.9819914085451391, + "grad_norm": 2.85892224383133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202480 + }, + { + "epoch": 0.9820399067379751, + "grad_norm": 2.5511196781735634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202490 + }, + { + "epoch": 0.9820884049308113, + "grad_norm": 2.3667248569836374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202500 + }, + { + "epoch": 0.9821369031236473, + "grad_norm": 2.711429942792165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202510 + }, + { + "epoch": 0.9821854013164835, + "grad_norm": 2.5405449832760496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202520 + }, + { + "epoch": 0.9822338995093195, + "grad_norm": 2.739129286055686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202530 + }, + { + "epoch": 0.9822823977021556, + "grad_norm": 2.451532054692507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202540 + }, + { + "epoch": 0.9823308958949917, + "grad_norm": 2.4490730083925882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202550 + }, + { + "epoch": 0.9823793940878278, + "grad_norm": 2.4342118649656186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202560 + }, + { + "epoch": 0.9824278922806639, + "grad_norm": 2.5913709578162525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202570 + }, + { + "epoch": 0.9824763904735, + "grad_norm": 2.472060032232548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202580 + }, + { + "epoch": 0.982524888666336, + "grad_norm": 2.5808551527006784e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 202590 + }, + { + "epoch": 0.9825733868591722, + "grad_norm": 0.0007306264014914632, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202600 + }, + { + "epoch": 0.9826218850520082, + "grad_norm": 3.0447194149019197e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202610 + }, + { + "epoch": 0.9826703832448443, + "grad_norm": 1.756395613483619e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202620 + }, + { + "epoch": 0.9827188814376804, + "grad_norm": 9.943893928721081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202630 + }, + { + "epoch": 0.9827673796305165, + "grad_norm": 1.680117566138506e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202640 + }, + { + "epoch": 0.9828158778233526, + "grad_norm": 7.1461709012510255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202650 + }, + { + "epoch": 0.9828643760161887, + "grad_norm": 4.782549694937188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202660 + }, + { + "epoch": 0.9829128742090247, + "grad_norm": 4.293608071748167e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202670 + }, + { + "epoch": 0.9829613724018609, + "grad_norm": 3.5851410302711884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202680 + }, + { + "epoch": 0.9830098705946969, + "grad_norm": 4.337823611422209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202690 + }, + { + "epoch": 0.983058368787533, + "grad_norm": 4.155494480073685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202700 + }, + { + "epoch": 0.9831068669803692, + "grad_norm": 3.065056489504059e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202710 + }, + { + "epoch": 0.9831553651732052, + "grad_norm": 2.844467189788702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202720 + }, + { + "epoch": 0.9832038633660414, + "grad_norm": 2.753567514446331e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202730 + }, + { + "epoch": 0.9832523615588774, + "grad_norm": 3.5404934806138044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202740 + }, + { + "epoch": 0.9833008597517136, + "grad_norm": 3.935428594559198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202750 + }, + { + "epoch": 0.9833493579445496, + "grad_norm": 3.1210752240440343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202760 + }, + { + "epoch": 0.9833978561373857, + "grad_norm": 2.8327101517788833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202770 + }, + { + "epoch": 0.9834463543302218, + "grad_norm": 2.4520393253624206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202780 + }, + { + "epoch": 0.9834948525230579, + "grad_norm": 3.0042751859582495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202790 + }, + { + "epoch": 0.9835433507158939, + "grad_norm": 2.567601313785417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202800 + }, + { + "epoch": 0.9835918489087301, + "grad_norm": 2.310270019734162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202810 + }, + { + "epoch": 0.9836403471015661, + "grad_norm": 2.1851781184523134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202820 + }, + { + "epoch": 0.9836888452944023, + "grad_norm": 2.2617389277002076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202830 + }, + { + "epoch": 0.9837373434872383, + "grad_norm": 2.4848645807651337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202840 + }, + { + "epoch": 0.9837858416800744, + "grad_norm": 2.7570472411753144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202850 + }, + { + "epoch": 0.9838343398729105, + "grad_norm": 2.2686513148073573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202860 + }, + { + "epoch": 0.9838828380657466, + "grad_norm": 2.0835257146245567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202870 + }, + { + "epoch": 0.9839313362585826, + "grad_norm": 2.133420821337495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202880 + }, + { + "epoch": 0.9839798344514188, + "grad_norm": 2.0216327811795054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202890 + }, + { + "epoch": 0.9840283326442548, + "grad_norm": 2.6588882064970676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202900 + }, + { + "epoch": 0.984076830837091, + "grad_norm": 1.796368565010198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202910 + }, + { + "epoch": 0.984125329029927, + "grad_norm": 1.8954366396428668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202920 + }, + { + "epoch": 0.9841738272227631, + "grad_norm": 2.0337588466645684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202930 + }, + { + "epoch": 0.9842223254155992, + "grad_norm": 2.309043566128821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202940 + }, + { + "epoch": 0.9842708236084353, + "grad_norm": 2.058070776911336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202950 + }, + { + "epoch": 0.9843193218012714, + "grad_norm": 1.674540612839337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202960 + }, + { + "epoch": 0.9843678199941075, + "grad_norm": 2.167563479815726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202970 + }, + { + "epoch": 0.9844163181869435, + "grad_norm": 1.647330122978019e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202980 + }, + { + "epoch": 0.9844648163797797, + "grad_norm": 2.077701310554403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 202990 + }, + { + "epoch": 0.9845133145726157, + "grad_norm": 2.0337115529400762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203000 + }, + { + "epoch": 0.9845618127654518, + "grad_norm": 1.6457212268505828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203010 + }, + { + "epoch": 0.9846103109582879, + "grad_norm": 2.002364681175095e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 203020 + }, + { + "epoch": 0.984658809151124, + "grad_norm": 4.146102583035827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203030 + }, + { + "epoch": 0.9847073073439601, + "grad_norm": 4.566973530018004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203040 + }, + { + "epoch": 0.9847558055367962, + "grad_norm": 4.913259999739239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203050 + }, + { + "epoch": 0.9848043037296322, + "grad_norm": 5.547064120037248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203060 + }, + { + "epoch": 0.9848528019224684, + "grad_norm": 5.7093870964308735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203070 + }, + { + "epoch": 0.9849013001153044, + "grad_norm": 5.297606548992917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203080 + }, + { + "epoch": 0.9849497983081406, + "grad_norm": 4.228573288855841e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203090 + }, + { + "epoch": 0.9849982965009766, + "grad_norm": 3.7507634260691702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203100 + }, + { + "epoch": 0.9850467946938127, + "grad_norm": 4.334443929110421e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203110 + }, + { + "epoch": 0.9850952928866488, + "grad_norm": 4.522698418440996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203120 + }, + { + "epoch": 0.9851437910794849, + "grad_norm": 4.086532953806454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203130 + }, + { + "epoch": 0.9851922892723209, + "grad_norm": 2.9613390779559268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203140 + }, + { + "epoch": 0.9852407874651571, + "grad_norm": 3.0511819204548374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203150 + }, + { + "epoch": 0.9852892856579931, + "grad_norm": 3.5117679999530083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203160 + }, + { + "epoch": 0.9853377838508293, + "grad_norm": 3.411147417864413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203170 + }, + { + "epoch": 0.9853862820436653, + "grad_norm": 3.1628244414605433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203180 + }, + { + "epoch": 0.9854347802365014, + "grad_norm": 2.6026978048321325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203190 + }, + { + "epoch": 0.9854832784293375, + "grad_norm": 2.4011967525439104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203200 + }, + { + "epoch": 0.9855317766221736, + "grad_norm": 0.0007085402612574399, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203210 + }, + { + "epoch": 0.9855802748150098, + "grad_norm": 2.8491172088251915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203220 + }, + { + "epoch": 0.9856287730078458, + "grad_norm": 2.8561491944856243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203230 + }, + { + "epoch": 0.9856772712006819, + "grad_norm": 2.0506081455096137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203240 + }, + { + "epoch": 0.985725769393518, + "grad_norm": 2.3040422547637718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203250 + }, + { + "epoch": 0.9857742675863541, + "grad_norm": 2.8256029054318788e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 203260 + }, + { + "epoch": 0.9858227657791901, + "grad_norm": 1.5321940736612305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203270 + }, + { + "epoch": 0.9858712639720263, + "grad_norm": 4.19778807554394e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203280 + }, + { + "epoch": 0.9859197621648623, + "grad_norm": 3.3671680284896865e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203290 + }, + { + "epoch": 0.9859682603576985, + "grad_norm": 1.3628838132717647e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203300 + }, + { + "epoch": 0.9860167585505345, + "grad_norm": 1.2432461517164484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203310 + }, + { + "epoch": 0.9860652567433706, + "grad_norm": 1.7213376850122586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203320 + }, + { + "epoch": 0.9861137549362067, + "grad_norm": 1.0689190276025329e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 203330 + }, + { + "epoch": 0.9861622531290428, + "grad_norm": 3.840518547804095e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203340 + }, + { + "epoch": 0.9862107513218789, + "grad_norm": 7.713171362411231e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203350 + }, + { + "epoch": 0.986259249514715, + "grad_norm": 3.6177636502543464e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203360 + }, + { + "epoch": 0.986307747707551, + "grad_norm": 7.508601811423432e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 203370 + }, + { + "epoch": 0.9863562459003872, + "grad_norm": 6.878034128021682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203380 + }, + { + "epoch": 0.9864047440932232, + "grad_norm": 4.76648392577772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203390 + }, + { + "epoch": 0.9864532422860594, + "grad_norm": 5.131795660417993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203400 + }, + { + "epoch": 0.9865017404788954, + "grad_norm": 6.7579030655906536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203410 + }, + { + "epoch": 0.9865502386717315, + "grad_norm": 4.737707058666274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203420 + }, + { + "epoch": 0.9865987368645676, + "grad_norm": 5.2922691793355625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203430 + }, + { + "epoch": 0.9866472350574037, + "grad_norm": 4.4459256969275884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203440 + }, + { + "epoch": 0.9866957332502397, + "grad_norm": 4.242686827637954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203450 + }, + { + "epoch": 0.9867442314430759, + "grad_norm": 5.337989477993688e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203460 + }, + { + "epoch": 0.9867927296359119, + "grad_norm": 5.9951585171802435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203470 + }, + { + "epoch": 0.9868412278287481, + "grad_norm": 6.175142061692895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203480 + }, + { + "epoch": 0.9868897260215841, + "grad_norm": 4.150262611801736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203490 + }, + { + "epoch": 0.9869382242144202, + "grad_norm": 3.6426104088604916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203500 + }, + { + "epoch": 0.9869867224072563, + "grad_norm": 5.1223555601609405e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203510 + }, + { + "epoch": 0.9870352206000924, + "grad_norm": 4.950109541823622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203520 + }, + { + "epoch": 0.9870837187929284, + "grad_norm": 5.446355316962581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203530 + }, + { + "epoch": 0.9871322169857646, + "grad_norm": 5.354309450922301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203540 + }, + { + "epoch": 0.9871807151786006, + "grad_norm": 4.474232355278218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203550 + }, + { + "epoch": 0.9872292133714368, + "grad_norm": 3.9988158277992625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203560 + }, + { + "epoch": 0.9872777115642728, + "grad_norm": 4.021002951049013e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203570 + }, + { + "epoch": 0.9873262097571089, + "grad_norm": 4.997918040317018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203580 + }, + { + "epoch": 0.987374707949945, + "grad_norm": 4.879173502558842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203590 + }, + { + "epoch": 0.9874232061427811, + "grad_norm": 3.987682248407509e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 203600 + }, + { + "epoch": 0.9874717043356172, + "grad_norm": 5.319943284121109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203610 + }, + { + "epoch": 0.9875202025284533, + "grad_norm": 4.31133321399102e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203620 + }, + { + "epoch": 0.9875687007212893, + "grad_norm": 0.01118764840066433, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 203630 + }, + { + "epoch": 0.9876171989141255, + "grad_norm": 0.00013472915452439338, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203640 + }, + { + "epoch": 0.9876656971069615, + "grad_norm": 0.00025263859424740076, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203650 + }, + { + "epoch": 0.9877141952997976, + "grad_norm": 0.02103986032307148, + "learning_rate": 0.0002, + "loss": 0.0022, + "step": 203660 + }, + { + "epoch": 0.9877626934926337, + "grad_norm": 0.009405383840203285, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 203670 + }, + { + "epoch": 0.9878111916854698, + "grad_norm": 0.00014694445417262614, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203680 + }, + { + "epoch": 0.9878596898783059, + "grad_norm": 0.004358131438493729, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 203690 + }, + { + "epoch": 0.987908188071142, + "grad_norm": 0.00020460448286030442, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203700 + }, + { + "epoch": 0.987956686263978, + "grad_norm": 0.0013148357393220067, + "learning_rate": 0.0002, + "loss": 0.0686, + "step": 203710 + }, + { + "epoch": 0.9880051844568142, + "grad_norm": 0.00019186460122000426, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 203720 + }, + { + "epoch": 0.9880536826496503, + "grad_norm": 0.0968577191233635, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 203730 + }, + { + "epoch": 0.9881021808424864, + "grad_norm": 0.0019152471795678139, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 203740 + }, + { + "epoch": 0.9881506790353225, + "grad_norm": 2.7913483791053295e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203750 + }, + { + "epoch": 0.9881991772281585, + "grad_norm": 3.2719646696932614e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203760 + }, + { + "epoch": 0.9882476754209947, + "grad_norm": 3.8005051465006545e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203770 + }, + { + "epoch": 0.9882961736138307, + "grad_norm": 0.00028499308973550797, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203780 + }, + { + "epoch": 0.9883446718066669, + "grad_norm": 1.0038993423222564e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 203790 + }, + { + "epoch": 0.9883931699995029, + "grad_norm": 1.6477368262712844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203800 + }, + { + "epoch": 0.988441668192339, + "grad_norm": 1.296484060731018e-05, + "learning_rate": 0.0002, + "loss": 0.0132, + "step": 203810 + }, + { + "epoch": 0.9884901663851751, + "grad_norm": 6.549506360897794e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 203820 + }, + { + "epoch": 0.9885386645780112, + "grad_norm": 0.0004593875491991639, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 203830 + }, + { + "epoch": 0.9885871627708472, + "grad_norm": 0.00012047446216456592, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 203840 + }, + { + "epoch": 0.9886356609636834, + "grad_norm": 6.387654866557568e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203850 + }, + { + "epoch": 0.9886841591565194, + "grad_norm": 0.00012780829274561256, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203860 + }, + { + "epoch": 0.9887326573493556, + "grad_norm": 7.087207632139325e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203870 + }, + { + "epoch": 0.9887811555421916, + "grad_norm": 4.4854397856397554e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203880 + }, + { + "epoch": 0.9888296537350277, + "grad_norm": 3.587594983400777e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203890 + }, + { + "epoch": 0.9888781519278638, + "grad_norm": 2.8069318432244472e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203900 + }, + { + "epoch": 0.9889266501206999, + "grad_norm": 2.4881621357053518e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203910 + }, + { + "epoch": 0.988975148313536, + "grad_norm": 0.00011298462777631357, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203920 + }, + { + "epoch": 0.9890236465063721, + "grad_norm": 2.0830002540606074e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203930 + }, + { + "epoch": 0.9890721446992081, + "grad_norm": 0.013576875440776348, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203940 + }, + { + "epoch": 0.9891206428920443, + "grad_norm": 1.5896273907856084e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203950 + }, + { + "epoch": 0.9891691410848803, + "grad_norm": 1.795020216377452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203960 + }, + { + "epoch": 0.9892176392777164, + "grad_norm": 1.4704766726936214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203970 + }, + { + "epoch": 0.9892661374705525, + "grad_norm": 1.4700584870297462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203980 + }, + { + "epoch": 0.9893146356633886, + "grad_norm": 1.3739429050474428e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 203990 + }, + { + "epoch": 0.9893631338562247, + "grad_norm": 1.2841848729294725e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204000 + }, + { + "epoch": 0.9894116320490608, + "grad_norm": 1.1862036444654223e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204010 + }, + { + "epoch": 0.9894601302418968, + "grad_norm": 1.1025099411199335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204020 + }, + { + "epoch": 0.989508628434733, + "grad_norm": 1.1297418495814782e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204030 + }, + { + "epoch": 0.989557126627569, + "grad_norm": 1.0975690202030819e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204040 + }, + { + "epoch": 0.9896056248204051, + "grad_norm": 1.000163047137903e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204050 + }, + { + "epoch": 0.9896541230132412, + "grad_norm": 9.49444165598834e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 204060 + }, + { + "epoch": 0.9897026212060773, + "grad_norm": 1.3891519301978406e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204070 + }, + { + "epoch": 0.9897511193989134, + "grad_norm": 1.706398143141996e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204080 + }, + { + "epoch": 0.9897996175917495, + "grad_norm": 4.037494363728911e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204090 + }, + { + "epoch": 0.9898481157845855, + "grad_norm": 1.7620248399907723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204100 + }, + { + "epoch": 0.9898966139774217, + "grad_norm": 1.825803155952599e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204110 + }, + { + "epoch": 0.9899451121702577, + "grad_norm": 1.6690284610376693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204120 + }, + { + "epoch": 0.9899936103630939, + "grad_norm": 1.661323221924249e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204130 + }, + { + "epoch": 0.9900421085559299, + "grad_norm": 1.4587934856535867e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204140 + }, + { + "epoch": 0.990090606748766, + "grad_norm": 1.4282272786658723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204150 + }, + { + "epoch": 0.9901391049416021, + "grad_norm": 1.666926618781872e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204160 + }, + { + "epoch": 0.9901876031344382, + "grad_norm": 1.3777485946775414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204170 + }, + { + "epoch": 0.9902361013272742, + "grad_norm": 1.3359065633267164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204180 + }, + { + "epoch": 0.9902845995201104, + "grad_norm": 1.3272222531668376e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204190 + }, + { + "epoch": 0.9903330977129464, + "grad_norm": 1.1584085768845398e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204200 + }, + { + "epoch": 0.9903815959057826, + "grad_norm": 1.1706472832884174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204210 + }, + { + "epoch": 0.9904300940986186, + "grad_norm": 1.174549379356904e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204220 + }, + { + "epoch": 0.9904785922914547, + "grad_norm": 1.09651127786492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204230 + }, + { + "epoch": 0.9905270904842909, + "grad_norm": 1.0139630830963142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204240 + }, + { + "epoch": 0.9905755886771269, + "grad_norm": 1.4898461813572794e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204250 + }, + { + "epoch": 0.9906240868699631, + "grad_norm": 1.0372551514592487e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204260 + }, + { + "epoch": 0.9906725850627991, + "grad_norm": 9.60664328886196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204270 + }, + { + "epoch": 0.9907210832556352, + "grad_norm": 1.1190754776180256e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204280 + }, + { + "epoch": 0.9907695814484713, + "grad_norm": 8.82614403963089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204290 + }, + { + "epoch": 0.9908180796413074, + "grad_norm": 8.789271305431612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204300 + }, + { + "epoch": 0.9908665778341434, + "grad_norm": 9.134492756857071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204310 + }, + { + "epoch": 0.9909150760269796, + "grad_norm": 8.684454769536387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204320 + }, + { + "epoch": 0.9909635742198156, + "grad_norm": 1.0030961675511207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204330 + }, + { + "epoch": 0.9910120724126518, + "grad_norm": 0.0006132572889328003, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204340 + }, + { + "epoch": 0.9910605706054878, + "grad_norm": 7.791161806380842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204350 + }, + { + "epoch": 0.9911090687983239, + "grad_norm": 8.341714419657364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204360 + }, + { + "epoch": 0.99115756699116, + "grad_norm": 1.0427664165035821e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204370 + }, + { + "epoch": 0.9912060651839961, + "grad_norm": 7.557179287687177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204380 + }, + { + "epoch": 0.9912545633768322, + "grad_norm": 7.122818260540953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204390 + }, + { + "epoch": 0.9913030615696683, + "grad_norm": 7.172019650170114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204400 + }, + { + "epoch": 0.9913515597625043, + "grad_norm": 7.712686965533067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204410 + }, + { + "epoch": 0.9914000579553405, + "grad_norm": 7.009829460002948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204420 + }, + { + "epoch": 0.9914485561481765, + "grad_norm": 6.796619800297776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204430 + }, + { + "epoch": 0.9914970543410127, + "grad_norm": 6.611297976633068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204440 + }, + { + "epoch": 0.9915455525338487, + "grad_norm": 1.0451206435391214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204450 + }, + { + "epoch": 0.9915940507266848, + "grad_norm": 6.693776867905399e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204460 + }, + { + "epoch": 0.9916425489195209, + "grad_norm": 6.54579525871668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204470 + }, + { + "epoch": 0.991691047112357, + "grad_norm": 1.8148613889934495e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204480 + }, + { + "epoch": 0.991739545305193, + "grad_norm": 5.8826008171308786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204490 + }, + { + "epoch": 0.9917880434980292, + "grad_norm": 7.867372005421203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204500 + }, + { + "epoch": 0.9918365416908652, + "grad_norm": 5.889969543204643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204510 + }, + { + "epoch": 0.9918850398837014, + "grad_norm": 5.805285127280513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204520 + }, + { + "epoch": 0.9919335380765374, + "grad_norm": 5.681558832293376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204530 + }, + { + "epoch": 0.9919820362693735, + "grad_norm": 5.516174951480934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204540 + }, + { + "epoch": 0.9920305344622096, + "grad_norm": 5.386105385696283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204550 + }, + { + "epoch": 0.9920790326550457, + "grad_norm": 7.979710971994791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204560 + }, + { + "epoch": 0.9921275308478817, + "grad_norm": 5.500287443283014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204570 + }, + { + "epoch": 0.9921760290407179, + "grad_norm": 5.229732323641656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204580 + }, + { + "epoch": 0.9922245272335539, + "grad_norm": 4.961505510436837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204590 + }, + { + "epoch": 0.9922730254263901, + "grad_norm": 4.915478257316863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204600 + }, + { + "epoch": 0.9923215236192261, + "grad_norm": 5.064125161879929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204610 + }, + { + "epoch": 0.9923700218120622, + "grad_norm": 5.640190011035884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204620 + }, + { + "epoch": 0.9924185200048983, + "grad_norm": 4.708043434220599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204630 + }, + { + "epoch": 0.9924670181977344, + "grad_norm": 4.460527634364553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204640 + }, + { + "epoch": 0.9925155163905705, + "grad_norm": 4.531147624220466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204650 + }, + { + "epoch": 0.9925640145834066, + "grad_norm": 5.139060249348404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204660 + }, + { + "epoch": 0.9926125127762426, + "grad_norm": 4.276420895621413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204670 + }, + { + "epoch": 0.9926610109690788, + "grad_norm": 4.6792274588369764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204680 + }, + { + "epoch": 0.9927095091619148, + "grad_norm": 4.306008122512139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204690 + }, + { + "epoch": 0.992758007354751, + "grad_norm": 4.140022610954475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204700 + }, + { + "epoch": 0.992806505547587, + "grad_norm": 4.04292131861439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204710 + }, + { + "epoch": 0.9928550037404231, + "grad_norm": 4.170236024947371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204720 + }, + { + "epoch": 0.9929035019332592, + "grad_norm": 4.428728971106466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204730 + }, + { + "epoch": 0.9929520001260953, + "grad_norm": 3.762921778616146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204740 + }, + { + "epoch": 0.9930004983189314, + "grad_norm": 3.749490588234039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204750 + }, + { + "epoch": 0.9930489965117675, + "grad_norm": 3.7617405723722186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204760 + }, + { + "epoch": 0.9930974947046036, + "grad_norm": 4.977833214070415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204770 + }, + { + "epoch": 0.9931459928974397, + "grad_norm": 3.7175684610701865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204780 + }, + { + "epoch": 0.9931944910902758, + "grad_norm": 3.3298065318376757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204790 + }, + { + "epoch": 0.9932429892831118, + "grad_norm": 3.3373914902767865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204800 + }, + { + "epoch": 0.993291487475948, + "grad_norm": 3.6150470350548858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204810 + }, + { + "epoch": 0.993339985668784, + "grad_norm": 3.3302526389888953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204820 + }, + { + "epoch": 0.9933884838616202, + "grad_norm": 3.334389703013585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204830 + }, + { + "epoch": 0.9934369820544562, + "grad_norm": 4.216036813886603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204840 + }, + { + "epoch": 0.9934854802472923, + "grad_norm": 3.2286395708069904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204850 + }, + { + "epoch": 0.9935339784401284, + "grad_norm": 3.6227274904376827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204860 + }, + { + "epoch": 0.9935824766329645, + "grad_norm": 4.049825292895548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204870 + }, + { + "epoch": 0.9936309748258005, + "grad_norm": 3.180121211698861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204880 + }, + { + "epoch": 0.9936794730186367, + "grad_norm": 3.0500584671244724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204890 + }, + { + "epoch": 0.9937279712114727, + "grad_norm": 2.880452939280076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204900 + }, + { + "epoch": 0.9937764694043089, + "grad_norm": 2.8715667212964036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204910 + }, + { + "epoch": 0.9938249675971449, + "grad_norm": 2.927100695160334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204920 + }, + { + "epoch": 0.993873465789981, + "grad_norm": 2.9107345653756056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204930 + }, + { + "epoch": 0.9939219639828171, + "grad_norm": 2.70959344561561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204940 + }, + { + "epoch": 0.9939704621756532, + "grad_norm": 3.1028209832584253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204950 + }, + { + "epoch": 0.9940189603684892, + "grad_norm": 2.929380343630328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204960 + }, + { + "epoch": 0.9940674585613254, + "grad_norm": 2.8637830382649554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204970 + }, + { + "epoch": 0.9941159567541614, + "grad_norm": 3.2243306122836657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204980 + }, + { + "epoch": 0.9941644549469976, + "grad_norm": 2.5523477233946323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 204990 + }, + { + "epoch": 0.9942129531398336, + "grad_norm": 2.5756430659384932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205000 + }, + { + "epoch": 0.9942614513326697, + "grad_norm": 4.14916166846524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205010 + }, + { + "epoch": 0.9943099495255058, + "grad_norm": 2.548128804846783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205020 + }, + { + "epoch": 0.9943584477183419, + "grad_norm": 2.5566585009073606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205030 + }, + { + "epoch": 0.994406945911178, + "grad_norm": 2.3873624286352424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205040 + }, + { + "epoch": 0.9944554441040141, + "grad_norm": 2.2971100861468585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205050 + }, + { + "epoch": 0.9945039422968501, + "grad_norm": 2.4184655558201484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205060 + }, + { + "epoch": 0.9945524404896863, + "grad_norm": 2.4197645416279556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205070 + }, + { + "epoch": 0.9946009386825223, + "grad_norm": 2.239211653431994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205080 + }, + { + "epoch": 0.9946494368753585, + "grad_norm": 2.280278522448498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205090 + }, + { + "epoch": 0.9946979350681945, + "grad_norm": 2.265195689687971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205100 + }, + { + "epoch": 0.9947464332610306, + "grad_norm": 2.111478124788846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205110 + }, + { + "epoch": 0.9947949314538667, + "grad_norm": 2.4951139039330883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205120 + }, + { + "epoch": 0.9948434296467028, + "grad_norm": 2.1207690679148072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205130 + }, + { + "epoch": 0.9948919278395388, + "grad_norm": 2.1348726022552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205140 + }, + { + "epoch": 0.994940426032375, + "grad_norm": 2.064147565761232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205150 + }, + { + "epoch": 0.994988924225211, + "grad_norm": 2.1536991425818997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205160 + }, + { + "epoch": 0.9950374224180472, + "grad_norm": 2.0527866126940353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205170 + }, + { + "epoch": 0.9950859206108832, + "grad_norm": 2.095383024425246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205180 + }, + { + "epoch": 0.9951344188037193, + "grad_norm": 2.0466645764827263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205190 + }, + { + "epoch": 0.9951829169965554, + "grad_norm": 1.864371824922273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205200 + }, + { + "epoch": 0.9952314151893915, + "grad_norm": 2.0519655663520098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205210 + }, + { + "epoch": 0.9952799133822275, + "grad_norm": 2.0717150164273335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205220 + }, + { + "epoch": 0.9953284115750637, + "grad_norm": 2.6071843421959784e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205230 + }, + { + "epoch": 0.9953769097678997, + "grad_norm": 2.070395794362412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205240 + }, + { + "epoch": 0.9954254079607359, + "grad_norm": 1.7803437231123098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205250 + }, + { + "epoch": 0.995473906153572, + "grad_norm": 0.00012045457697240636, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 205260 + }, + { + "epoch": 0.995522404346408, + "grad_norm": 0.0005078606191091239, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 205270 + }, + { + "epoch": 0.9955709025392442, + "grad_norm": 2.6536683435551822e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205280 + }, + { + "epoch": 0.9956194007320802, + "grad_norm": 1.7988068066188134e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205290 + }, + { + "epoch": 0.9956678989249164, + "grad_norm": 3.258049764554016e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 205300 + }, + { + "epoch": 0.9957163971177524, + "grad_norm": 2.4351678803213872e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205310 + }, + { + "epoch": 0.9957648953105885, + "grad_norm": 2.5135686883004382e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205320 + }, + { + "epoch": 0.9958133935034246, + "grad_norm": 1.507349224993959e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 205330 + }, + { + "epoch": 0.9958618916962607, + "grad_norm": 3.477604332147166e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205340 + }, + { + "epoch": 0.9959103898890967, + "grad_norm": 1.5494295439566486e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205350 + }, + { + "epoch": 0.9959588880819329, + "grad_norm": 6.595086688321317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205360 + }, + { + "epoch": 0.9960073862747689, + "grad_norm": 6.337678314594086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205370 + }, + { + "epoch": 0.9960558844676051, + "grad_norm": 5.59936415811535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205380 + }, + { + "epoch": 0.9961043826604411, + "grad_norm": 9.874801435216796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205390 + }, + { + "epoch": 0.9961528808532772, + "grad_norm": 9.362860510009341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205400 + }, + { + "epoch": 0.9962013790461133, + "grad_norm": 4.713900125352666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205410 + }, + { + "epoch": 0.9962498772389494, + "grad_norm": 6.264715921133757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205420 + }, + { + "epoch": 0.9962983754317855, + "grad_norm": 5.012535439163912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205430 + }, + { + "epoch": 0.9963468736246216, + "grad_norm": 7.653933607798535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205440 + }, + { + "epoch": 0.9963953718174576, + "grad_norm": 7.334246220125351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205450 + }, + { + "epoch": 0.9964438700102938, + "grad_norm": 4.0817499211698305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205460 + }, + { + "epoch": 0.9964923682031298, + "grad_norm": 3.913523414666997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205470 + }, + { + "epoch": 0.996540866395966, + "grad_norm": 4.002838977612555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205480 + }, + { + "epoch": 0.996589364588802, + "grad_norm": 5.854784831171855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205490 + }, + { + "epoch": 0.9966378627816381, + "grad_norm": 6.1307987380132545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205500 + }, + { + "epoch": 0.9966863609744742, + "grad_norm": 3.60683111466642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205510 + }, + { + "epoch": 0.9967348591673103, + "grad_norm": 3.993543032265734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205520 + }, + { + "epoch": 0.9967833573601463, + "grad_norm": 3.3174428608617745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205530 + }, + { + "epoch": 0.9968318555529825, + "grad_norm": 5.273611350276042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205540 + }, + { + "epoch": 0.9968803537458185, + "grad_norm": 4.878238087258069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205550 + }, + { + "epoch": 0.9969288519386547, + "grad_norm": 3.064539896513452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205560 + }, + { + "epoch": 0.9969773501314907, + "grad_norm": 5.471353688335512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205570 + }, + { + "epoch": 0.9970258483243268, + "grad_norm": 2.9007624107180163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205580 + }, + { + "epoch": 0.9970743465171629, + "grad_norm": 4.626281679520616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205590 + }, + { + "epoch": 0.997122844709999, + "grad_norm": 4.9386676437279675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205600 + }, + { + "epoch": 0.997171342902835, + "grad_norm": 2.8782708341168473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205610 + }, + { + "epoch": 0.9972198410956712, + "grad_norm": 2.551040552134509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205620 + }, + { + "epoch": 0.9972683392885072, + "grad_norm": 2.5234110125893494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205630 + }, + { + "epoch": 0.9973168374813434, + "grad_norm": 3.914631179213757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205640 + }, + { + "epoch": 0.9973653356741794, + "grad_norm": 3.839939381578006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205650 + }, + { + "epoch": 0.9974138338670155, + "grad_norm": 2.3220502498588758e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205660 + }, + { + "epoch": 0.9974623320598516, + "grad_norm": 2.443863877488184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205670 + }, + { + "epoch": 0.9975108302526877, + "grad_norm": 1.0271643986925483e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205680 + }, + { + "epoch": 0.9975593284455238, + "grad_norm": 3.462795348241343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205690 + }, + { + "epoch": 0.9976078266383599, + "grad_norm": 3.3304236239928287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205700 + }, + { + "epoch": 0.9976563248311959, + "grad_norm": 2.2444137357524596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205710 + }, + { + "epoch": 0.9977048230240321, + "grad_norm": 2.264539489260642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205720 + }, + { + "epoch": 0.9977533212168681, + "grad_norm": 4.476318281376734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205730 + }, + { + "epoch": 0.9978018194097042, + "grad_norm": 3.1517956813331693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205740 + }, + { + "epoch": 0.9978503176025403, + "grad_norm": 6.496777587017277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205750 + }, + { + "epoch": 0.9978988157953764, + "grad_norm": 1.916854898809106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205760 + }, + { + "epoch": 0.9979473139882125, + "grad_norm": 2.0037653030158253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205770 + }, + { + "epoch": 0.9979958121810486, + "grad_norm": 1.93542973647709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205780 + }, + { + "epoch": 0.9980443103738847, + "grad_norm": 3.0025473733985564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205790 + }, + { + "epoch": 0.9980928085667208, + "grad_norm": 3.943084720958723e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205800 + }, + { + "epoch": 0.9981413067595569, + "grad_norm": 1.8252220570502686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205810 + }, + { + "epoch": 0.998189804952393, + "grad_norm": 1.8222323205918656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205820 + }, + { + "epoch": 0.9982383031452291, + "grad_norm": 1.8146801039620186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205830 + }, + { + "epoch": 0.9982868013380651, + "grad_norm": 2.9831671781721525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205840 + }, + { + "epoch": 0.9983352995309013, + "grad_norm": 2.4661674160597613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205850 + }, + { + "epoch": 0.9983837977237373, + "grad_norm": 7.244407697726274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205860 + }, + { + "epoch": 0.9984322959165735, + "grad_norm": 1.645886072765279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205870 + }, + { + "epoch": 0.9984807941094095, + "grad_norm": 1.772956466083997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205880 + }, + { + "epoch": 0.9985292923022456, + "grad_norm": 2.4387081793975085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205890 + }, + { + "epoch": 0.9985777904950817, + "grad_norm": 2.389475639574812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205900 + }, + { + "epoch": 0.9986262886879178, + "grad_norm": 1.855243681347929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205910 + }, + { + "epoch": 0.9986747868807538, + "grad_norm": 1.7860004390968243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205920 + }, + { + "epoch": 0.99872328507359, + "grad_norm": 2.207271336374106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205930 + }, + { + "epoch": 0.998771783266426, + "grad_norm": 2.221945578639861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205940 + }, + { + "epoch": 0.9988202814592622, + "grad_norm": 2.528516688471427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205950 + }, + { + "epoch": 0.9988687796520982, + "grad_norm": 7.280117188201984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205960 + }, + { + "epoch": 0.9989172778449343, + "grad_norm": 1.445954012524453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205970 + }, + { + "epoch": 0.9989657760377704, + "grad_norm": 1.4844629276922205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205980 + }, + { + "epoch": 0.9990142742306065, + "grad_norm": 2.156335995096015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 205990 + }, + { + "epoch": 0.9990627724234425, + "grad_norm": 2.058682184724603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206000 + }, + { + "epoch": 0.9991112706162787, + "grad_norm": 1.4219033346307697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206010 + }, + { + "epoch": 0.9991597688091147, + "grad_norm": 1.4962112118155346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206020 + }, + { + "epoch": 0.9992082670019509, + "grad_norm": 1.5096851484486251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206030 + }, + { + "epoch": 0.9992567651947869, + "grad_norm": 1.8565314121588017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206040 + }, + { + "epoch": 0.999305263387623, + "grad_norm": 1.870701339612424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206050 + }, + { + "epoch": 0.9993537615804591, + "grad_norm": 1.3527121609513415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206060 + }, + { + "epoch": 0.9994022597732952, + "grad_norm": 5.9852372942259535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206070 + }, + { + "epoch": 0.9994507579661313, + "grad_norm": 1.3434753327601356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206080 + }, + { + "epoch": 0.9994992561589674, + "grad_norm": 1.7748631080394262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206090 + }, + { + "epoch": 0.9995477543518034, + "grad_norm": 4.8251808948407415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206100 + }, + { + "epoch": 0.9995962525446396, + "grad_norm": 1.2013217656203778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206110 + }, + { + "epoch": 0.9996447507374756, + "grad_norm": 1.3357703210203908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206120 + }, + { + "epoch": 0.9996932489303118, + "grad_norm": 1.2737875749735394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206130 + }, + { + "epoch": 0.9997417471231478, + "grad_norm": 1.6638659872114658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206140 + }, + { + "epoch": 0.9997902453159839, + "grad_norm": 6.866436706332024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206150 + }, + { + "epoch": 0.99983874350882, + "grad_norm": 1.1414833807066316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206160 + }, + { + "epoch": 0.9998872417016561, + "grad_norm": 1.0998861625921563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206170 + }, + { + "epoch": 0.9999357398944921, + "grad_norm": 1.137737513090542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206180 + }, + { + "epoch": 0.9999842380873283, + "grad_norm": 1.5914519053694676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206190 + }, + { + "epoch": 1.0000327362801644, + "grad_norm": 1.0815828090926516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206200 + }, + { + "epoch": 1.0000812344730003, + "grad_norm": 1.1055565209971974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206210 + }, + { + "epoch": 1.0001297326658365, + "grad_norm": 1.1860296353916056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206220 + }, + { + "epoch": 1.0001782308586726, + "grad_norm": 1.1845920653286157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206230 + }, + { + "epoch": 1.0002267290515088, + "grad_norm": 1.5236619219649583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206240 + }, + { + "epoch": 1.0002752272443447, + "grad_norm": 1.1428008974689874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206250 + }, + { + "epoch": 1.0003237254371808, + "grad_norm": 1.0814607094289386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206260 + }, + { + "epoch": 1.000372223630017, + "grad_norm": 1.106673153117299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206270 + }, + { + "epoch": 1.0004207218228531, + "grad_norm": 3.832779839285649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206280 + }, + { + "epoch": 1.0004692200156893, + "grad_norm": 1.3486908301274525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206290 + }, + { + "epoch": 1.0005177182085252, + "grad_norm": 1.0559323300185497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206300 + }, + { + "epoch": 1.0005662164013613, + "grad_norm": 1.1546196674316889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206310 + }, + { + "epoch": 1.0006147145941975, + "grad_norm": 1.0168986364078592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206320 + }, + { + "epoch": 1.0006632127870336, + "grad_norm": 1.0103531167260371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206330 + }, + { + "epoch": 1.0007117109798696, + "grad_norm": 1.3937938092567492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206340 + }, + { + "epoch": 1.0007602091727057, + "grad_norm": 9.677711432232172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206350 + }, + { + "epoch": 1.0008087073655418, + "grad_norm": 9.822026640904369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206360 + }, + { + "epoch": 1.000857205558378, + "grad_norm": 1.1862509836646495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206370 + }, + { + "epoch": 1.000905703751214, + "grad_norm": 9.972912948796875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206380 + }, + { + "epoch": 1.00095420194405, + "grad_norm": 1.3197742418924463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206390 + }, + { + "epoch": 1.0010027001368862, + "grad_norm": 9.17917645892885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206400 + }, + { + "epoch": 1.0010511983297223, + "grad_norm": 9.60350121204101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206410 + }, + { + "epoch": 1.0010996965225583, + "grad_norm": 9.71279632722144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206420 + }, + { + "epoch": 1.0011481947153944, + "grad_norm": 9.88723627415311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206430 + }, + { + "epoch": 1.0011966929082305, + "grad_norm": 1.1518546898514614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206440 + }, + { + "epoch": 1.0012451911010667, + "grad_norm": 8.878925541466742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206450 + }, + { + "epoch": 1.0012936892939026, + "grad_norm": 9.882681979433983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206460 + }, + { + "epoch": 1.0013421874867388, + "grad_norm": 9.203717468153627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206470 + }, + { + "epoch": 1.001390685679575, + "grad_norm": 8.413749696956074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206480 + }, + { + "epoch": 1.001439183872411, + "grad_norm": 1.1636635690592811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206490 + }, + { + "epoch": 1.001487682065247, + "grad_norm": 9.018963851303852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206500 + }, + { + "epoch": 1.001536180258083, + "grad_norm": 8.490295044794038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206510 + }, + { + "epoch": 1.0015846784509193, + "grad_norm": 9.094997608372068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206520 + }, + { + "epoch": 1.0016331766437554, + "grad_norm": 8.467962402392004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206530 + }, + { + "epoch": 1.0016816748365913, + "grad_norm": 1.0424146239529364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206540 + }, + { + "epoch": 1.0017301730294275, + "grad_norm": 8.236634698732814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206550 + }, + { + "epoch": 1.0017786712222636, + "grad_norm": 8.121789960569004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206560 + }, + { + "epoch": 1.0018271694150997, + "grad_norm": 1.0380607818660792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206570 + }, + { + "epoch": 1.0018756676079357, + "grad_norm": 1.2352109024504898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206580 + }, + { + "epoch": 1.0019241658007718, + "grad_norm": 1.3990144225317636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206590 + }, + { + "epoch": 1.001972663993608, + "grad_norm": 8.288587878269027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206600 + }, + { + "epoch": 1.002021162186444, + "grad_norm": 8.059172387220315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206610 + }, + { + "epoch": 1.00206966037928, + "grad_norm": 8.033061362766603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206620 + }, + { + "epoch": 1.0021181585721162, + "grad_norm": 7.741166427877033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206630 + }, + { + "epoch": 1.0021666567649523, + "grad_norm": 9.73536316450918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206640 + }, + { + "epoch": 1.0022151549577885, + "grad_norm": 9.246684271602135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206650 + }, + { + "epoch": 1.0022636531506244, + "grad_norm": 7.607545740029309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206660 + }, + { + "epoch": 1.0023121513434605, + "grad_norm": 7.548412668256788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206670 + }, + { + "epoch": 1.0023606495362967, + "grad_norm": 7.362485803241725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206680 + }, + { + "epoch": 1.0024091477291328, + "grad_norm": 9.622839343137457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206690 + }, + { + "epoch": 1.0024576459219687, + "grad_norm": 7.213282628981688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206700 + }, + { + "epoch": 1.0025061441148049, + "grad_norm": 7.400739718832483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206710 + }, + { + "epoch": 1.002554642307641, + "grad_norm": 7.664912686777825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206720 + }, + { + "epoch": 1.0026031405004772, + "grad_norm": 8.530078048352152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206730 + }, + { + "epoch": 1.002651638693313, + "grad_norm": 9.483927669862169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206740 + }, + { + "epoch": 1.0027001368861492, + "grad_norm": 7.535376425948925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206750 + }, + { + "epoch": 1.0027486350789854, + "grad_norm": 7.200969776022248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206760 + }, + { + "epoch": 1.0027971332718215, + "grad_norm": 7.203587415460788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206770 + }, + { + "epoch": 1.0028456314646574, + "grad_norm": 1.2661663504331955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206780 + }, + { + "epoch": 1.0028941296574936, + "grad_norm": 8.988316153590858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206790 + }, + { + "epoch": 1.0029426278503297, + "grad_norm": 7.200638378890289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206800 + }, + { + "epoch": 1.0029911260431659, + "grad_norm": 8.559273396713252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206810 + }, + { + "epoch": 1.003039624236002, + "grad_norm": 1.4667862160422374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206820 + }, + { + "epoch": 1.003088122428838, + "grad_norm": 6.564798695762875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206830 + }, + { + "epoch": 1.003136620621674, + "grad_norm": 8.683530268172035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206840 + }, + { + "epoch": 1.0031851188145102, + "grad_norm": 6.705282089569664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206850 + }, + { + "epoch": 1.0032336170073464, + "grad_norm": 3.0352932753885398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206860 + }, + { + "epoch": 1.0032821152001823, + "grad_norm": 6.62254990402289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206870 + }, + { + "epoch": 1.0033306133930184, + "grad_norm": 6.112583150752471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206880 + }, + { + "epoch": 1.0033791115858546, + "grad_norm": 9.62323838393786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206890 + }, + { + "epoch": 1.0034276097786907, + "grad_norm": 6.570350024048821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206900 + }, + { + "epoch": 1.0034761079715266, + "grad_norm": 6.1339130752458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206910 + }, + { + "epoch": 1.0035246061643628, + "grad_norm": 6.978140163482749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206920 + }, + { + "epoch": 1.003573104357199, + "grad_norm": 1.161338673227874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206930 + }, + { + "epoch": 1.003621602550035, + "grad_norm": 7.838420401640178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206940 + }, + { + "epoch": 1.003670100742871, + "grad_norm": 6.009529442962958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206950 + }, + { + "epoch": 1.0037185989357071, + "grad_norm": 6.723790306750743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206960 + }, + { + "epoch": 1.0037670971285433, + "grad_norm": 6.041363462827576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206970 + }, + { + "epoch": 1.0038155953213794, + "grad_norm": 6.316494705060904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206980 + }, + { + "epoch": 1.0038640935142153, + "grad_norm": 8.552796657568251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 206990 + }, + { + "epoch": 1.0039125917070515, + "grad_norm": 6.273635335674044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207000 + }, + { + "epoch": 1.0039610898998876, + "grad_norm": 5.972075314275571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207010 + }, + { + "epoch": 1.0040095880927238, + "grad_norm": 7.072821972542442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207020 + }, + { + "epoch": 1.0040580862855597, + "grad_norm": 1.330192617388093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207030 + }, + { + "epoch": 1.0041065844783958, + "grad_norm": 7.413347020701622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207040 + }, + { + "epoch": 1.004155082671232, + "grad_norm": 5.625444146062364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207050 + }, + { + "epoch": 1.0042035808640681, + "grad_norm": 5.892420631425921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207060 + }, + { + "epoch": 1.004252079056904, + "grad_norm": 6.311047968665662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207070 + }, + { + "epoch": 1.0043005772497402, + "grad_norm": 6.456253345277219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207080 + }, + { + "epoch": 1.0043490754425763, + "grad_norm": 7.096403464856849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207090 + }, + { + "epoch": 1.0043975736354125, + "grad_norm": 6.214146992533642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207100 + }, + { + "epoch": 1.0044460718282484, + "grad_norm": 6.002627515044878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207110 + }, + { + "epoch": 1.0044945700210846, + "grad_norm": 5.786184260614391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207120 + }, + { + "epoch": 1.0045430682139207, + "grad_norm": 5.840034873472177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207130 + }, + { + "epoch": 1.0045915664067568, + "grad_norm": 7.151424483708979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207140 + }, + { + "epoch": 1.0046400645995928, + "grad_norm": 5.60896751267137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207150 + }, + { + "epoch": 1.004688562792429, + "grad_norm": 5.325043730408652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207160 + }, + { + "epoch": 1.004737060985265, + "grad_norm": 5.100565090288001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207170 + }, + { + "epoch": 1.0047855591781012, + "grad_norm": 5.227954034126014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207180 + }, + { + "epoch": 1.0048340573709371, + "grad_norm": 7.57706800413871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207190 + }, + { + "epoch": 1.0048825555637733, + "grad_norm": 5.345123668121232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207200 + }, + { + "epoch": 1.0049310537566094, + "grad_norm": 5.378040555115149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207210 + }, + { + "epoch": 1.0049795519494455, + "grad_norm": 5.795985202894371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207220 + }, + { + "epoch": 1.0050280501422815, + "grad_norm": 5.49909657365788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207230 + }, + { + "epoch": 1.0050765483351176, + "grad_norm": 6.671483561149216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207240 + }, + { + "epoch": 1.0051250465279538, + "grad_norm": 5.457024485622242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207250 + }, + { + "epoch": 1.00517354472079, + "grad_norm": 4.952823928761063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207260 + }, + { + "epoch": 1.0052220429136258, + "grad_norm": 4.842591465603618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207270 + }, + { + "epoch": 1.005270541106462, + "grad_norm": 4.945475780004926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207280 + }, + { + "epoch": 1.0053190392992981, + "grad_norm": 6.349182513076812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207290 + }, + { + "epoch": 1.0053675374921343, + "grad_norm": 4.772765009875002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207300 + }, + { + "epoch": 1.0054160356849704, + "grad_norm": 1.4752495189895853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207310 + }, + { + "epoch": 1.0054645338778063, + "grad_norm": 4.6842808387737023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207320 + }, + { + "epoch": 1.0055130320706425, + "grad_norm": 5.177048478799406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207330 + }, + { + "epoch": 1.0055615302634786, + "grad_norm": 5.983430924061395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207340 + }, + { + "epoch": 1.0056100284563148, + "grad_norm": 4.645780791179277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207350 + }, + { + "epoch": 1.0056585266491507, + "grad_norm": 4.6633422812192293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207360 + }, + { + "epoch": 1.0057070248419868, + "grad_norm": 5.26778933362948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207370 + }, + { + "epoch": 1.005755523034823, + "grad_norm": 4.78109654977743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207380 + }, + { + "epoch": 1.005804021227659, + "grad_norm": 5.985680786579906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207390 + }, + { + "epoch": 1.005852519420495, + "grad_norm": 4.867717393608473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207400 + }, + { + "epoch": 1.0059010176133312, + "grad_norm": 4.7027751293171605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207410 + }, + { + "epoch": 1.0059495158061673, + "grad_norm": 4.6058551106398227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207420 + }, + { + "epoch": 1.0059980139990035, + "grad_norm": 5.053107656749489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207430 + }, + { + "epoch": 1.0060465121918394, + "grad_norm": 5.811898518004455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207440 + }, + { + "epoch": 1.0060950103846755, + "grad_norm": 6.570497816937859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207450 + }, + { + "epoch": 1.0061435085775117, + "grad_norm": 4.7589378482371103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207460 + }, + { + "epoch": 1.0061920067703478, + "grad_norm": 4.3516405412447057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207470 + }, + { + "epoch": 1.0062405049631837, + "grad_norm": 6.722711987094954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207480 + }, + { + "epoch": 1.0062890031560199, + "grad_norm": 7.470198966075259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207490 + }, + { + "epoch": 1.006337501348856, + "grad_norm": 4.501442845139536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207500 + }, + { + "epoch": 1.0063859995416922, + "grad_norm": 5.126186692905321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207510 + }, + { + "epoch": 1.006434497734528, + "grad_norm": 4.6914240670048457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207520 + }, + { + "epoch": 1.0064829959273642, + "grad_norm": 5.122387847222853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207530 + }, + { + "epoch": 1.0065314941202004, + "grad_norm": 5.112887038194458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207540 + }, + { + "epoch": 1.0065799923130365, + "grad_norm": 4.2155886603723047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207550 + }, + { + "epoch": 1.0066284905058724, + "grad_norm": 4.0240431076199457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207560 + }, + { + "epoch": 1.0066769886987086, + "grad_norm": 4.487706917188916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207570 + }, + { + "epoch": 1.0067254868915447, + "grad_norm": 4.461322191673389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207580 + }, + { + "epoch": 1.0067739850843809, + "grad_norm": 5.310812980496848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207590 + }, + { + "epoch": 1.0068224832772168, + "grad_norm": 4.17219808923619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207600 + }, + { + "epoch": 1.006870981470053, + "grad_norm": 4.1033186448657943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207610 + }, + { + "epoch": 1.006919479662889, + "grad_norm": 4.093464553989179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207620 + }, + { + "epoch": 1.0069679778557252, + "grad_norm": 4.082419025053241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207630 + }, + { + "epoch": 1.0070164760485611, + "grad_norm": 4.959698571838089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207640 + }, + { + "epoch": 1.0070649742413973, + "grad_norm": 4.020528479031782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207650 + }, + { + "epoch": 1.0071134724342334, + "grad_norm": 4.653725511616358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207660 + }, + { + "epoch": 1.0071619706270696, + "grad_norm": 4.026912563404039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207670 + }, + { + "epoch": 1.0072104688199055, + "grad_norm": 4.1188812360815064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207680 + }, + { + "epoch": 1.0072589670127416, + "grad_norm": 5.102990598970791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207690 + }, + { + "epoch": 1.0073074652055778, + "grad_norm": 4.206201822398725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207700 + }, + { + "epoch": 1.007355963398414, + "grad_norm": 4.003166509392031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207710 + }, + { + "epoch": 1.0074044615912499, + "grad_norm": 4.0516681565350154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207720 + }, + { + "epoch": 1.007452959784086, + "grad_norm": 3.6984860685151943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207730 + }, + { + "epoch": 1.0075014579769221, + "grad_norm": 4.942227178617031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207740 + }, + { + "epoch": 1.0075499561697583, + "grad_norm": 3.658074092527386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207750 + }, + { + "epoch": 1.0075984543625942, + "grad_norm": 5.293008484841266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207760 + }, + { + "epoch": 1.0076469525554304, + "grad_norm": 3.7529625274146383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207770 + }, + { + "epoch": 1.0076954507482665, + "grad_norm": 4.062898426582251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207780 + }, + { + "epoch": 1.0077439489411026, + "grad_norm": 4.232656749536545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207790 + }, + { + "epoch": 1.0077924471339386, + "grad_norm": 3.6418072113519884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207800 + }, + { + "epoch": 1.0078409453267747, + "grad_norm": 3.739638714250759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207810 + }, + { + "epoch": 1.0078894435196109, + "grad_norm": 3.664791847768356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207820 + }, + { + "epoch": 1.007937941712447, + "grad_norm": 3.5740600878853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207830 + }, + { + "epoch": 1.0079864399052831, + "grad_norm": 4.410642873153847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207840 + }, + { + "epoch": 1.008034938098119, + "grad_norm": 3.236983445731312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207850 + }, + { + "epoch": 1.0080834362909552, + "grad_norm": 3.506441998979426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207860 + }, + { + "epoch": 1.0081319344837913, + "grad_norm": 3.514352329148096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207870 + }, + { + "epoch": 1.0081804326766275, + "grad_norm": 4.047065544909856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207880 + }, + { + "epoch": 1.0082289308694634, + "grad_norm": 4.1252332039221074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207890 + }, + { + "epoch": 1.0082774290622996, + "grad_norm": 3.3949638122976467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207900 + }, + { + "epoch": 1.0083259272551357, + "grad_norm": 3.730909270416305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207910 + }, + { + "epoch": 1.0083744254479718, + "grad_norm": 3.175653944254009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207920 + }, + { + "epoch": 1.0084229236408078, + "grad_norm": 3.561013670605462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207930 + }, + { + "epoch": 1.008471421833644, + "grad_norm": 3.774199228701036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207940 + }, + { + "epoch": 1.00851992002648, + "grad_norm": 3.326727266994567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207950 + }, + { + "epoch": 1.0085684182193162, + "grad_norm": 3.1651109111408005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207960 + }, + { + "epoch": 1.0086169164121521, + "grad_norm": 3.3690866985125467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207970 + }, + { + "epoch": 1.0086654146049883, + "grad_norm": 3.5817501498058846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207980 + }, + { + "epoch": 1.0087139127978244, + "grad_norm": 3.8248612099778256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 207990 + }, + { + "epoch": 1.0087624109906606, + "grad_norm": 3.0503161951855873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208000 + }, + { + "epoch": 1.0088109091834965, + "grad_norm": 5.260910711513134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208010 + }, + { + "epoch": 1.0088594073763326, + "grad_norm": 3.293131669579452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208020 + }, + { + "epoch": 1.0089079055691688, + "grad_norm": 5.17391470111761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208030 + }, + { + "epoch": 1.008956403762005, + "grad_norm": 3.5405236076258007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208040 + }, + { + "epoch": 1.0090049019548408, + "grad_norm": 3.090068219080422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208050 + }, + { + "epoch": 1.009053400147677, + "grad_norm": 3.0599804290432076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208060 + }, + { + "epoch": 1.0091018983405131, + "grad_norm": 2.813348203289934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208070 + }, + { + "epoch": 1.0091503965333493, + "grad_norm": 3.155877266181051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208080 + }, + { + "epoch": 1.0091988947261852, + "grad_norm": 3.67382966715013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208090 + }, + { + "epoch": 1.0092473929190213, + "grad_norm": 3.10761095079215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208100 + }, + { + "epoch": 1.0092958911118575, + "grad_norm": 2.866037505100394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208110 + }, + { + "epoch": 1.0093443893046936, + "grad_norm": 2.9451305749716994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208120 + }, + { + "epoch": 1.0093928874975295, + "grad_norm": 2.883998888592032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208130 + }, + { + "epoch": 1.0094413856903657, + "grad_norm": 5.593547598436999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208140 + }, + { + "epoch": 1.0094898838832018, + "grad_norm": 2.8830402243329445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208150 + }, + { + "epoch": 1.009538382076038, + "grad_norm": 3.026206627509964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208160 + }, + { + "epoch": 1.0095868802688739, + "grad_norm": 3.1677205925007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208170 + }, + { + "epoch": 1.00963537846171, + "grad_norm": 2.818435689277976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208180 + }, + { + "epoch": 1.0096838766545462, + "grad_norm": 3.784491298119974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208190 + }, + { + "epoch": 1.0097323748473823, + "grad_norm": 2.8964151965737983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208200 + }, + { + "epoch": 1.0097808730402182, + "grad_norm": 2.6310746648050554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208210 + }, + { + "epoch": 1.0098293712330544, + "grad_norm": 2.8374492444527277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208220 + }, + { + "epoch": 1.0098778694258905, + "grad_norm": 2.688796314487263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208230 + }, + { + "epoch": 1.0099263676187267, + "grad_norm": 3.076262373724603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208240 + }, + { + "epoch": 1.0099748658115626, + "grad_norm": 2.623706336635223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208250 + }, + { + "epoch": 1.0100233640043987, + "grad_norm": 2.584963567642262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208260 + }, + { + "epoch": 1.0100718621972349, + "grad_norm": 2.6256410023961507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208270 + }, + { + "epoch": 1.010120360390071, + "grad_norm": 2.693735723369173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208280 + }, + { + "epoch": 1.010168858582907, + "grad_norm": 2.995290913077042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208290 + }, + { + "epoch": 1.010217356775743, + "grad_norm": 2.5308938234047673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208300 + }, + { + "epoch": 1.0102658549685792, + "grad_norm": 2.678628447938536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208310 + }, + { + "epoch": 1.0103143531614154, + "grad_norm": 2.629960249578289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208320 + }, + { + "epoch": 1.0103628513542513, + "grad_norm": 2.427427716611419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208330 + }, + { + "epoch": 1.0104113495470874, + "grad_norm": 2.9735122097918065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208340 + }, + { + "epoch": 1.0104598477399236, + "grad_norm": 2.3537310767096642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208350 + }, + { + "epoch": 1.0105083459327597, + "grad_norm": 2.762747612905514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208360 + }, + { + "epoch": 1.0105568441255959, + "grad_norm": 2.3327318388055573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208370 + }, + { + "epoch": 1.0106053423184318, + "grad_norm": 2.3835274021166697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208380 + }, + { + "epoch": 1.010653840511268, + "grad_norm": 2.7335400432093593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208390 + }, + { + "epoch": 1.010702338704104, + "grad_norm": 2.4027946210480877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208400 + }, + { + "epoch": 1.0107508368969402, + "grad_norm": 2.342066522942332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208410 + }, + { + "epoch": 1.0107993350897762, + "grad_norm": 2.4970353251774213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208420 + }, + { + "epoch": 1.0108478332826123, + "grad_norm": 2.3000578153187234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208430 + }, + { + "epoch": 1.0108963314754484, + "grad_norm": 3.337349596677086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208440 + }, + { + "epoch": 1.0109448296682846, + "grad_norm": 2.2969943813677673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208450 + }, + { + "epoch": 1.0109933278611205, + "grad_norm": 2.4499271944478096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208460 + }, + { + "epoch": 1.0110418260539566, + "grad_norm": 3.3417808253943804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208470 + }, + { + "epoch": 1.0110903242467928, + "grad_norm": 2.2593438586682169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208480 + }, + { + "epoch": 1.011138822439629, + "grad_norm": 2.813278570101829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208490 + }, + { + "epoch": 1.0111873206324649, + "grad_norm": 2.1961142238069442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208500 + }, + { + "epoch": 1.011235818825301, + "grad_norm": 3.260634287016728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208510 + }, + { + "epoch": 1.0112843170181371, + "grad_norm": 2.308310769194577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208520 + }, + { + "epoch": 1.0113328152109733, + "grad_norm": 2.308993174438001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208530 + }, + { + "epoch": 1.0113813134038092, + "grad_norm": 2.408291663869022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208540 + }, + { + "epoch": 1.0114298115966454, + "grad_norm": 2.2453879466866056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208550 + }, + { + "epoch": 1.0114783097894815, + "grad_norm": 1.9767206538290338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208560 + }, + { + "epoch": 1.0115268079823176, + "grad_norm": 2.2082950579260796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208570 + }, + { + "epoch": 1.0115753061751536, + "grad_norm": 2.1695056773296528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208580 + }, + { + "epoch": 1.0116238043679897, + "grad_norm": 2.3280570360384445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208590 + }, + { + "epoch": 1.0116723025608259, + "grad_norm": 2.1980903852636402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208600 + }, + { + "epoch": 1.011720800753662, + "grad_norm": 2.989163192523847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208610 + }, + { + "epoch": 1.011769298946498, + "grad_norm": 2.15016385141098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208620 + }, + { + "epoch": 1.011817797139334, + "grad_norm": 5.602668124993215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208630 + }, + { + "epoch": 1.0118662953321702, + "grad_norm": 2.467598960720352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208640 + }, + { + "epoch": 1.0119147935250064, + "grad_norm": 2.10740225270456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208650 + }, + { + "epoch": 1.0119632917178423, + "grad_norm": 2.0531768996079336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208660 + }, + { + "epoch": 1.0120117899106784, + "grad_norm": 2.1371951675064338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208670 + }, + { + "epoch": 1.0120602881035146, + "grad_norm": 1.1700042250595288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208680 + }, + { + "epoch": 1.0121087862963507, + "grad_norm": 2.2146919320675806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208690 + }, + { + "epoch": 1.0121572844891866, + "grad_norm": 2.413719926153135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208700 + }, + { + "epoch": 1.0122057826820228, + "grad_norm": 1.9606802936777967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208710 + }, + { + "epoch": 1.012254280874859, + "grad_norm": 2.084480144048939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208720 + }, + { + "epoch": 1.012302779067695, + "grad_norm": 2.0145924395365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208730 + }, + { + "epoch": 1.012351277260531, + "grad_norm": 2.395742626504216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208740 + }, + { + "epoch": 1.0123997754533671, + "grad_norm": 2.3481709376937943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208750 + }, + { + "epoch": 1.0124482736462033, + "grad_norm": 2.0501106234860345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208760 + }, + { + "epoch": 1.0124967718390394, + "grad_norm": 1.941709797392832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208770 + }, + { + "epoch": 1.0125452700318753, + "grad_norm": 1.8838302651147387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208780 + }, + { + "epoch": 1.0125937682247115, + "grad_norm": 2.191668215800746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208790 + }, + { + "epoch": 1.0126422664175476, + "grad_norm": 2.3598177278927324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208800 + }, + { + "epoch": 1.0126907646103838, + "grad_norm": 1.914972500571821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208810 + }, + { + "epoch": 1.0127392628032197, + "grad_norm": 1.8877658192195668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208820 + }, + { + "epoch": 1.0127877609960558, + "grad_norm": 1.8510765187329525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208830 + }, + { + "epoch": 1.012836259188892, + "grad_norm": 2.3086431610863656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208840 + }, + { + "epoch": 1.0128847573817281, + "grad_norm": 1.7423838016839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208850 + }, + { + "epoch": 1.0129332555745643, + "grad_norm": 2.1034605879322044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208860 + }, + { + "epoch": 1.0129817537674002, + "grad_norm": 2.2915783404187096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208870 + }, + { + "epoch": 1.0130302519602363, + "grad_norm": 1.8533950196797377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208880 + }, + { + "epoch": 1.0130787501530725, + "grad_norm": 1.9656616245811165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208890 + }, + { + "epoch": 1.0131272483459086, + "grad_norm": 1.7654348027917877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208900 + }, + { + "epoch": 1.0131757465387445, + "grad_norm": 1.7363367987854872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208910 + }, + { + "epoch": 1.0132242447315807, + "grad_norm": 1.7347052505556348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208920 + }, + { + "epoch": 1.0132727429244168, + "grad_norm": 1.772660169763185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208930 + }, + { + "epoch": 1.013321241117253, + "grad_norm": 1.971758933905221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208940 + }, + { + "epoch": 1.013369739310089, + "grad_norm": 1.7286116360537562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208950 + }, + { + "epoch": 1.013418237502925, + "grad_norm": 1.6734976782117883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208960 + }, + { + "epoch": 1.0134667356957612, + "grad_norm": 1.6807113922823191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208970 + }, + { + "epoch": 1.0135152338885973, + "grad_norm": 1.7105300287312275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208980 + }, + { + "epoch": 1.0135637320814332, + "grad_norm": 1.9325649702750525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 208990 + }, + { + "epoch": 1.0136122302742694, + "grad_norm": 1.6485162745993875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209000 + }, + { + "epoch": 1.0136607284671055, + "grad_norm": 2.0379171417062025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209010 + }, + { + "epoch": 1.0137092266599417, + "grad_norm": 1.7015953801546857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209020 + }, + { + "epoch": 1.0137577248527776, + "grad_norm": 1.5939343711579568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209030 + }, + { + "epoch": 1.0138062230456137, + "grad_norm": 1.6241385480952886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209040 + }, + { + "epoch": 1.0138547212384499, + "grad_norm": 1.6804108327050926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209050 + }, + { + "epoch": 1.013903219431286, + "grad_norm": 1.6416625214787928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209060 + }, + { + "epoch": 1.013951717624122, + "grad_norm": 1.671034226546908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209070 + }, + { + "epoch": 1.014000215816958, + "grad_norm": 1.9047419641538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209080 + }, + { + "epoch": 1.0140487140097942, + "grad_norm": 1.638832145545166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209090 + }, + { + "epoch": 1.0140972122026304, + "grad_norm": 1.5528542007814394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209100 + }, + { + "epoch": 1.0141457103954663, + "grad_norm": 1.5734579505988222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209110 + }, + { + "epoch": 1.0141942085883024, + "grad_norm": 1.914306011485678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209120 + }, + { + "epoch": 1.0142427067811386, + "grad_norm": 1.8316045213850884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209130 + }, + { + "epoch": 1.0142912049739747, + "grad_norm": 1.675379053267534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209140 + }, + { + "epoch": 1.0143397031668107, + "grad_norm": 1.646500322749489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209150 + }, + { + "epoch": 1.0143882013596468, + "grad_norm": 1.5930898200622323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209160 + }, + { + "epoch": 1.014436699552483, + "grad_norm": 1.5523623630997463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209170 + }, + { + "epoch": 1.014485197745319, + "grad_norm": 1.5832773669899325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209180 + }, + { + "epoch": 1.014533695938155, + "grad_norm": 1.8176336880060262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209190 + }, + { + "epoch": 1.0145821941309912, + "grad_norm": 1.471403550112882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209200 + }, + { + "epoch": 1.0146306923238273, + "grad_norm": 1.528789113081075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209210 + }, + { + "epoch": 1.0146791905166634, + "grad_norm": 1.4542403903305967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209220 + }, + { + "epoch": 1.0147276887094994, + "grad_norm": 1.477740596556032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209230 + }, + { + "epoch": 1.0147761869023355, + "grad_norm": 1.451975748523182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209240 + }, + { + "epoch": 1.0148246850951717, + "grad_norm": 2.7193226515009883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209250 + }, + { + "epoch": 1.0148731832880078, + "grad_norm": 8.018728863135038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209260 + }, + { + "epoch": 1.0149216814808437, + "grad_norm": 1.4712752260948037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209270 + }, + { + "epoch": 1.0149701796736799, + "grad_norm": 1.5700253186423652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209280 + }, + { + "epoch": 1.015018677866516, + "grad_norm": 4.165847258263966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209290 + }, + { + "epoch": 1.0150671760593521, + "grad_norm": 1.7542159014283243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209300 + }, + { + "epoch": 1.015115674252188, + "grad_norm": 1.457025433637682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209310 + }, + { + "epoch": 1.0151641724450242, + "grad_norm": 1.5469363745523879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209320 + }, + { + "epoch": 1.0152126706378604, + "grad_norm": 1.560957514357142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209330 + }, + { + "epoch": 1.0152611688306965, + "grad_norm": 1.5022929744645808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209340 + }, + { + "epoch": 1.0153096670235326, + "grad_norm": 1.471315727030742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209350 + }, + { + "epoch": 1.0153581652163686, + "grad_norm": 1.7798481621866813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209360 + }, + { + "epoch": 1.0154066634092047, + "grad_norm": 1.401985372240233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209370 + }, + { + "epoch": 1.0154551616020409, + "grad_norm": 1.6335032171355124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209380 + }, + { + "epoch": 1.015503659794877, + "grad_norm": 1.5959582810864958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209390 + }, + { + "epoch": 1.015552157987713, + "grad_norm": 1.4637885215051938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209400 + }, + { + "epoch": 1.015600656180549, + "grad_norm": 1.7686637932001759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209410 + }, + { + "epoch": 1.0156491543733852, + "grad_norm": 1.4812120241458615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209420 + }, + { + "epoch": 1.0156976525662214, + "grad_norm": 1.4041589224689233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209430 + }, + { + "epoch": 1.0157461507590573, + "grad_norm": 1.456558038626099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209440 + }, + { + "epoch": 1.0157946489518934, + "grad_norm": 1.3700034173780296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209450 + }, + { + "epoch": 1.0158431471447296, + "grad_norm": 3.556465344445314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209460 + }, + { + "epoch": 1.0158916453375657, + "grad_norm": 1.554961670535704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209470 + }, + { + "epoch": 1.0159401435304016, + "grad_norm": 2.640106231410755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209480 + }, + { + "epoch": 1.0159886417232378, + "grad_norm": 1.5674623909944785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209490 + }, + { + "epoch": 1.016037139916074, + "grad_norm": 1.3826519307258422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209500 + }, + { + "epoch": 1.01608563810891, + "grad_norm": 1.3814472765716346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209510 + }, + { + "epoch": 1.016134136301746, + "grad_norm": 1.310667414600175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209520 + }, + { + "epoch": 1.0161826344945821, + "grad_norm": 2.2932354681870493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209530 + }, + { + "epoch": 1.0162311326874183, + "grad_norm": 1.366912414368926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209540 + }, + { + "epoch": 1.0162796308802544, + "grad_norm": 1.3084998329304653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209550 + }, + { + "epoch": 1.0163281290730903, + "grad_norm": 1.2998165743738355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209560 + }, + { + "epoch": 1.0163766272659265, + "grad_norm": 2.310886202394613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209570 + }, + { + "epoch": 1.0164251254587626, + "grad_norm": 1.3011609212298936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209580 + }, + { + "epoch": 1.0164736236515988, + "grad_norm": 1.9819806595933187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209590 + }, + { + "epoch": 1.0165221218444347, + "grad_norm": 1.47680111695081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209600 + }, + { + "epoch": 1.0165706200372708, + "grad_norm": 1.3892183403640956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209610 + }, + { + "epoch": 1.016619118230107, + "grad_norm": 1.6050366014042083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209620 + }, + { + "epoch": 1.0166676164229431, + "grad_norm": 1.379686409563874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209630 + }, + { + "epoch": 1.016716114615779, + "grad_norm": 1.293209805908191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209640 + }, + { + "epoch": 1.0167646128086152, + "grad_norm": 1.3389106356953562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209650 + }, + { + "epoch": 1.0168131110014513, + "grad_norm": 1.273718339689367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209660 + }, + { + "epoch": 1.0168616091942875, + "grad_norm": 1.405388871944524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209670 + }, + { + "epoch": 1.0169101073871234, + "grad_norm": 1.2768222745762614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209680 + }, + { + "epoch": 1.0169586055799595, + "grad_norm": 1.2421125461514748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209690 + }, + { + "epoch": 1.0170071037727957, + "grad_norm": 1.3065941573131568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209700 + }, + { + "epoch": 1.0170556019656318, + "grad_norm": 1.6800656510440604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209710 + }, + { + "epoch": 1.0171041001584677, + "grad_norm": 1.2189310893973015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209720 + }, + { + "epoch": 1.017152598351304, + "grad_norm": 1.2211650357585313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209730 + }, + { + "epoch": 1.01720109654414, + "grad_norm": 1.267275706595683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209740 + }, + { + "epoch": 1.0172495947369762, + "grad_norm": 1.260568467387202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209750 + }, + { + "epoch": 1.017298092929812, + "grad_norm": 1.299688960898493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209760 + }, + { + "epoch": 1.0173465911226482, + "grad_norm": 1.3056920522558357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209770 + }, + { + "epoch": 1.0173950893154844, + "grad_norm": 1.214020102224822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209780 + }, + { + "epoch": 1.0174435875083205, + "grad_norm": 6.702237556055479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209790 + }, + { + "epoch": 1.0174920857011565, + "grad_norm": 1.2442650643151865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209800 + }, + { + "epoch": 1.0175405838939926, + "grad_norm": 1.3370798512823967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209810 + }, + { + "epoch": 1.0175890820868287, + "grad_norm": 1.2829239892653277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209820 + }, + { + "epoch": 1.0176375802796649, + "grad_norm": 1.291896580823959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209830 + }, + { + "epoch": 1.0176860784725008, + "grad_norm": 1.2642593105738342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209840 + }, + { + "epoch": 1.017734576665337, + "grad_norm": 1.2373541835586366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209850 + }, + { + "epoch": 1.017783074858173, + "grad_norm": 1.2626020406969474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209860 + }, + { + "epoch": 1.0178315730510092, + "grad_norm": 1.8570742099655035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209870 + }, + { + "epoch": 1.0178800712438454, + "grad_norm": 1.24451787542057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209880 + }, + { + "epoch": 1.0179285694366813, + "grad_norm": 1.2386561820676434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209890 + }, + { + "epoch": 1.0179770676295175, + "grad_norm": 1.2549959649277298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209900 + }, + { + "epoch": 1.0180255658223536, + "grad_norm": 1.348930567246498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209910 + }, + { + "epoch": 1.0180740640151897, + "grad_norm": 1.2358212586605077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209920 + }, + { + "epoch": 1.0181225622080257, + "grad_norm": 1.3588687863830273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209930 + }, + { + "epoch": 1.0181710604008618, + "grad_norm": 1.1792574383662213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209940 + }, + { + "epoch": 1.018219558593698, + "grad_norm": 1.199480976765699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209950 + }, + { + "epoch": 1.018268056786534, + "grad_norm": 2.6712751832747017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209960 + }, + { + "epoch": 1.01831655497937, + "grad_norm": 1.5472961933937768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209970 + }, + { + "epoch": 1.0183650531722062, + "grad_norm": 1.1806589839125081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209980 + }, + { + "epoch": 1.0184135513650423, + "grad_norm": 1.2402531979205378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 209990 + }, + { + "epoch": 1.0184620495578784, + "grad_norm": 1.300114291780119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210000 + }, + { + "epoch": 1.0185105477507144, + "grad_norm": 1.3915760632698948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210010 + }, + { + "epoch": 1.0185590459435505, + "grad_norm": 1.2170279717338417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210020 + }, + { + "epoch": 1.0186075441363867, + "grad_norm": 1.2236351665251277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210030 + }, + { + "epoch": 1.0186560423292228, + "grad_norm": 1.1975598113167507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210040 + }, + { + "epoch": 1.0187045405220587, + "grad_norm": 1.262074391661372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210050 + }, + { + "epoch": 1.0187530387148949, + "grad_norm": 1.2159011930634733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210060 + }, + { + "epoch": 1.018801536907731, + "grad_norm": 1.1271306732396624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210070 + }, + { + "epoch": 1.0188500351005672, + "grad_norm": 1.2011493311092636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210080 + }, + { + "epoch": 1.018898533293403, + "grad_norm": 2.648819759087928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210090 + }, + { + "epoch": 1.0189470314862392, + "grad_norm": 1.1573109048867991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210100 + }, + { + "epoch": 1.0189955296790754, + "grad_norm": 1.7117479274020297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210110 + }, + { + "epoch": 1.0190440278719115, + "grad_norm": 1.1502164909416024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210120 + }, + { + "epoch": 1.0190925260647474, + "grad_norm": 1.1945311939598469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210130 + }, + { + "epoch": 1.0191410242575836, + "grad_norm": 1.4088610100770893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210140 + }, + { + "epoch": 1.0191895224504197, + "grad_norm": 1.199260424300519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210150 + }, + { + "epoch": 1.0192380206432559, + "grad_norm": 1.1770620034212698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210160 + }, + { + "epoch": 1.0192865188360918, + "grad_norm": 1.183057634079887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210170 + }, + { + "epoch": 1.019335017028928, + "grad_norm": 1.1554804757452075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210180 + }, + { + "epoch": 1.019383515221764, + "grad_norm": 1.225426444761979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210190 + }, + { + "epoch": 1.0194320134146002, + "grad_norm": 1.1855993164999745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210200 + }, + { + "epoch": 1.0194805116074361, + "grad_norm": 1.1775418329307286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210210 + }, + { + "epoch": 1.0195290098002723, + "grad_norm": 1.6408725400651747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210220 + }, + { + "epoch": 1.0195775079931084, + "grad_norm": 1.3258667763693666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210230 + }, + { + "epoch": 1.0196260061859446, + "grad_norm": 1.0730431654337735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210240 + }, + { + "epoch": 1.0196745043787805, + "grad_norm": 1.1024538082438085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210250 + }, + { + "epoch": 1.0197230025716166, + "grad_norm": 1.1394828902666632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210260 + }, + { + "epoch": 1.0197715007644528, + "grad_norm": 1.1830737633999888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210270 + }, + { + "epoch": 1.019819998957289, + "grad_norm": 1.0323616095320176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210280 + }, + { + "epoch": 1.0198684971501248, + "grad_norm": 1.1339983529978781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210290 + }, + { + "epoch": 1.019916995342961, + "grad_norm": 1.1017242940170036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210300 + }, + { + "epoch": 1.0199654935357971, + "grad_norm": 3.456927402112342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210310 + }, + { + "epoch": 1.0200139917286333, + "grad_norm": 1.1202016736433507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210320 + }, + { + "epoch": 1.0200624899214692, + "grad_norm": 1.1180749481809471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210330 + }, + { + "epoch": 1.0201109881143053, + "grad_norm": 1.1229797536316255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210340 + }, + { + "epoch": 1.0201594863071415, + "grad_norm": 1.0825402085856695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210350 + }, + { + "epoch": 1.0202079844999776, + "grad_norm": 1.1205841587980103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210360 + }, + { + "epoch": 1.0202564826928135, + "grad_norm": 1.098717419267814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210370 + }, + { + "epoch": 1.0203049808856497, + "grad_norm": 1.1221353446444482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210380 + }, + { + "epoch": 1.0203534790784858, + "grad_norm": 1.2538110638615763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210390 + }, + { + "epoch": 1.020401977271322, + "grad_norm": 1.1601851923614959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210400 + }, + { + "epoch": 1.0204504754641581, + "grad_norm": 1.0885943879657134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210410 + }, + { + "epoch": 1.020498973656994, + "grad_norm": 1.1164036095578922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210420 + }, + { + "epoch": 1.0205474718498302, + "grad_norm": 1.054866842764568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210430 + }, + { + "epoch": 1.0205959700426663, + "grad_norm": 1.1884576878173903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210440 + }, + { + "epoch": 1.0206444682355025, + "grad_norm": 1.092422934334536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210450 + }, + { + "epoch": 1.0206929664283384, + "grad_norm": 1.0824097529393839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210460 + }, + { + "epoch": 1.0207414646211745, + "grad_norm": 1.0900892988274791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210470 + }, + { + "epoch": 1.0207899628140107, + "grad_norm": 1.2340447597125603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210480 + }, + { + "epoch": 1.0208384610068468, + "grad_norm": 1.1587349035835359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210490 + }, + { + "epoch": 1.0208869591996828, + "grad_norm": 1.3647049001974665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210500 + }, + { + "epoch": 1.020935457392519, + "grad_norm": 1.0956834728403919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210510 + }, + { + "epoch": 1.020983955585355, + "grad_norm": 1.063247765387132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210520 + }, + { + "epoch": 1.0210324537781912, + "grad_norm": 1.0388685467432879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210530 + }, + { + "epoch": 1.021080951971027, + "grad_norm": 1.0726772359248571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210540 + }, + { + "epoch": 1.0211294501638633, + "grad_norm": 1.1357125373478993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210550 + }, + { + "epoch": 1.0211779483566994, + "grad_norm": 1.1200728522453574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210560 + }, + { + "epoch": 1.0212264465495355, + "grad_norm": 1.0788151882934471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210570 + }, + { + "epoch": 1.0212749447423715, + "grad_norm": 1.0337176092889422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210580 + }, + { + "epoch": 1.0213234429352076, + "grad_norm": 9.887298801913857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210590 + }, + { + "epoch": 1.0213719411280437, + "grad_norm": 1.1099809427150831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210600 + }, + { + "epoch": 1.02142043932088, + "grad_norm": 1.1207389150058589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210610 + }, + { + "epoch": 1.0214689375137158, + "grad_norm": 1.0540625794419611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210620 + }, + { + "epoch": 1.021517435706552, + "grad_norm": 1.0922366300292197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210630 + }, + { + "epoch": 1.021565933899388, + "grad_norm": 1.0241043923997495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210640 + }, + { + "epoch": 1.0216144320922242, + "grad_norm": 1.0923562143716481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210650 + }, + { + "epoch": 1.0216629302850602, + "grad_norm": 1.1528678101058176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210660 + }, + { + "epoch": 1.0217114284778963, + "grad_norm": 1.1255512077923413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210670 + }, + { + "epoch": 1.0217599266707325, + "grad_norm": 9.971657988216975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210680 + }, + { + "epoch": 1.0218084248635686, + "grad_norm": 2.5465399744462047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210690 + }, + { + "epoch": 1.0218569230564045, + "grad_norm": 1.059245988699331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210700 + }, + { + "epoch": 1.0219054212492407, + "grad_norm": 1.0323468302431138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210710 + }, + { + "epoch": 1.0219539194420768, + "grad_norm": 1.0476944822812584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210720 + }, + { + "epoch": 1.022002417634913, + "grad_norm": 1.8388952582881757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210730 + }, + { + "epoch": 1.0220509158277489, + "grad_norm": 1.0612743039928318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210740 + }, + { + "epoch": 1.022099414020585, + "grad_norm": 1.0351406842801225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210750 + }, + { + "epoch": 1.0221479122134212, + "grad_norm": 1.0280027140652237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210760 + }, + { + "epoch": 1.0221964104062573, + "grad_norm": 1.971258853927793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210770 + }, + { + "epoch": 1.0222449085990932, + "grad_norm": 1.086609913159009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210780 + }, + { + "epoch": 1.0222934067919294, + "grad_norm": 1.0133116035149214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210790 + }, + { + "epoch": 1.0223419049847655, + "grad_norm": 1.1059422178050227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210800 + }, + { + "epoch": 1.0223904031776017, + "grad_norm": 1.0871511335608375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210810 + }, + { + "epoch": 1.0224389013704376, + "grad_norm": 1.065653094656227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210820 + }, + { + "epoch": 1.0224873995632737, + "grad_norm": 1.0144091078245765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210830 + }, + { + "epoch": 1.0225358977561099, + "grad_norm": 9.840802306371188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210840 + }, + { + "epoch": 1.022584395948946, + "grad_norm": 1.0285465634751745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210850 + }, + { + "epoch": 1.022632894141782, + "grad_norm": 1.0859618271297222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210860 + }, + { + "epoch": 1.022681392334618, + "grad_norm": 1.0230979796688189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210870 + }, + { + "epoch": 1.0227298905274542, + "grad_norm": 9.840564274554708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210880 + }, + { + "epoch": 1.0227783887202904, + "grad_norm": 9.90060797789738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210890 + }, + { + "epoch": 1.0228268869131263, + "grad_norm": 1.0117565096834369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210900 + }, + { + "epoch": 1.0228753851059624, + "grad_norm": 1.0166796471366979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210910 + }, + { + "epoch": 1.0229238832987986, + "grad_norm": 1.264883593421473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210920 + }, + { + "epoch": 1.0229723814916347, + "grad_norm": 1.1929550680633838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210930 + }, + { + "epoch": 1.0230208796844709, + "grad_norm": 9.522101862557975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210940 + }, + { + "epoch": 1.0230693778773068, + "grad_norm": 1.0186957410951436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210950 + }, + { + "epoch": 1.023117876070143, + "grad_norm": 2.3566234119698493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210960 + }, + { + "epoch": 1.023166374262979, + "grad_norm": 1.0185602405954342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210970 + }, + { + "epoch": 1.0232148724558152, + "grad_norm": 9.290440772247166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210980 + }, + { + "epoch": 1.0232633706486511, + "grad_norm": 1.345931650575949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 210990 + }, + { + "epoch": 1.0233118688414873, + "grad_norm": 1.0223190116676051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211000 + }, + { + "epoch": 1.0233603670343234, + "grad_norm": 1.0464206923188613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211010 + }, + { + "epoch": 1.0234088652271596, + "grad_norm": 9.90964252878257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211020 + }, + { + "epoch": 1.0234573634199955, + "grad_norm": 1.0390530746917648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211030 + }, + { + "epoch": 1.0235058616128316, + "grad_norm": 9.105033882406133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211040 + }, + { + "epoch": 1.0235543598056678, + "grad_norm": 1.0939697148160121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211050 + }, + { + "epoch": 1.023602857998504, + "grad_norm": 1.1163735536001695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211060 + }, + { + "epoch": 1.0236513561913398, + "grad_norm": 9.641253484460321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211070 + }, + { + "epoch": 1.023699854384176, + "grad_norm": 6.969269634282682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211080 + }, + { + "epoch": 1.0237483525770121, + "grad_norm": 9.837091141662313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211090 + }, + { + "epoch": 1.0237968507698483, + "grad_norm": 1.0191381960567014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211100 + }, + { + "epoch": 1.0238453489626842, + "grad_norm": 1.5962083921294834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211110 + }, + { + "epoch": 1.0238938471555203, + "grad_norm": 9.97283038373098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211120 + }, + { + "epoch": 1.0239423453483565, + "grad_norm": 9.548503498990613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211130 + }, + { + "epoch": 1.0239908435411926, + "grad_norm": 1.0371073955184329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211140 + }, + { + "epoch": 1.0240393417340286, + "grad_norm": 2.804175949222554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211150 + }, + { + "epoch": 1.0240878399268647, + "grad_norm": 9.741302875454494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211160 + }, + { + "epoch": 1.0241363381197008, + "grad_norm": 9.490119623478677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211170 + }, + { + "epoch": 1.024184836312537, + "grad_norm": 8.9361307686886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211180 + }, + { + "epoch": 1.024233334505373, + "grad_norm": 8.847157317859455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211190 + }, + { + "epoch": 1.024281832698209, + "grad_norm": 1.0505207370670178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211200 + }, + { + "epoch": 1.0243303308910452, + "grad_norm": 9.944331225142378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211210 + }, + { + "epoch": 1.0243788290838813, + "grad_norm": 9.813675916348075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211220 + }, + { + "epoch": 1.0244273272767173, + "grad_norm": 9.218486241024948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211230 + }, + { + "epoch": 1.0244758254695534, + "grad_norm": 9.241959020300783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211240 + }, + { + "epoch": 1.0245243236623895, + "grad_norm": 1.0030870356558808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211250 + }, + { + "epoch": 1.0245728218552257, + "grad_norm": 9.495310138163404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211260 + }, + { + "epoch": 1.0246213200480616, + "grad_norm": 1.0104886172257466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211270 + }, + { + "epoch": 1.0246698182408978, + "grad_norm": 8.959299435673529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211280 + }, + { + "epoch": 1.024718316433734, + "grad_norm": 8.866801692875015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211290 + }, + { + "epoch": 1.02476681462657, + "grad_norm": 9.92281954381724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211300 + }, + { + "epoch": 1.024815312819406, + "grad_norm": 9.463449401891921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211310 + }, + { + "epoch": 1.024863811012242, + "grad_norm": 1.0128688643362693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211320 + }, + { + "epoch": 1.0249123092050783, + "grad_norm": 1.1400992150356615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211330 + }, + { + "epoch": 1.0249608073979144, + "grad_norm": 9.385453125787535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211340 + }, + { + "epoch": 1.0250093055907503, + "grad_norm": 9.621739138765406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211350 + }, + { + "epoch": 1.0250578037835865, + "grad_norm": 1.1125129617539642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211360 + }, + { + "epoch": 1.0251063019764226, + "grad_norm": 9.164082115376004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211370 + }, + { + "epoch": 1.0251548001692588, + "grad_norm": 9.337426831734774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211380 + }, + { + "epoch": 1.025203298362095, + "grad_norm": 1.0055305921241597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211390 + }, + { + "epoch": 1.0252517965549308, + "grad_norm": 9.401374967410447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211400 + }, + { + "epoch": 1.025300294747767, + "grad_norm": 9.569257741759429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211410 + }, + { + "epoch": 1.025348792940603, + "grad_norm": 9.585841098669334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211420 + }, + { + "epoch": 1.0253972911334392, + "grad_norm": 1.1790807263878378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211430 + }, + { + "epoch": 1.0254457893262752, + "grad_norm": 8.892914138414199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211440 + }, + { + "epoch": 1.0254942875191113, + "grad_norm": 9.475974138695165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211450 + }, + { + "epoch": 1.0255427857119475, + "grad_norm": 9.014253521399951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211460 + }, + { + "epoch": 1.0255912839047836, + "grad_norm": 9.053469796072022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211470 + }, + { + "epoch": 1.0256397820976195, + "grad_norm": 8.789894678784549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211480 + }, + { + "epoch": 1.0256882802904557, + "grad_norm": 4.0054374039755203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211490 + }, + { + "epoch": 1.0257367784832918, + "grad_norm": 9.384047672256202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211500 + }, + { + "epoch": 1.025785276676128, + "grad_norm": 9.294492997469206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211510 + }, + { + "epoch": 1.0258337748689639, + "grad_norm": 8.947755247845635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211520 + }, + { + "epoch": 1.0258822730618, + "grad_norm": 8.87615030364941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211530 + }, + { + "epoch": 1.0259307712546362, + "grad_norm": 8.22037264924802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211540 + }, + { + "epoch": 1.0259792694474723, + "grad_norm": 9.504186948561255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211550 + }, + { + "epoch": 1.0260277676403082, + "grad_norm": 9.899319763917447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211560 + }, + { + "epoch": 1.0260762658331444, + "grad_norm": 9.960415781051779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211570 + }, + { + "epoch": 1.0261247640259805, + "grad_norm": 9.26157355252144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211580 + }, + { + "epoch": 1.0261732622188167, + "grad_norm": 8.84036595039106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211590 + }, + { + "epoch": 1.0262217604116526, + "grad_norm": 8.900975956294133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211600 + }, + { + "epoch": 1.0262702586044887, + "grad_norm": 8.938943096836738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211610 + }, + { + "epoch": 1.0263187567973249, + "grad_norm": 8.903118953185185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211620 + }, + { + "epoch": 1.026367254990161, + "grad_norm": 1.0833510799557189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211630 + }, + { + "epoch": 1.026415753182997, + "grad_norm": 8.971250053946278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211640 + }, + { + "epoch": 1.026464251375833, + "grad_norm": 9.73818643501545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211650 + }, + { + "epoch": 1.0265127495686692, + "grad_norm": 9.141690782143996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211660 + }, + { + "epoch": 1.0265612477615054, + "grad_norm": 8.918189564610657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211670 + }, + { + "epoch": 1.0266097459543413, + "grad_norm": 8.142364293917126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211680 + }, + { + "epoch": 1.0266582441471774, + "grad_norm": 8.271039320106865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211690 + }, + { + "epoch": 1.0267067423400136, + "grad_norm": 8.90218458948766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211700 + }, + { + "epoch": 1.0267552405328497, + "grad_norm": 1.0536841443808953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211710 + }, + { + "epoch": 1.0268037387256856, + "grad_norm": 9.762970876181498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211720 + }, + { + "epoch": 1.0268522369185218, + "grad_norm": 4.200703187962063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211730 + }, + { + "epoch": 1.026900735111358, + "grad_norm": 2.8991312319703866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211740 + }, + { + "epoch": 1.026949233304194, + "grad_norm": 1.3010550219405559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211750 + }, + { + "epoch": 1.02699773149703, + "grad_norm": 0.0008655341225676239, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211760 + }, + { + "epoch": 1.0270462296898661, + "grad_norm": 0.9338889718055725, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 211770 + }, + { + "epoch": 1.0270947278827023, + "grad_norm": 0.0002511898928787559, + "learning_rate": 0.0002, + "loss": 0.0158, + "step": 211780 + }, + { + "epoch": 1.0271432260755384, + "grad_norm": 0.0002180292794946581, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 211790 + }, + { + "epoch": 1.0271917242683744, + "grad_norm": 0.03599068522453308, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 211800 + }, + { + "epoch": 1.0272402224612105, + "grad_norm": 0.0034906871151179075, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 211810 + }, + { + "epoch": 1.0272887206540466, + "grad_norm": 0.0007502559456042945, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 211820 + }, + { + "epoch": 1.0273372188468828, + "grad_norm": 0.000584433029871434, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 211830 + }, + { + "epoch": 1.0273857170397187, + "grad_norm": 0.00013699382543563843, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211840 + }, + { + "epoch": 1.0274342152325548, + "grad_norm": 0.00025877426378428936, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211850 + }, + { + "epoch": 1.027482713425391, + "grad_norm": 5.397771383286454e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211860 + }, + { + "epoch": 1.0275312116182271, + "grad_norm": 2.951878195744939e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211870 + }, + { + "epoch": 1.027579709811063, + "grad_norm": 2.2842197722638957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211880 + }, + { + "epoch": 1.0276282080038992, + "grad_norm": 1.0199563803325873e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211890 + }, + { + "epoch": 1.0276767061967353, + "grad_norm": 1.7290940377279185e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211900 + }, + { + "epoch": 1.0277252043895715, + "grad_norm": 1.5922800230327994e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211910 + }, + { + "epoch": 1.0277737025824076, + "grad_norm": 0.0002792790764942765, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 211920 + }, + { + "epoch": 1.0278222007752436, + "grad_norm": 0.00018074216495733708, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211930 + }, + { + "epoch": 1.0278706989680797, + "grad_norm": 3.435334292589687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211940 + }, + { + "epoch": 1.0279191971609158, + "grad_norm": 2.9674842153326608e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211950 + }, + { + "epoch": 1.027967695353752, + "grad_norm": 2.4874218070181087e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211960 + }, + { + "epoch": 1.028016193546588, + "grad_norm": 2.4886359824449755e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211970 + }, + { + "epoch": 1.028064691739424, + "grad_norm": 1.8782920960802585e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211980 + }, + { + "epoch": 1.0281131899322602, + "grad_norm": 1.348477144347271e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 211990 + }, + { + "epoch": 1.0281616881250963, + "grad_norm": 1.6610936654615216e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212000 + }, + { + "epoch": 1.0282101863179323, + "grad_norm": 1.488560428697383e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212010 + }, + { + "epoch": 1.0282586845107684, + "grad_norm": 1.4446578461502213e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212020 + }, + { + "epoch": 1.0283071827036045, + "grad_norm": 1.2681796761171427e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212030 + }, + { + "epoch": 1.0283556808964407, + "grad_norm": 9.398732800036669e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212040 + }, + { + "epoch": 1.0284041790892766, + "grad_norm": 1.2503020116128027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212050 + }, + { + "epoch": 1.0284526772821128, + "grad_norm": 1.1242547770962119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212060 + }, + { + "epoch": 1.028501175474949, + "grad_norm": 1.3499682609108277e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212070 + }, + { + "epoch": 1.028549673667785, + "grad_norm": 1.066082222678233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212080 + }, + { + "epoch": 1.028598171860621, + "grad_norm": 8.330335731443483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212090 + }, + { + "epoch": 1.0286466700534571, + "grad_norm": 0.0005506979068741202, + "learning_rate": 0.0002, + "loss": 0.0413, + "step": 212100 + }, + { + "epoch": 1.0286951682462933, + "grad_norm": 4.5553799282060936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212110 + }, + { + "epoch": 1.0287436664391294, + "grad_norm": 0.00027807772858068347, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212120 + }, + { + "epoch": 1.0287921646319653, + "grad_norm": 0.00012065115151926875, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212130 + }, + { + "epoch": 1.0288406628248015, + "grad_norm": 2.4304248654516414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212140 + }, + { + "epoch": 1.0288891610176376, + "grad_norm": 4.1653867810964584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212150 + }, + { + "epoch": 1.0289376592104738, + "grad_norm": 2.2017919036443345e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212160 + }, + { + "epoch": 1.0289861574033097, + "grad_norm": 1.701512337604072e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212170 + }, + { + "epoch": 1.0290346555961458, + "grad_norm": 1.2043071365042124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212180 + }, + { + "epoch": 1.029083153788982, + "grad_norm": 7.837732482585125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212190 + }, + { + "epoch": 1.029131651981818, + "grad_norm": 1.1620351870078593e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212200 + }, + { + "epoch": 1.029180150174654, + "grad_norm": 8.870187230058946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212210 + }, + { + "epoch": 1.0292286483674902, + "grad_norm": 7.895086127973627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212220 + }, + { + "epoch": 1.0292771465603263, + "grad_norm": 6.774412213417236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212230 + }, + { + "epoch": 1.0293256447531625, + "grad_norm": 5.066907306172652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212240 + }, + { + "epoch": 1.0293741429459984, + "grad_norm": 5.854557002749061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212250 + }, + { + "epoch": 1.0294226411388345, + "grad_norm": 6.624628440476954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212260 + }, + { + "epoch": 1.0294711393316707, + "grad_norm": 5.3633757488569245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212270 + }, + { + "epoch": 1.0295196375245068, + "grad_norm": 6.7993423726875335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212280 + }, + { + "epoch": 1.0295681357173427, + "grad_norm": 4.591992819769075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212290 + }, + { + "epoch": 1.0296166339101789, + "grad_norm": 5.118588887853548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212300 + }, + { + "epoch": 1.029665132103015, + "grad_norm": 5.5548366617586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212310 + }, + { + "epoch": 1.0297136302958512, + "grad_norm": 5.7405727602599654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212320 + }, + { + "epoch": 1.029762128488687, + "grad_norm": 5.579908247455023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212330 + }, + { + "epoch": 1.0298106266815232, + "grad_norm": 5.392852926888736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212340 + }, + { + "epoch": 1.0298591248743594, + "grad_norm": 4.664016614697175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212350 + }, + { + "epoch": 1.0299076230671955, + "grad_norm": 4.365712811704725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212360 + }, + { + "epoch": 1.0299561212600314, + "grad_norm": 4.75676779387868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212370 + }, + { + "epoch": 1.0300046194528676, + "grad_norm": 4.252383860148257e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212380 + }, + { + "epoch": 1.0300531176457037, + "grad_norm": 3.598195462473086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212390 + }, + { + "epoch": 1.0301016158385399, + "grad_norm": 4.290450306143612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212400 + }, + { + "epoch": 1.0301501140313758, + "grad_norm": 4.973320756107569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212410 + }, + { + "epoch": 1.030198612224212, + "grad_norm": 5.078378308098763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212420 + }, + { + "epoch": 1.030247110417048, + "grad_norm": 3.6467558857111726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212430 + }, + { + "epoch": 1.0302956086098842, + "grad_norm": 3.2971606742648873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212440 + }, + { + "epoch": 1.0303441068027204, + "grad_norm": 3.651929546322208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212450 + }, + { + "epoch": 1.0303926049955563, + "grad_norm": 3.8251814657996874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212460 + }, + { + "epoch": 1.0304411031883924, + "grad_norm": 3.634795803009183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212470 + }, + { + "epoch": 1.0304896013812286, + "grad_norm": 3.938910595024936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212480 + }, + { + "epoch": 1.0305380995740647, + "grad_norm": 3.098781462540501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212490 + }, + { + "epoch": 1.0305865977669006, + "grad_norm": 3.307532097096555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212500 + }, + { + "epoch": 1.0306350959597368, + "grad_norm": 4.1477774175291415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212510 + }, + { + "epoch": 1.030683594152573, + "grad_norm": 3.4581928503030213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212520 + }, + { + "epoch": 1.030732092345409, + "grad_norm": 3.256760464864783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212530 + }, + { + "epoch": 1.030780590538245, + "grad_norm": 3.6889539387630066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212540 + }, + { + "epoch": 1.0308290887310811, + "grad_norm": 3.144116135445074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212550 + }, + { + "epoch": 1.0308775869239173, + "grad_norm": 3.2004065815272043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212560 + }, + { + "epoch": 1.0309260851167534, + "grad_norm": 3.2745631415309617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212570 + }, + { + "epoch": 1.0309745833095894, + "grad_norm": 3.110304305664613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212580 + }, + { + "epoch": 1.0310230815024255, + "grad_norm": 2.493150986992987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212590 + }, + { + "epoch": 1.0310715796952616, + "grad_norm": 3.199150114596705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212600 + }, + { + "epoch": 1.0311200778880978, + "grad_norm": 2.9978109523653984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212610 + }, + { + "epoch": 1.0311685760809337, + "grad_norm": 2.989954737131484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212620 + }, + { + "epoch": 1.0312170742737699, + "grad_norm": 3.0956966838857625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212630 + }, + { + "epoch": 1.031265572466606, + "grad_norm": 2.400655830570031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212640 + }, + { + "epoch": 1.0313140706594421, + "grad_norm": 2.8765066417690832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212650 + }, + { + "epoch": 1.031362568852278, + "grad_norm": 2.631348024806357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212660 + }, + { + "epoch": 1.0314110670451142, + "grad_norm": 2.6163397706113756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212670 + }, + { + "epoch": 1.0314595652379503, + "grad_norm": 2.9835705390723888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212680 + }, + { + "epoch": 1.0315080634307865, + "grad_norm": 2.3187139959190972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212690 + }, + { + "epoch": 1.0315565616236224, + "grad_norm": 3.51767334905162e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 212700 + }, + { + "epoch": 1.0316050598164586, + "grad_norm": 0.0005917787784710526, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212710 + }, + { + "epoch": 1.0316535580092947, + "grad_norm": 8.092785719782114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212720 + }, + { + "epoch": 1.0317020562021308, + "grad_norm": 4.240904218022479e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212730 + }, + { + "epoch": 1.0317505543949668, + "grad_norm": 3.3081068977480754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212740 + }, + { + "epoch": 1.031799052587803, + "grad_norm": 4.579365850077011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212750 + }, + { + "epoch": 1.031847550780639, + "grad_norm": 5.151648565515643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212760 + }, + { + "epoch": 1.0318960489734752, + "grad_norm": 4.957565579388756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212770 + }, + { + "epoch": 1.0319445471663111, + "grad_norm": 4.306461050873622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212780 + }, + { + "epoch": 1.0319930453591473, + "grad_norm": 2.928818730651983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212790 + }, + { + "epoch": 1.0320415435519834, + "grad_norm": 4.1483663153485395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212800 + }, + { + "epoch": 1.0320900417448196, + "grad_norm": 3.3863836961245397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212810 + }, + { + "epoch": 1.0321385399376555, + "grad_norm": 3.698371074278839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212820 + }, + { + "epoch": 1.0321870381304916, + "grad_norm": 3.246487949581933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212830 + }, + { + "epoch": 1.0322355363233278, + "grad_norm": 2.8858883069915464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212840 + }, + { + "epoch": 1.032284034516164, + "grad_norm": 3.3659782729955623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212850 + }, + { + "epoch": 1.0323325327089998, + "grad_norm": 3.2613058920105686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212860 + }, + { + "epoch": 1.032381030901836, + "grad_norm": 3.3904259453265695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212870 + }, + { + "epoch": 1.0324295290946721, + "grad_norm": 3.4658780805330025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212880 + }, + { + "epoch": 1.0324780272875083, + "grad_norm": 2.6386244371678913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212890 + }, + { + "epoch": 1.0325265254803442, + "grad_norm": 3.112209014943801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212900 + }, + { + "epoch": 1.0325750236731803, + "grad_norm": 3.0321618851303356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212910 + }, + { + "epoch": 1.0326235218660165, + "grad_norm": 2.953180683107348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212920 + }, + { + "epoch": 1.0326720200588526, + "grad_norm": 2.813994115058449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212930 + }, + { + "epoch": 1.0327205182516885, + "grad_norm": 2.278271722389036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212940 + }, + { + "epoch": 1.0327690164445247, + "grad_norm": 2.8453894174162997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212950 + }, + { + "epoch": 1.0328175146373608, + "grad_norm": 2.8271729206608143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212960 + }, + { + "epoch": 1.032866012830197, + "grad_norm": 2.8381730317050824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212970 + }, + { + "epoch": 1.032914511023033, + "grad_norm": 3.3892813462443883e-06, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 212980 + }, + { + "epoch": 1.032963009215869, + "grad_norm": 6.160368229757296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 212990 + }, + { + "epoch": 1.0330115074087052, + "grad_norm": 5.284784492687322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213000 + }, + { + "epoch": 1.0330600056015413, + "grad_norm": 5.096302174933953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213010 + }, + { + "epoch": 1.0331085037943775, + "grad_norm": 4.701988018496195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213020 + }, + { + "epoch": 1.0331570019872134, + "grad_norm": 4.0266804717248306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213030 + }, + { + "epoch": 1.0332055001800495, + "grad_norm": 4.455666567082517e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213040 + }, + { + "epoch": 1.0332539983728857, + "grad_norm": 4.219106358505087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213050 + }, + { + "epoch": 1.0333024965657218, + "grad_norm": 4.342163265391719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213060 + }, + { + "epoch": 1.0333509947585577, + "grad_norm": 3.69926169696555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213070 + }, + { + "epoch": 1.0333994929513939, + "grad_norm": 3.831025424005929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213080 + }, + { + "epoch": 1.03344799114423, + "grad_norm": 2.1892170479986817e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 213090 + }, + { + "epoch": 1.0334964893370662, + "grad_norm": 0.017969096079468727, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 213100 + }, + { + "epoch": 1.033544987529902, + "grad_norm": 2.4127025426423643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213110 + }, + { + "epoch": 1.0335934857227382, + "grad_norm": 5.5003697525535244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213120 + }, + { + "epoch": 1.0336419839155744, + "grad_norm": 4.171229647909058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213130 + }, + { + "epoch": 1.0336904821084105, + "grad_norm": 1.0904489499807823e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213140 + }, + { + "epoch": 1.0337389803012464, + "grad_norm": 5.6193111959146336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213150 + }, + { + "epoch": 1.0337874784940826, + "grad_norm": 6.849614692328032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213160 + }, + { + "epoch": 1.0338359766869187, + "grad_norm": 2.6008906388597097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213170 + }, + { + "epoch": 1.0338844748797549, + "grad_norm": 2.7103164939035196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213180 + }, + { + "epoch": 1.0339329730725908, + "grad_norm": 5.634826266032178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213190 + }, + { + "epoch": 1.033981471265427, + "grad_norm": 2.2071906187193235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213200 + }, + { + "epoch": 1.034029969458263, + "grad_norm": 2.294060095664463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213210 + }, + { + "epoch": 1.0340784676510992, + "grad_norm": 2.021380169026088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213220 + }, + { + "epoch": 1.0341269658439352, + "grad_norm": 3.6347287277749274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213230 + }, + { + "epoch": 1.0341754640367713, + "grad_norm": 3.978746917709941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213240 + }, + { + "epoch": 1.0342239622296074, + "grad_norm": 2.7894989216292743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213250 + }, + { + "epoch": 1.0342724604224436, + "grad_norm": 2.9952394470456056e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213260 + }, + { + "epoch": 1.0343209586152795, + "grad_norm": 2.1489504433702677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213270 + }, + { + "epoch": 1.0343694568081157, + "grad_norm": 2.394732291577384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213280 + }, + { + "epoch": 1.0344179550009518, + "grad_norm": 3.335617748234654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213290 + }, + { + "epoch": 1.034466453193788, + "grad_norm": 3.1408737868332537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213300 + }, + { + "epoch": 1.0345149513866239, + "grad_norm": 2.2893989353178767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213310 + }, + { + "epoch": 1.03456344957946, + "grad_norm": 4.031849130115006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213320 + }, + { + "epoch": 1.0346119477722961, + "grad_norm": 1.9616027202573605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213330 + }, + { + "epoch": 1.0346604459651323, + "grad_norm": 3.132090569124557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213340 + }, + { + "epoch": 1.0347089441579682, + "grad_norm": 1.958546135938377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213350 + }, + { + "epoch": 1.0347574423508044, + "grad_norm": 1.7135839698312338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213360 + }, + { + "epoch": 1.0348059405436405, + "grad_norm": 2.5923061457433505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213370 + }, + { + "epoch": 1.0348544387364766, + "grad_norm": 3.2223765629169066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213380 + }, + { + "epoch": 1.0349029369293126, + "grad_norm": 3.4918307392217685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213390 + }, + { + "epoch": 1.0349514351221487, + "grad_norm": 2.9703096515731886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213400 + }, + { + "epoch": 1.0349999333149849, + "grad_norm": 1.7772714500097209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213410 + }, + { + "epoch": 1.035048431507821, + "grad_norm": 1.758660232553666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213420 + }, + { + "epoch": 1.0350969297006571, + "grad_norm": 1.6248325209744507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213430 + }, + { + "epoch": 1.035145427893493, + "grad_norm": 4.767445261677494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213440 + }, + { + "epoch": 1.0351939260863292, + "grad_norm": 1.4885033579048468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213450 + }, + { + "epoch": 1.0352424242791654, + "grad_norm": 2.8446875148802064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213460 + }, + { + "epoch": 1.0352909224720013, + "grad_norm": 2.7438941287982743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213470 + }, + { + "epoch": 1.0353394206648374, + "grad_norm": 1.4224824553821236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213480 + }, + { + "epoch": 1.0353879188576736, + "grad_norm": 2.3745139969832962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213490 + }, + { + "epoch": 1.0354364170505097, + "grad_norm": 1.1255881418037461e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213500 + }, + { + "epoch": 1.0354849152433458, + "grad_norm": 1.335009869762871e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213510 + }, + { + "epoch": 1.0355334134361818, + "grad_norm": 1.4032069657332613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213520 + }, + { + "epoch": 1.035581911629018, + "grad_norm": 1.203766714752419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213530 + }, + { + "epoch": 1.035630409821854, + "grad_norm": 3.696465455504949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213540 + }, + { + "epoch": 1.0356789080146902, + "grad_norm": 2.0345123630249873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213550 + }, + { + "epoch": 1.0357274062075261, + "grad_norm": 1.0864130217669299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213560 + }, + { + "epoch": 1.0357759044003623, + "grad_norm": 1.1138573654534412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213570 + }, + { + "epoch": 1.0358244025931984, + "grad_norm": 5.23979906574823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213580 + }, + { + "epoch": 1.0358729007860346, + "grad_norm": 2.062267640212667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213590 + }, + { + "epoch": 1.0359213989788705, + "grad_norm": 1.0712707307902747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213600 + }, + { + "epoch": 1.0359698971717066, + "grad_norm": 1.0754886261565844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213610 + }, + { + "epoch": 1.0360183953645428, + "grad_norm": 1.013224505186372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213620 + }, + { + "epoch": 1.036066893557379, + "grad_norm": 1.2300473599680117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213630 + }, + { + "epoch": 1.0361153917502148, + "grad_norm": 1.911174422275508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213640 + }, + { + "epoch": 1.036163889943051, + "grad_norm": 1.078892182704294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213650 + }, + { + "epoch": 1.0362123881358871, + "grad_norm": 1.473895849812834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213660 + }, + { + "epoch": 1.0362608863287233, + "grad_norm": 1.0140806807612535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213670 + }, + { + "epoch": 1.0363093845215592, + "grad_norm": 9.363529898109846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213680 + }, + { + "epoch": 1.0363578827143953, + "grad_norm": 1.6122520491990144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213690 + }, + { + "epoch": 1.0364063809072315, + "grad_norm": 1.0327609061278054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213700 + }, + { + "epoch": 1.0364548791000676, + "grad_norm": 2.435953092572163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213710 + }, + { + "epoch": 1.0365033772929035, + "grad_norm": 9.414202963853313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213720 + }, + { + "epoch": 1.0365518754857397, + "grad_norm": 9.172677550850494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213730 + }, + { + "epoch": 1.0366003736785758, + "grad_norm": 1.6204269286390627e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213740 + }, + { + "epoch": 1.036648871871412, + "grad_norm": 9.03767670479283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213750 + }, + { + "epoch": 1.036697370064248, + "grad_norm": 1.1421062708905083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213760 + }, + { + "epoch": 1.036745868257084, + "grad_norm": 9.114498311646457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213770 + }, + { + "epoch": 1.0367943664499202, + "grad_norm": 8.546219874006056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213780 + }, + { + "epoch": 1.0368428646427563, + "grad_norm": 1.2981909094378352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213790 + }, + { + "epoch": 1.0368913628355922, + "grad_norm": 7.28918735148909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213800 + }, + { + "epoch": 1.0369398610284284, + "grad_norm": 3.357937021064572e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213810 + }, + { + "epoch": 1.0369883592212645, + "grad_norm": 9.020566835715726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213820 + }, + { + "epoch": 1.0370368574141007, + "grad_norm": 7.588363928334729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213830 + }, + { + "epoch": 1.0370853556069366, + "grad_norm": 3.213809350199881e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213840 + }, + { + "epoch": 1.0371338537997727, + "grad_norm": 4.2633387238311116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213850 + }, + { + "epoch": 1.0371823519926089, + "grad_norm": 7.972706157488574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213860 + }, + { + "epoch": 1.037230850185445, + "grad_norm": 7.169256832639803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213870 + }, + { + "epoch": 1.037279348378281, + "grad_norm": 9.854882137005916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213880 + }, + { + "epoch": 1.037327846571117, + "grad_norm": 1.1540173545654397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213890 + }, + { + "epoch": 1.0373763447639532, + "grad_norm": 5.5394652918039355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213900 + }, + { + "epoch": 1.0374248429567894, + "grad_norm": 7.273135338436987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213910 + }, + { + "epoch": 1.0374733411496253, + "grad_norm": 2.444731535433675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213920 + }, + { + "epoch": 1.0375218393424614, + "grad_norm": 1.0113855751114897e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213930 + }, + { + "epoch": 1.0375703375352976, + "grad_norm": 1.3612556131192832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213940 + }, + { + "epoch": 1.0376188357281337, + "grad_norm": 1.4921319007044076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213950 + }, + { + "epoch": 1.0376673339209699, + "grad_norm": 7.196082378868596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213960 + }, + { + "epoch": 1.0377158321138058, + "grad_norm": 9.600765906725428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213970 + }, + { + "epoch": 1.037764330306642, + "grad_norm": 7.270698461070424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213980 + }, + { + "epoch": 1.037812828499478, + "grad_norm": 8.872958119354735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 213990 + }, + { + "epoch": 1.0378613266923142, + "grad_norm": 1.66466963946732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214000 + }, + { + "epoch": 1.0379098248851502, + "grad_norm": 6.545246833411511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214010 + }, + { + "epoch": 1.0379583230779863, + "grad_norm": 6.673888606201217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214020 + }, + { + "epoch": 1.0380068212708224, + "grad_norm": 6.923946216375043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214030 + }, + { + "epoch": 1.0380553194636586, + "grad_norm": 9.674403145254473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214040 + }, + { + "epoch": 1.0381038176564945, + "grad_norm": 1.0814230790856527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214050 + }, + { + "epoch": 1.0381523158493307, + "grad_norm": 7.109721309461747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214060 + }, + { + "epoch": 1.0382008140421668, + "grad_norm": 6.760797077731695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214070 + }, + { + "epoch": 1.038249312235003, + "grad_norm": 9.867527523965691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214080 + }, + { + "epoch": 1.0382978104278389, + "grad_norm": 1.0060931572297704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214090 + }, + { + "epoch": 1.038346308620675, + "grad_norm": 6.235680416466494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214100 + }, + { + "epoch": 1.0383948068135112, + "grad_norm": 6.750873353666975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214110 + }, + { + "epoch": 1.0384433050063473, + "grad_norm": 7.456267780980852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214120 + }, + { + "epoch": 1.0384918031991832, + "grad_norm": 5.964975002825668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214130 + }, + { + "epoch": 1.0385403013920194, + "grad_norm": 7.84516998919571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214140 + }, + { + "epoch": 1.0385887995848555, + "grad_norm": 9.80132085715013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214150 + }, + { + "epoch": 1.0386372977776916, + "grad_norm": 6.48383092993754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214160 + }, + { + "epoch": 1.0386857959705276, + "grad_norm": 6.486234838121163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214170 + }, + { + "epoch": 1.0387342941633637, + "grad_norm": 6.078490741856513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214180 + }, + { + "epoch": 1.0387827923561999, + "grad_norm": 8.07978551620181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214190 + }, + { + "epoch": 1.038831290549036, + "grad_norm": 7.045528604976425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214200 + }, + { + "epoch": 1.038879788741872, + "grad_norm": 5.777831688646984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214210 + }, + { + "epoch": 1.038928286934708, + "grad_norm": 5.75115791434655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214220 + }, + { + "epoch": 1.0389767851275442, + "grad_norm": 5.948394914412347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214230 + }, + { + "epoch": 1.0390252833203804, + "grad_norm": 1.0212040706392145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214240 + }, + { + "epoch": 1.0390737815132163, + "grad_norm": 5.983887945149036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214250 + }, + { + "epoch": 1.0391222797060524, + "grad_norm": 8.081971145657008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214260 + }, + { + "epoch": 1.0391707778988886, + "grad_norm": 6.509063723569852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214270 + }, + { + "epoch": 1.0392192760917247, + "grad_norm": 5.72276690036233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214280 + }, + { + "epoch": 1.0392677742845606, + "grad_norm": 5.855521862940805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214290 + }, + { + "epoch": 1.0393162724773968, + "grad_norm": 5.67918448268756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214300 + }, + { + "epoch": 1.039364770670233, + "grad_norm": 5.874719022358477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214310 + }, + { + "epoch": 1.039413268863069, + "grad_norm": 1.575565306666249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214320 + }, + { + "epoch": 1.039461767055905, + "grad_norm": 9.149241009254183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214330 + }, + { + "epoch": 1.0395102652487411, + "grad_norm": 8.700063176547701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214340 + }, + { + "epoch": 1.0395587634415773, + "grad_norm": 7.737595524304197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214350 + }, + { + "epoch": 1.0396072616344134, + "grad_norm": 5.084104941488476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214360 + }, + { + "epoch": 1.0396557598272493, + "grad_norm": 5.093448862680816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214370 + }, + { + "epoch": 1.0397042580200855, + "grad_norm": 5.338612254490727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214380 + }, + { + "epoch": 1.0397527562129216, + "grad_norm": 2.024788045673631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214390 + }, + { + "epoch": 1.0398012544057578, + "grad_norm": 6.482234766735928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214400 + }, + { + "epoch": 1.0398497525985937, + "grad_norm": 3.7736792819487164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214410 + }, + { + "epoch": 1.0398982507914298, + "grad_norm": 4.961412400916743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214420 + }, + { + "epoch": 1.039946748984266, + "grad_norm": 4.60247065348085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214430 + }, + { + "epoch": 1.0399952471771021, + "grad_norm": 9.282668429477781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214440 + }, + { + "epoch": 1.040043745369938, + "grad_norm": 1.455700839869678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214450 + }, + { + "epoch": 1.0400922435627742, + "grad_norm": 4.4008334043610375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214460 + }, + { + "epoch": 1.0401407417556103, + "grad_norm": 2.1807045413879678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214470 + }, + { + "epoch": 1.0401892399484465, + "grad_norm": 4.553508290427999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214480 + }, + { + "epoch": 1.0402377381412826, + "grad_norm": 1.1799816093116533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214490 + }, + { + "epoch": 1.0402862363341185, + "grad_norm": 5.323090022102406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214500 + }, + { + "epoch": 1.0403347345269547, + "grad_norm": 6.453909691117587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214510 + }, + { + "epoch": 1.0403832327197908, + "grad_norm": 1.2861233926741988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214520 + }, + { + "epoch": 1.040431730912627, + "grad_norm": 4.5539462689703214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214530 + }, + { + "epoch": 1.040480229105463, + "grad_norm": 6.484537493633979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214540 + }, + { + "epoch": 1.040528727298299, + "grad_norm": 4.116782292840071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214550 + }, + { + "epoch": 1.0405772254911352, + "grad_norm": 4.4788660602534947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214560 + }, + { + "epoch": 1.0406257236839713, + "grad_norm": 4.594914457811683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214570 + }, + { + "epoch": 1.0406742218768072, + "grad_norm": 4.6545477516701794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214580 + }, + { + "epoch": 1.0407227200696434, + "grad_norm": 4.90495892790932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214590 + }, + { + "epoch": 1.0407712182624795, + "grad_norm": 4.109284361675236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214600 + }, + { + "epoch": 1.0408197164553157, + "grad_norm": 7.073145980029949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214610 + }, + { + "epoch": 1.0408682146481516, + "grad_norm": 4.191085736238165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214620 + }, + { + "epoch": 1.0409167128409877, + "grad_norm": 5.241079747975164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214630 + }, + { + "epoch": 1.040965211033824, + "grad_norm": 5.58514273052424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214640 + }, + { + "epoch": 1.04101370922666, + "grad_norm": 4.4181021507938567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214650 + }, + { + "epoch": 1.041062207419496, + "grad_norm": 4.254665668668167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214660 + }, + { + "epoch": 1.041110705612332, + "grad_norm": 4.2289101997994294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214670 + }, + { + "epoch": 1.0411592038051682, + "grad_norm": 4.003578908395866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214680 + }, + { + "epoch": 1.0412077019980044, + "grad_norm": 6.705298005726945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214690 + }, + { + "epoch": 1.0412562001908403, + "grad_norm": 4.6095351535768714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214700 + }, + { + "epoch": 1.0413046983836765, + "grad_norm": 5.7319596180605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214710 + }, + { + "epoch": 1.0413531965765126, + "grad_norm": 1.1745157735276734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214720 + }, + { + "epoch": 1.0414016947693487, + "grad_norm": 3.2397972518083407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214730 + }, + { + "epoch": 1.0414501929621847, + "grad_norm": 2.4316946110047866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214740 + }, + { + "epoch": 1.0414986911550208, + "grad_norm": 1.2930383945786161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214750 + }, + { + "epoch": 1.041547189347857, + "grad_norm": 1.4134712955637951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214760 + }, + { + "epoch": 1.041595687540693, + "grad_norm": 2.199855089202174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214770 + }, + { + "epoch": 1.041644185733529, + "grad_norm": 2.177542000936228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214780 + }, + { + "epoch": 1.0416926839263652, + "grad_norm": 2.1186901904002298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214790 + }, + { + "epoch": 1.0417411821192013, + "grad_norm": 1.165124444924004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214800 + }, + { + "epoch": 1.0417896803120374, + "grad_norm": 1.1155768788739806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214810 + }, + { + "epoch": 1.0418381785048734, + "grad_norm": 2.2999797693046276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214820 + }, + { + "epoch": 1.0418866766977095, + "grad_norm": 7.211750698843389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214830 + }, + { + "epoch": 1.0419351748905457, + "grad_norm": 2.643351535880356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214840 + }, + { + "epoch": 1.0419836730833818, + "grad_norm": 2.3714701455901377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214850 + }, + { + "epoch": 1.0420321712762177, + "grad_norm": 1.282733364860178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214860 + }, + { + "epoch": 1.0420806694690539, + "grad_norm": 8.301758498419076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214870 + }, + { + "epoch": 1.04212916766189, + "grad_norm": 9.3453144245359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214880 + }, + { + "epoch": 1.0421776658547262, + "grad_norm": 1.3534036042983644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214890 + }, + { + "epoch": 1.042226164047562, + "grad_norm": 1.2846072650063434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214900 + }, + { + "epoch": 1.0422746622403982, + "grad_norm": 6.878217959638278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214910 + }, + { + "epoch": 1.0423231604332344, + "grad_norm": 2.243789595013368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214920 + }, + { + "epoch": 1.0423716586260705, + "grad_norm": 5.634559556710883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214930 + }, + { + "epoch": 1.0424201568189064, + "grad_norm": 1.1761602536353166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214940 + }, + { + "epoch": 1.0424686550117426, + "grad_norm": 2.6023710688605206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214950 + }, + { + "epoch": 1.0425171532045787, + "grad_norm": 5.930970701228944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214960 + }, + { + "epoch": 1.0425656513974149, + "grad_norm": 6.81870744756452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214970 + }, + { + "epoch": 1.0426141495902508, + "grad_norm": 5.820555202262767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214980 + }, + { + "epoch": 1.042662647783087, + "grad_norm": 1.277120986742375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 214990 + }, + { + "epoch": 1.042711145975923, + "grad_norm": 7.149515113269445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215000 + }, + { + "epoch": 1.0427596441687592, + "grad_norm": 8.191167921722808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215010 + }, + { + "epoch": 1.0428081423615954, + "grad_norm": 7.587875074932526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215020 + }, + { + "epoch": 1.0428566405544313, + "grad_norm": 6.991698455749429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215030 + }, + { + "epoch": 1.0429051387472674, + "grad_norm": 1.2659328376685153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215040 + }, + { + "epoch": 1.0429536369401036, + "grad_norm": 4.507774349349347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215050 + }, + { + "epoch": 1.0430021351329397, + "grad_norm": 6.593179477931699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215060 + }, + { + "epoch": 1.0430506333257756, + "grad_norm": 6.330455448733119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215070 + }, + { + "epoch": 1.0430991315186118, + "grad_norm": 2.260027713418822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215080 + }, + { + "epoch": 1.043147629711448, + "grad_norm": 2.6591769710648805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215090 + }, + { + "epoch": 1.043196127904284, + "grad_norm": 4.828254418498545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215100 + }, + { + "epoch": 1.04324462609712, + "grad_norm": 6.224121875675337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215110 + }, + { + "epoch": 1.0432931242899561, + "grad_norm": 5.478482307808008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215120 + }, + { + "epoch": 1.0433416224827923, + "grad_norm": 6.760670885341824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215130 + }, + { + "epoch": 1.0433901206756284, + "grad_norm": 8.576212167099584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215140 + }, + { + "epoch": 1.0434386188684643, + "grad_norm": 5.192420644561935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215150 + }, + { + "epoch": 1.0434871170613005, + "grad_norm": 4.324649580667028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215160 + }, + { + "epoch": 1.0435356152541366, + "grad_norm": 4.1802138639468467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215170 + }, + { + "epoch": 1.0435841134469728, + "grad_norm": 4.0391708466813725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215180 + }, + { + "epoch": 1.0436326116398087, + "grad_norm": 9.863369996310212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215190 + }, + { + "epoch": 1.0436811098326448, + "grad_norm": 5.23388962392346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215200 + }, + { + "epoch": 1.043729608025481, + "grad_norm": 9.916136605170323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215210 + }, + { + "epoch": 1.0437781062183171, + "grad_norm": 4.6693250510543294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215220 + }, + { + "epoch": 1.043826604411153, + "grad_norm": 6.778329293410934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215230 + }, + { + "epoch": 1.0438751026039892, + "grad_norm": 6.387476219060773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215240 + }, + { + "epoch": 1.0439236007968253, + "grad_norm": 5.010386985304649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215250 + }, + { + "epoch": 1.0439720989896615, + "grad_norm": 1.1481788533274084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215260 + }, + { + "epoch": 1.0440205971824974, + "grad_norm": 5.56245709049108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215270 + }, + { + "epoch": 1.0440690953753335, + "grad_norm": 4.227589158745104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215280 + }, + { + "epoch": 1.0441175935681697, + "grad_norm": 6.245859367481899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215290 + }, + { + "epoch": 1.0441660917610058, + "grad_norm": 4.932548449687602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215300 + }, + { + "epoch": 1.0442145899538418, + "grad_norm": 4.035205165564548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215310 + }, + { + "epoch": 1.044263088146678, + "grad_norm": 3.838231066310982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215320 + }, + { + "epoch": 1.044311586339514, + "grad_norm": 7.208252554846695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215330 + }, + { + "epoch": 1.0443600845323502, + "grad_norm": 5.77647313093621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215340 + }, + { + "epoch": 1.044408582725186, + "grad_norm": 5.47699926300993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215350 + }, + { + "epoch": 1.0444570809180223, + "grad_norm": 4.6976981593616074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215360 + }, + { + "epoch": 1.0445055791108584, + "grad_norm": 8.317894071296905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215370 + }, + { + "epoch": 1.0445540773036945, + "grad_norm": 5.949385126768902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215380 + }, + { + "epoch": 1.0446025754965305, + "grad_norm": 6.972206278987869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215390 + }, + { + "epoch": 1.0446510736893666, + "grad_norm": 3.998555087036948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215400 + }, + { + "epoch": 1.0446995718822027, + "grad_norm": 4.877764467892121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215410 + }, + { + "epoch": 1.044748070075039, + "grad_norm": 3.4421600503264926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215420 + }, + { + "epoch": 1.0447965682678748, + "grad_norm": 2.0282041077734903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215430 + }, + { + "epoch": 1.044845066460711, + "grad_norm": 6.676820021311869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215440 + }, + { + "epoch": 1.044893564653547, + "grad_norm": 3.6522126833915536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215450 + }, + { + "epoch": 1.0449420628463832, + "grad_norm": 3.3019733791661565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215460 + }, + { + "epoch": 1.0449905610392194, + "grad_norm": 3.8312131778184266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215470 + }, + { + "epoch": 1.0450390592320553, + "grad_norm": 3.5190322478229064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215480 + }, + { + "epoch": 1.0450875574248915, + "grad_norm": 4.0903111653278756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215490 + }, + { + "epoch": 1.0451360556177276, + "grad_norm": 4.0536303913540905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215500 + }, + { + "epoch": 1.0451845538105635, + "grad_norm": 3.638418206719507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215510 + }, + { + "epoch": 1.0452330520033997, + "grad_norm": 3.3096071661020687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215520 + }, + { + "epoch": 1.0452815501962358, + "grad_norm": 4.238307269588404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215530 + }, + { + "epoch": 1.045330048389072, + "grad_norm": 4.7350278009616886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215540 + }, + { + "epoch": 1.045378546581908, + "grad_norm": 1.780608727131039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215550 + }, + { + "epoch": 1.045427044774744, + "grad_norm": 3.041600677988754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215560 + }, + { + "epoch": 1.0454755429675802, + "grad_norm": 2.880298097807099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215570 + }, + { + "epoch": 1.0455240411604163, + "grad_norm": 4.209956045997387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215580 + }, + { + "epoch": 1.0455725393532525, + "grad_norm": 3.873690843647637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215590 + }, + { + "epoch": 1.0456210375460884, + "grad_norm": 3.288725736183551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215600 + }, + { + "epoch": 1.0456695357389245, + "grad_norm": 4.152856263317517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215610 + }, + { + "epoch": 1.0457180339317607, + "grad_norm": 3.195121394128364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215620 + }, + { + "epoch": 1.0457665321245968, + "grad_norm": 3.643634443051269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215630 + }, + { + "epoch": 1.0458150303174327, + "grad_norm": 3.639956673850975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215640 + }, + { + "epoch": 1.0458635285102689, + "grad_norm": 3.0506490134030173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215650 + }, + { + "epoch": 1.045912026703105, + "grad_norm": 8.912094244806212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215660 + }, + { + "epoch": 1.0459605248959412, + "grad_norm": 3.529735295160208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215670 + }, + { + "epoch": 1.046009023088777, + "grad_norm": 9.446454782846558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215680 + }, + { + "epoch": 1.0460575212816132, + "grad_norm": 3.116357731869357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215690 + }, + { + "epoch": 1.0461060194744494, + "grad_norm": 3.0890060997990076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215700 + }, + { + "epoch": 1.0461545176672855, + "grad_norm": 3.738406348929857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215710 + }, + { + "epoch": 1.0462030158601214, + "grad_norm": 2.9654307809323655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215720 + }, + { + "epoch": 1.0462515140529576, + "grad_norm": 2.673592121027468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215730 + }, + { + "epoch": 1.0463000122457937, + "grad_norm": 2.8914152494508016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215740 + }, + { + "epoch": 1.0463485104386299, + "grad_norm": 2.6568031330498343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215750 + }, + { + "epoch": 1.0463970086314658, + "grad_norm": 2.905771339101193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215760 + }, + { + "epoch": 1.046445506824302, + "grad_norm": 2.781149248676229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215770 + }, + { + "epoch": 1.046494005017138, + "grad_norm": 3.1791023502592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215780 + }, + { + "epoch": 1.0465425032099742, + "grad_norm": 4.6533233444279176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215790 + }, + { + "epoch": 1.0465910014028101, + "grad_norm": 8.678334779688157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215800 + }, + { + "epoch": 1.0466394995956463, + "grad_norm": 3.132011272555246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215810 + }, + { + "epoch": 1.0466879977884824, + "grad_norm": 2.807090879741736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215820 + }, + { + "epoch": 1.0467364959813186, + "grad_norm": 2.6037056954919535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215830 + }, + { + "epoch": 1.0467849941741545, + "grad_norm": 3.3766283991099044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215840 + }, + { + "epoch": 1.0468334923669906, + "grad_norm": 3.5259705555290566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215850 + }, + { + "epoch": 1.0468819905598268, + "grad_norm": 3.307711722300155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215860 + }, + { + "epoch": 1.046930488752663, + "grad_norm": 3.4074827226504567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215870 + }, + { + "epoch": 1.0469789869454988, + "grad_norm": 8.853428425936727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215880 + }, + { + "epoch": 1.047027485138335, + "grad_norm": 3.003866595463478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215890 + }, + { + "epoch": 1.0470759833311711, + "grad_norm": 2.706973702970572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215900 + }, + { + "epoch": 1.0471244815240073, + "grad_norm": 4.543254590316792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215910 + }, + { + "epoch": 1.0471729797168432, + "grad_norm": 1.3450064670905704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215920 + }, + { + "epoch": 1.0472214779096793, + "grad_norm": 2.663470581865113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215930 + }, + { + "epoch": 1.0472699761025155, + "grad_norm": 2.353844905655933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215940 + }, + { + "epoch": 1.0473184742953516, + "grad_norm": 2.6382352302789513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215950 + }, + { + "epoch": 1.0473669724881876, + "grad_norm": 3.7254815765663807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215960 + }, + { + "epoch": 1.0474154706810237, + "grad_norm": 2.7715279315998487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215970 + }, + { + "epoch": 1.0474639688738598, + "grad_norm": 3.8542034417332616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215980 + }, + { + "epoch": 1.047512467066696, + "grad_norm": 4.896678547083866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 215990 + }, + { + "epoch": 1.0475609652595321, + "grad_norm": 2.873742346309882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216000 + }, + { + "epoch": 1.047609463452368, + "grad_norm": 4.10910104164941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216010 + }, + { + "epoch": 1.0476579616452042, + "grad_norm": 2.5414985316274397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216020 + }, + { + "epoch": 1.0477064598380403, + "grad_norm": 2.5099086542468285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216030 + }, + { + "epoch": 1.0477549580308763, + "grad_norm": 2.8286797260079766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216040 + }, + { + "epoch": 1.0478034562237124, + "grad_norm": 7.045178449516243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216050 + }, + { + "epoch": 1.0478519544165485, + "grad_norm": 2.5114363211287127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216060 + }, + { + "epoch": 1.0479004526093847, + "grad_norm": 5.822602133775945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216070 + }, + { + "epoch": 1.0479489508022208, + "grad_norm": 2.310660960347377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216080 + }, + { + "epoch": 1.0479974489950568, + "grad_norm": 2.4046872226790583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216090 + }, + { + "epoch": 1.048045947187893, + "grad_norm": 2.1869243482797174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216100 + }, + { + "epoch": 1.048094445380729, + "grad_norm": 2.4702862333469966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216110 + }, + { + "epoch": 1.0481429435735652, + "grad_norm": 2.2654936060462205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216120 + }, + { + "epoch": 1.048191441766401, + "grad_norm": 2.464638555466081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216130 + }, + { + "epoch": 1.0482399399592373, + "grad_norm": 1.9813558083114913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216140 + }, + { + "epoch": 1.0482884381520734, + "grad_norm": 2.6661072638489713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216150 + }, + { + "epoch": 1.0483369363449095, + "grad_norm": 2.242803560648099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216160 + }, + { + "epoch": 1.0483854345377455, + "grad_norm": 2.3249059211138956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216170 + }, + { + "epoch": 1.0484339327305816, + "grad_norm": 3.4283164040971315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216180 + }, + { + "epoch": 1.0484824309234178, + "grad_norm": 2.27009522291155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216190 + }, + { + "epoch": 1.048530929116254, + "grad_norm": 2.230095077493388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216200 + }, + { + "epoch": 1.0485794273090898, + "grad_norm": 2.3071265786711592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216210 + }, + { + "epoch": 1.048627925501926, + "grad_norm": 2.0769587649738241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216220 + }, + { + "epoch": 1.048676423694762, + "grad_norm": 2.3156478334840358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216230 + }, + { + "epoch": 1.0487249218875982, + "grad_norm": 2.0163066949407948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216240 + }, + { + "epoch": 1.0487734200804342, + "grad_norm": 2.3502138901676517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216250 + }, + { + "epoch": 1.0488219182732703, + "grad_norm": 2.6175464995503717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216260 + }, + { + "epoch": 1.0488704164661065, + "grad_norm": 2.1686621209937584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216270 + }, + { + "epoch": 1.0489189146589426, + "grad_norm": 2.2917201647487673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216280 + }, + { + "epoch": 1.0489674128517785, + "grad_norm": 1.978779238243078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216290 + }, + { + "epoch": 1.0490159110446147, + "grad_norm": 2.0989223514789046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216300 + }, + { + "epoch": 1.0490644092374508, + "grad_norm": 2.1610466660604288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216310 + }, + { + "epoch": 1.049112907430287, + "grad_norm": 2.0499852837474464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216320 + }, + { + "epoch": 1.0491614056231229, + "grad_norm": 2.6444610057296813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216330 + }, + { + "epoch": 1.049209903815959, + "grad_norm": 2.6726240776042687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216340 + }, + { + "epoch": 1.0492584020087952, + "grad_norm": 2.1797401927869942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216350 + }, + { + "epoch": 1.0493069002016313, + "grad_norm": 2.1450095744057762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216360 + }, + { + "epoch": 1.0493553983944672, + "grad_norm": 2.3131160276079754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216370 + }, + { + "epoch": 1.0494038965873034, + "grad_norm": 2.0977387293896754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216380 + }, + { + "epoch": 1.0494523947801395, + "grad_norm": 1.8413555835650186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216390 + }, + { + "epoch": 1.0495008929729757, + "grad_norm": 5.338554842637677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216400 + }, + { + "epoch": 1.0495493911658116, + "grad_norm": 5.608562787529081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216410 + }, + { + "epoch": 1.0495978893586477, + "grad_norm": 3.499880847357417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216420 + }, + { + "epoch": 1.0496463875514839, + "grad_norm": 2.0173368397990998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216430 + }, + { + "epoch": 1.04969488574432, + "grad_norm": 2.0575946280132484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216440 + }, + { + "epoch": 1.049743383937156, + "grad_norm": 1.960680435786344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216450 + }, + { + "epoch": 1.049791882129992, + "grad_norm": 3.781863426866039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216460 + }, + { + "epoch": 1.0498403803228282, + "grad_norm": 2.1790917514863395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216470 + }, + { + "epoch": 1.0498888785156644, + "grad_norm": 2.2176702429987927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216480 + }, + { + "epoch": 1.0499373767085003, + "grad_norm": 3.4610008015079075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216490 + }, + { + "epoch": 1.0499858749013364, + "grad_norm": 1.9733734291094152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216500 + }, + { + "epoch": 1.0500343730941726, + "grad_norm": 1.9266194328793063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216510 + }, + { + "epoch": 1.0500828712870087, + "grad_norm": 2.091111497293241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216520 + }, + { + "epoch": 1.0501313694798449, + "grad_norm": 2.421224678528233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216530 + }, + { + "epoch": 1.0501798676726808, + "grad_norm": 2.008250703511294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216540 + }, + { + "epoch": 1.050228365865517, + "grad_norm": 4.048351911478676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216550 + }, + { + "epoch": 1.050276864058353, + "grad_norm": 1.9541133156053547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216560 + }, + { + "epoch": 1.0503253622511892, + "grad_norm": 2.097526987654419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216570 + }, + { + "epoch": 1.0503738604440251, + "grad_norm": 1.9463440992240066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216580 + }, + { + "epoch": 1.0504223586368613, + "grad_norm": 2.9108292665114277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216590 + }, + { + "epoch": 1.0504708568296974, + "grad_norm": 1.8205611240773578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216600 + }, + { + "epoch": 1.0505193550225336, + "grad_norm": 1.9679808360706375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216610 + }, + { + "epoch": 1.0505678532153695, + "grad_norm": 3.921705911125173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216620 + }, + { + "epoch": 1.0506163514082056, + "grad_norm": 1.669864815312394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216630 + }, + { + "epoch": 1.0506648496010418, + "grad_norm": 2.023448786303561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216640 + }, + { + "epoch": 1.050713347793878, + "grad_norm": 2.0558138658088865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216650 + }, + { + "epoch": 1.0507618459867138, + "grad_norm": 1.9002087014996505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216660 + }, + { + "epoch": 1.05081034417955, + "grad_norm": 1.960602844519599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216670 + }, + { + "epoch": 1.0508588423723861, + "grad_norm": 1.895574541777023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216680 + }, + { + "epoch": 1.0509073405652223, + "grad_norm": 1.6513337186552235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216690 + }, + { + "epoch": 1.0509558387580582, + "grad_norm": 1.9118061800327268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216700 + }, + { + "epoch": 1.0510043369508943, + "grad_norm": 1.7327738532912917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216710 + }, + { + "epoch": 1.0510528351437305, + "grad_norm": 1.7565774612648966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216720 + }, + { + "epoch": 1.0511013333365666, + "grad_norm": 6.103123837419844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216730 + }, + { + "epoch": 1.0511498315294026, + "grad_norm": 1.8062077344893623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216740 + }, + { + "epoch": 1.0511983297222387, + "grad_norm": 1.7470047453116422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216750 + }, + { + "epoch": 1.0512468279150748, + "grad_norm": 1.9784411620094033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216760 + }, + { + "epoch": 1.051295326107911, + "grad_norm": 3.50251781355837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216770 + }, + { + "epoch": 1.051343824300747, + "grad_norm": 3.0165645625856996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216780 + }, + { + "epoch": 1.051392322493583, + "grad_norm": 1.4357243571794243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216790 + }, + { + "epoch": 1.0514408206864192, + "grad_norm": 1.772196611682375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216800 + }, + { + "epoch": 1.0514893188792553, + "grad_norm": 1.9104071213860152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216810 + }, + { + "epoch": 1.0515378170720913, + "grad_norm": 1.7210057023930858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216820 + }, + { + "epoch": 1.0515863152649274, + "grad_norm": 1.8893095443672792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216830 + }, + { + "epoch": 1.0516348134577636, + "grad_norm": 1.664291460201639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216840 + }, + { + "epoch": 1.0516833116505997, + "grad_norm": 1.6942399838626443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216850 + }, + { + "epoch": 1.0517318098434356, + "grad_norm": 1.741250912346004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216860 + }, + { + "epoch": 1.0517803080362718, + "grad_norm": 1.6747432596275758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216870 + }, + { + "epoch": 1.051828806229108, + "grad_norm": 1.655762247310122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216880 + }, + { + "epoch": 1.051877304421944, + "grad_norm": 1.4679004323170375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216890 + }, + { + "epoch": 1.05192580261478, + "grad_norm": 1.667424811557794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216900 + }, + { + "epoch": 1.0519743008076161, + "grad_norm": 1.701963157074715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216910 + }, + { + "epoch": 1.0520227990004523, + "grad_norm": 1.71349185507097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216920 + }, + { + "epoch": 1.0520712971932884, + "grad_norm": 1.6587375739618437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216930 + }, + { + "epoch": 1.0521197953861243, + "grad_norm": 1.782118488335982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216940 + }, + { + "epoch": 1.0521682935789605, + "grad_norm": 1.6125467539040983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216950 + }, + { + "epoch": 1.0522167917717966, + "grad_norm": 1.6899963384275907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216960 + }, + { + "epoch": 1.0522652899646328, + "grad_norm": 1.5753015247810254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216970 + }, + { + "epoch": 1.0523137881574687, + "grad_norm": 1.7135627672359988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216980 + }, + { + "epoch": 1.0523622863503048, + "grad_norm": 1.7294055965066946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 216990 + }, + { + "epoch": 1.052410784543141, + "grad_norm": 1.7034196275744762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217000 + }, + { + "epoch": 1.052459282735977, + "grad_norm": 1.5275163889327814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217010 + }, + { + "epoch": 1.052507780928813, + "grad_norm": 1.6603871699771844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217020 + }, + { + "epoch": 1.0525562791216492, + "grad_norm": 2.7537600999494316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217030 + }, + { + "epoch": 1.0526047773144853, + "grad_norm": 1.882481512893719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217040 + }, + { + "epoch": 1.0526532755073215, + "grad_norm": 1.5805383668521245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217050 + }, + { + "epoch": 1.0527017737001576, + "grad_norm": 1.6360327492748183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217060 + }, + { + "epoch": 1.0527502718929935, + "grad_norm": 1.5475838210932125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217070 + }, + { + "epoch": 1.0527987700858297, + "grad_norm": 1.563572311624739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217080 + }, + { + "epoch": 1.0528472682786658, + "grad_norm": 2.1135620897894114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217090 + }, + { + "epoch": 1.052895766471502, + "grad_norm": 1.822009352281384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217100 + }, + { + "epoch": 1.0529442646643379, + "grad_norm": 1.8166218751503038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217110 + }, + { + "epoch": 1.052992762857174, + "grad_norm": 1.5400885899907735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217120 + }, + { + "epoch": 1.0530412610500102, + "grad_norm": 1.4113443569385709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217130 + }, + { + "epoch": 1.0530897592428463, + "grad_norm": 1.3199046122736036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217140 + }, + { + "epoch": 1.0531382574356822, + "grad_norm": 1.6647324230234517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217150 + }, + { + "epoch": 1.0531867556285184, + "grad_norm": 1.6996190765894426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217160 + }, + { + "epoch": 1.0532352538213545, + "grad_norm": 1.4509002710383356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217170 + }, + { + "epoch": 1.0532837520141907, + "grad_norm": 1.5866427816035866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217180 + }, + { + "epoch": 1.0533322502070266, + "grad_norm": 1.2376811753256334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217190 + }, + { + "epoch": 1.0533807483998627, + "grad_norm": 1.5477868942070927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217200 + }, + { + "epoch": 1.0534292465926989, + "grad_norm": 1.5970442746038316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217210 + }, + { + "epoch": 1.053477744785535, + "grad_norm": 1.5232836858558585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217220 + }, + { + "epoch": 1.053526242978371, + "grad_norm": 1.330723478076834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217230 + }, + { + "epoch": 1.053574741171207, + "grad_norm": 1.5388548035843996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217240 + }, + { + "epoch": 1.0536232393640432, + "grad_norm": 1.5758365634610527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217250 + }, + { + "epoch": 1.0536717375568794, + "grad_norm": 1.4636145806434797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217260 + }, + { + "epoch": 1.0537202357497153, + "grad_norm": 1.6317801510012941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217270 + }, + { + "epoch": 1.0537687339425514, + "grad_norm": 1.6196709395899234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217280 + }, + { + "epoch": 1.0538172321353876, + "grad_norm": 1.4378562696037989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217290 + }, + { + "epoch": 1.0538657303282237, + "grad_norm": 1.4059041575364972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217300 + }, + { + "epoch": 1.0539142285210596, + "grad_norm": 1.4226348810097988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217310 + }, + { + "epoch": 1.0539627267138958, + "grad_norm": 1.6287739867948403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217320 + }, + { + "epoch": 1.054011224906732, + "grad_norm": 1.5785350626629224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217330 + }, + { + "epoch": 1.054059723099568, + "grad_norm": 2.2885370754011092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217340 + }, + { + "epoch": 1.054108221292404, + "grad_norm": 1.418361961214032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217350 + }, + { + "epoch": 1.0541567194852401, + "grad_norm": 1.5668528874357435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217360 + }, + { + "epoch": 1.0542052176780763, + "grad_norm": 1.4526426639349665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217370 + }, + { + "epoch": 1.0542537158709124, + "grad_norm": 1.3534712195450993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217380 + }, + { + "epoch": 1.0543022140637484, + "grad_norm": 1.1643404462802209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217390 + }, + { + "epoch": 1.0543507122565845, + "grad_norm": 1.389635571058534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217400 + }, + { + "epoch": 1.0543992104494206, + "grad_norm": 1.7218252423845115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217410 + }, + { + "epoch": 1.0544477086422568, + "grad_norm": 1.309444570551932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217420 + }, + { + "epoch": 1.0544962068350927, + "grad_norm": 1.4340231757614674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217430 + }, + { + "epoch": 1.0545447050279289, + "grad_norm": 1.3489447781012132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217440 + }, + { + "epoch": 1.054593203220765, + "grad_norm": 2.1948554262962716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217450 + }, + { + "epoch": 1.0546417014136011, + "grad_norm": 1.3771399665074568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217460 + }, + { + "epoch": 1.054690199606437, + "grad_norm": 1.452505671295512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217470 + }, + { + "epoch": 1.0547386977992732, + "grad_norm": 1.357989702910345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217480 + }, + { + "epoch": 1.0547871959921093, + "grad_norm": 1.4515374857637653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217490 + }, + { + "epoch": 1.0548356941849455, + "grad_norm": 1.4636779610555095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217500 + }, + { + "epoch": 1.0548841923777814, + "grad_norm": 1.3649020047523663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217510 + }, + { + "epoch": 1.0549326905706176, + "grad_norm": 1.3494685902060155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217520 + }, + { + "epoch": 1.0549811887634537, + "grad_norm": 1.3411114707650995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217530 + }, + { + "epoch": 1.0550296869562898, + "grad_norm": 2.3346565569681843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217540 + }, + { + "epoch": 1.0550781851491258, + "grad_norm": 1.3375165508477949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217550 + }, + { + "epoch": 1.055126683341962, + "grad_norm": 1.3223727535205398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217560 + }, + { + "epoch": 1.055175181534798, + "grad_norm": 1.5579423973122175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217570 + }, + { + "epoch": 1.0552236797276342, + "grad_norm": 1.0969831976126443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217580 + }, + { + "epoch": 1.0552721779204703, + "grad_norm": 1.5918351436994271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217590 + }, + { + "epoch": 1.0553206761133063, + "grad_norm": 1.3402460297129437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217600 + }, + { + "epoch": 1.0553691743061424, + "grad_norm": 1.3223677797213895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217610 + }, + { + "epoch": 1.0554176724989786, + "grad_norm": 1.4532228931329882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217620 + }, + { + "epoch": 1.0554661706918147, + "grad_norm": 1.2759451806232391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217630 + }, + { + "epoch": 1.0555146688846506, + "grad_norm": 1.220987257966044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217640 + }, + { + "epoch": 1.0555631670774868, + "grad_norm": 1.2709790553344646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217650 + }, + { + "epoch": 1.055611665270323, + "grad_norm": 1.3587620628641162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217660 + }, + { + "epoch": 1.055660163463159, + "grad_norm": 1.393968318552652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217670 + }, + { + "epoch": 1.055708661655995, + "grad_norm": 1.24208384022495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217680 + }, + { + "epoch": 1.0557571598488311, + "grad_norm": 1.8677788204968238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217690 + }, + { + "epoch": 1.0558056580416673, + "grad_norm": 1.2310887598232512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217700 + }, + { + "epoch": 1.0558541562345034, + "grad_norm": 1.3100826379286445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217710 + }, + { + "epoch": 1.0559026544273393, + "grad_norm": 1.2639146973469906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217720 + }, + { + "epoch": 1.0559511526201755, + "grad_norm": 1.239264406649454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217730 + }, + { + "epoch": 1.0559996508130116, + "grad_norm": 1.0681890927344284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217740 + }, + { + "epoch": 1.0560481490058478, + "grad_norm": 1.4685605265185586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217750 + }, + { + "epoch": 1.0560966471986837, + "grad_norm": 1.3288293132518447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217760 + }, + { + "epoch": 1.0561451453915198, + "grad_norm": 1.374520763874898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217770 + }, + { + "epoch": 1.056193643584356, + "grad_norm": 1.3340712712306413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217780 + }, + { + "epoch": 1.0562421417771921, + "grad_norm": 1.0340252032392527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217790 + }, + { + "epoch": 1.056290639970028, + "grad_norm": 1.4887187660406198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217800 + }, + { + "epoch": 1.0563391381628642, + "grad_norm": 1.7690204856535274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217810 + }, + { + "epoch": 1.0563876363557003, + "grad_norm": 1.1808637623289542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217820 + }, + { + "epoch": 1.0564361345485365, + "grad_norm": 1.2545561389742943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217830 + }, + { + "epoch": 1.0564846327413724, + "grad_norm": 2.495187629847351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217840 + }, + { + "epoch": 1.0565331309342085, + "grad_norm": 1.2543536342946027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217850 + }, + { + "epoch": 1.0565816291270447, + "grad_norm": 1.2511580393947952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217860 + }, + { + "epoch": 1.0566301273198808, + "grad_norm": 1.2029562412863015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217870 + }, + { + "epoch": 1.0566786255127167, + "grad_norm": 1.1986807635366858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217880 + }, + { + "epoch": 1.0567271237055529, + "grad_norm": 1.0134630201719119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217890 + }, + { + "epoch": 1.056775621898389, + "grad_norm": 1.1435164992690261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217900 + }, + { + "epoch": 1.0568241200912252, + "grad_norm": 2.1790224025153293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217910 + }, + { + "epoch": 1.056872618284061, + "grad_norm": 1.2035430074774922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217920 + }, + { + "epoch": 1.0569211164768972, + "grad_norm": 1.2145690675424703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217930 + }, + { + "epoch": 1.0569696146697334, + "grad_norm": 1.052157827530209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217940 + }, + { + "epoch": 1.0570181128625695, + "grad_norm": 1.2008334238089446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217950 + }, + { + "epoch": 1.0570666110554054, + "grad_norm": 1.4744513521236513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217960 + }, + { + "epoch": 1.0571151092482416, + "grad_norm": 1.2161451934389333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217970 + }, + { + "epoch": 1.0571636074410777, + "grad_norm": 2.061257760033186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217980 + }, + { + "epoch": 1.0572121056339139, + "grad_norm": 1.0184115950551131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 217990 + }, + { + "epoch": 1.0572606038267498, + "grad_norm": 1.2584506237089954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218000 + }, + { + "epoch": 1.057309102019586, + "grad_norm": 1.285219326518927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218010 + }, + { + "epoch": 1.057357600212422, + "grad_norm": 1.2661870130159514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218020 + }, + { + "epoch": 1.0574060984052582, + "grad_norm": 1.2319330267018813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218030 + }, + { + "epoch": 1.0574545965980944, + "grad_norm": 2.0502156417023798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218040 + }, + { + "epoch": 1.0575030947909303, + "grad_norm": 1.1811952305151863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218050 + }, + { + "epoch": 1.0575515929837664, + "grad_norm": 1.1201441196817541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218060 + }, + { + "epoch": 1.0576000911766026, + "grad_norm": 1.1954199408137356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218070 + }, + { + "epoch": 1.0576485893694385, + "grad_norm": 1.1213860062753156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218080 + }, + { + "epoch": 1.0576970875622747, + "grad_norm": 2.3190696651909093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218090 + }, + { + "epoch": 1.0577455857551108, + "grad_norm": 1.1804080912725112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218100 + }, + { + "epoch": 1.057794083947947, + "grad_norm": 1.1679247080564892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218110 + }, + { + "epoch": 1.057842582140783, + "grad_norm": 1.0950335393999922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218120 + }, + { + "epoch": 1.057891080333619, + "grad_norm": 1.1483970041581415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218130 + }, + { + "epoch": 1.0579395785264551, + "grad_norm": 1.1148131306981668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218140 + }, + { + "epoch": 1.0579880767192913, + "grad_norm": 1.227508192869209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218150 + }, + { + "epoch": 1.0580365749121274, + "grad_norm": 1.4130061742889666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218160 + }, + { + "epoch": 1.0580850731049634, + "grad_norm": 1.1460321758249847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218170 + }, + { + "epoch": 1.0581335712977995, + "grad_norm": 2.2178225833613396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218180 + }, + { + "epoch": 1.0581820694906356, + "grad_norm": 1.223831702645839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218190 + }, + { + "epoch": 1.0582305676834718, + "grad_norm": 1.6147454573456343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218200 + }, + { + "epoch": 1.0582790658763077, + "grad_norm": 1.1881309092132142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218210 + }, + { + "epoch": 1.0583275640691439, + "grad_norm": 1.1700889501753409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218220 + }, + { + "epoch": 1.05837606226198, + "grad_norm": 9.487051499945665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218230 + }, + { + "epoch": 1.0584245604548161, + "grad_norm": 9.91734978583736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218240 + }, + { + "epoch": 1.058473058647652, + "grad_norm": 1.1561518675762272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218250 + }, + { + "epoch": 1.0585215568404882, + "grad_norm": 1.1138514821595891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218260 + }, + { + "epoch": 1.0585700550333244, + "grad_norm": 1.2154814044151863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218270 + }, + { + "epoch": 1.0586185532261605, + "grad_norm": 1.0645176473644824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218280 + }, + { + "epoch": 1.0586670514189964, + "grad_norm": 3.307528402274329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218290 + }, + { + "epoch": 1.0587155496118326, + "grad_norm": 1.1073796457594653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218300 + }, + { + "epoch": 1.0587640478046687, + "grad_norm": 1.2779335634149902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218310 + }, + { + "epoch": 1.0588125459975049, + "grad_norm": 1.5262523334058642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218320 + }, + { + "epoch": 1.0588610441903408, + "grad_norm": 1.6360982613150554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218330 + }, + { + "epoch": 1.058909542383177, + "grad_norm": 1.1211806594246809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218340 + }, + { + "epoch": 1.058958040576013, + "grad_norm": 1.1133595023693488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218350 + }, + { + "epoch": 1.0590065387688492, + "grad_norm": 1.1839198066354584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218360 + }, + { + "epoch": 1.0590550369616851, + "grad_norm": 1.1732071669712241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218370 + }, + { + "epoch": 1.0591035351545213, + "grad_norm": 1.1370048724756998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218380 + }, + { + "epoch": 1.0591520333473574, + "grad_norm": 9.948220025535193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218390 + }, + { + "epoch": 1.0592005315401936, + "grad_norm": 1.2080609224085492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218400 + }, + { + "epoch": 1.0592490297330295, + "grad_norm": 1.026706897278018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218410 + }, + { + "epoch": 1.0592975279258656, + "grad_norm": 1.3309093560565088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218420 + }, + { + "epoch": 1.0593460261187018, + "grad_norm": 1.4014341331858304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218430 + }, + { + "epoch": 1.059394524311538, + "grad_norm": 9.47360518921414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218440 + }, + { + "epoch": 1.0594430225043738, + "grad_norm": 1.1835087576628212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218450 + }, + { + "epoch": 1.05949152069721, + "grad_norm": 1.1213010964183923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218460 + }, + { + "epoch": 1.0595400188900461, + "grad_norm": 1.2905623236747488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218470 + }, + { + "epoch": 1.0595885170828823, + "grad_norm": 1.0555186236160807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218480 + }, + { + "epoch": 1.0596370152757182, + "grad_norm": 6.330225232886733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218490 + }, + { + "epoch": 1.0596855134685543, + "grad_norm": 1.0819595530620063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218500 + }, + { + "epoch": 1.0597340116613905, + "grad_norm": 1.0912670234120014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218510 + }, + { + "epoch": 1.0597825098542266, + "grad_norm": 1.0358091628859256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218520 + }, + { + "epoch": 1.0598310080470625, + "grad_norm": 1.03873446732905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218530 + }, + { + "epoch": 1.0598795062398987, + "grad_norm": 1.2356763079424127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218540 + }, + { + "epoch": 1.0599280044327348, + "grad_norm": 1.1300986102469324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218550 + }, + { + "epoch": 1.059976502625571, + "grad_norm": 1.1379142961231992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218560 + }, + { + "epoch": 1.0600250008184071, + "grad_norm": 1.186330251812251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218570 + }, + { + "epoch": 1.060073499011243, + "grad_norm": 1.2093541101876326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218580 + }, + { + "epoch": 1.0601219972040792, + "grad_norm": 1.5105337070053793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218590 + }, + { + "epoch": 1.0601704953969153, + "grad_norm": 1.0516080095612779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218600 + }, + { + "epoch": 1.0602189935897515, + "grad_norm": 1.0781913317714498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218610 + }, + { + "epoch": 1.0602674917825874, + "grad_norm": 9.344993259219336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218620 + }, + { + "epoch": 1.0603159899754235, + "grad_norm": 1.0226541036217895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218630 + }, + { + "epoch": 1.0603644881682597, + "grad_norm": 1.0783643489276074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218640 + }, + { + "epoch": 1.0604129863610958, + "grad_norm": 1.0642077086231438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218650 + }, + { + "epoch": 1.0604614845539317, + "grad_norm": 1.0023446606055586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218660 + }, + { + "epoch": 1.0605099827467679, + "grad_norm": 1.0759538326965412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218670 + }, + { + "epoch": 1.060558480939604, + "grad_norm": 1.0311246256833329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218680 + }, + { + "epoch": 1.0606069791324402, + "grad_norm": 1.0008266571048807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218690 + }, + { + "epoch": 1.060655477325276, + "grad_norm": 9.970080583343588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218700 + }, + { + "epoch": 1.0607039755181122, + "grad_norm": 1.1260605248253341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218710 + }, + { + "epoch": 1.0607524737109484, + "grad_norm": 1.0215110535227723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218720 + }, + { + "epoch": 1.0608009719037845, + "grad_norm": 9.57229602249754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218730 + }, + { + "epoch": 1.0608494700966205, + "grad_norm": 1.8662065315311338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218740 + }, + { + "epoch": 1.0608979682894566, + "grad_norm": 1.001076981310689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218750 + }, + { + "epoch": 1.0609464664822927, + "grad_norm": 1.0696206231841643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218760 + }, + { + "epoch": 1.0609949646751289, + "grad_norm": 1.0402117567309688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218770 + }, + { + "epoch": 1.0610434628679648, + "grad_norm": 1.0020745833116962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218780 + }, + { + "epoch": 1.061091961060801, + "grad_norm": 1.1379523812138359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218790 + }, + { + "epoch": 1.061140459253637, + "grad_norm": 9.936526623732789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218800 + }, + { + "epoch": 1.0611889574464732, + "grad_norm": 1.172949097849596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218810 + }, + { + "epoch": 1.0612374556393092, + "grad_norm": 1.0419164908626044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218820 + }, + { + "epoch": 1.0612859538321453, + "grad_norm": 1.0471156031144346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218830 + }, + { + "epoch": 1.0613344520249814, + "grad_norm": 9.332075734391765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218840 + }, + { + "epoch": 1.0613829502178176, + "grad_norm": 1.0271829609109773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218850 + }, + { + "epoch": 1.0614314484106535, + "grad_norm": 9.931295608112123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218860 + }, + { + "epoch": 1.0614799466034897, + "grad_norm": 9.745512841163873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218870 + }, + { + "epoch": 1.0615284447963258, + "grad_norm": 9.914776910591172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218880 + }, + { + "epoch": 1.061576942989162, + "grad_norm": 9.024877556385036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218890 + }, + { + "epoch": 1.0616254411819979, + "grad_norm": 9.923904542574746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218900 + }, + { + "epoch": 1.061673939374834, + "grad_norm": 1.371614501977092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218910 + }, + { + "epoch": 1.0617224375676702, + "grad_norm": 9.970218428634325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218920 + }, + { + "epoch": 1.0617709357605063, + "grad_norm": 9.856324112433867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218930 + }, + { + "epoch": 1.0618194339533422, + "grad_norm": 8.956530450632272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218940 + }, + { + "epoch": 1.0618679321461784, + "grad_norm": 1.1512397435353705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218950 + }, + { + "epoch": 1.0619164303390145, + "grad_norm": 9.723379434944945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218960 + }, + { + "epoch": 1.0619649285318506, + "grad_norm": 1.0582935061620447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218970 + }, + { + "epoch": 1.0620134267246866, + "grad_norm": 9.411597545749828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218980 + }, + { + "epoch": 1.0620619249175227, + "grad_norm": 9.94150539668226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 218990 + }, + { + "epoch": 1.0621104231103589, + "grad_norm": 9.669715694826664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219000 + }, + { + "epoch": 1.062158921303195, + "grad_norm": 9.93202675658722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219010 + }, + { + "epoch": 1.062207419496031, + "grad_norm": 9.784433530057868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219020 + }, + { + "epoch": 1.062255917688867, + "grad_norm": 9.651490273654417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219030 + }, + { + "epoch": 1.0623044158817032, + "grad_norm": 9.052949678789446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219040 + }, + { + "epoch": 1.0623529140745394, + "grad_norm": 1.7450007305797044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219050 + }, + { + "epoch": 1.0624014122673753, + "grad_norm": 9.60547268391565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219060 + }, + { + "epoch": 1.0624499104602114, + "grad_norm": 9.857876648311503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219070 + }, + { + "epoch": 1.0624984086530476, + "grad_norm": 1.0337339517718647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219080 + }, + { + "epoch": 1.0625469068458837, + "grad_norm": 8.585942623540177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219090 + }, + { + "epoch": 1.0625954050387199, + "grad_norm": 0.00015214973245747387, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 219100 + }, + { + "epoch": 1.0626439032315558, + "grad_norm": 0.00038379785837605596, + "learning_rate": 0.0002, + "loss": 0.0046, + "step": 219110 + }, + { + "epoch": 1.062692401424392, + "grad_norm": 0.0013891453854739666, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 219120 + }, + { + "epoch": 1.062740899617228, + "grad_norm": 0.000697907991707325, + "learning_rate": 0.0002, + "loss": 0.0257, + "step": 219130 + }, + { + "epoch": 1.062789397810064, + "grad_norm": 0.0003880013828165829, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219140 + }, + { + "epoch": 1.0628378960029001, + "grad_norm": 0.00013560733350459486, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 219150 + }, + { + "epoch": 1.0628863941957363, + "grad_norm": 0.00057808100245893, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 219160 + }, + { + "epoch": 1.0629348923885724, + "grad_norm": 0.00033225471270270646, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219170 + }, + { + "epoch": 1.0629833905814086, + "grad_norm": 2.491656232450623e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219180 + }, + { + "epoch": 1.0630318887742445, + "grad_norm": 2.025380490522366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219190 + }, + { + "epoch": 1.0630803869670806, + "grad_norm": 1.6759997379267588e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219200 + }, + { + "epoch": 1.0631288851599168, + "grad_norm": 1.5669136701035313e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219210 + }, + { + "epoch": 1.063177383352753, + "grad_norm": 1.4197978998709004e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219220 + }, + { + "epoch": 1.0632258815455888, + "grad_norm": 1.3032105016463902e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219230 + }, + { + "epoch": 1.063274379738425, + "grad_norm": 1.2974569472135045e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219240 + }, + { + "epoch": 1.0633228779312611, + "grad_norm": 1.2820617484976538e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219250 + }, + { + "epoch": 1.0633713761240973, + "grad_norm": 1.0398574886494316e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219260 + }, + { + "epoch": 1.0634198743169332, + "grad_norm": 9.820018931350205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219270 + }, + { + "epoch": 1.0634683725097693, + "grad_norm": 9.27828841668088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219280 + }, + { + "epoch": 1.0635168707026055, + "grad_norm": 8.967702342488337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219290 + }, + { + "epoch": 1.0635653688954416, + "grad_norm": 7.99794725026004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219300 + }, + { + "epoch": 1.0636138670882775, + "grad_norm": 7.5672701314033475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219310 + }, + { + "epoch": 1.0636623652811137, + "grad_norm": 7.482796718250029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219320 + }, + { + "epoch": 1.0637108634739498, + "grad_norm": 7.123328941815998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219330 + }, + { + "epoch": 1.063759361666786, + "grad_norm": 8.967253052105661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219340 + }, + { + "epoch": 1.063807859859622, + "grad_norm": 6.695186129945796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219350 + }, + { + "epoch": 1.063856358052458, + "grad_norm": 6.395845048245974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219360 + }, + { + "epoch": 1.0639048562452942, + "grad_norm": 6.06484627496684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219370 + }, + { + "epoch": 1.0639533544381303, + "grad_norm": 5.939175480307313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219380 + }, + { + "epoch": 1.0640018526309662, + "grad_norm": 5.455998234538129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219390 + }, + { + "epoch": 1.0640503508238024, + "grad_norm": 5.389199486671714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219400 + }, + { + "epoch": 1.0640988490166385, + "grad_norm": 5.5345026339637116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219410 + }, + { + "epoch": 1.0641473472094747, + "grad_norm": 5.216100817051483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219420 + }, + { + "epoch": 1.0641958454023106, + "grad_norm": 5.140797838976141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219430 + }, + { + "epoch": 1.0642443435951467, + "grad_norm": 5.182361292099813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219440 + }, + { + "epoch": 1.064292841787983, + "grad_norm": 5.0415642363077495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219450 + }, + { + "epoch": 1.064341339980819, + "grad_norm": 4.69731230623438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219460 + }, + { + "epoch": 1.064389838173655, + "grad_norm": 4.562269168673083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219470 + }, + { + "epoch": 1.064438336366491, + "grad_norm": 4.365508175396826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219480 + }, + { + "epoch": 1.0644868345593272, + "grad_norm": 4.7352536967082415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219490 + }, + { + "epoch": 1.0645353327521634, + "grad_norm": 4.365473159850808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219500 + }, + { + "epoch": 1.0645838309449993, + "grad_norm": 4.181134954706067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219510 + }, + { + "epoch": 1.0646323291378355, + "grad_norm": 4.159056970820529e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219520 + }, + { + "epoch": 1.0646808273306716, + "grad_norm": 3.929718332074117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219530 + }, + { + "epoch": 1.0647293255235077, + "grad_norm": 3.7698566757171648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219540 + }, + { + "epoch": 1.0647778237163439, + "grad_norm": 5.089822934678523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219550 + }, + { + "epoch": 1.0648263219091798, + "grad_norm": 5.796602181362687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219560 + }, + { + "epoch": 1.064874820102016, + "grad_norm": 3.618222990553477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219570 + }, + { + "epoch": 1.064923318294852, + "grad_norm": 3.3549342788319336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219580 + }, + { + "epoch": 1.064971816487688, + "grad_norm": 3.16654200105404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219590 + }, + { + "epoch": 1.0650203146805242, + "grad_norm": 3.199384536856087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219600 + }, + { + "epoch": 1.0650688128733603, + "grad_norm": 3.421209157750127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219610 + }, + { + "epoch": 1.0651173110661964, + "grad_norm": 3.123351689282572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219620 + }, + { + "epoch": 1.0651658092590326, + "grad_norm": 2.8822491913160775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219630 + }, + { + "epoch": 1.0652143074518685, + "grad_norm": 2.8244869554328034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219640 + }, + { + "epoch": 1.0652628056447047, + "grad_norm": 2.9505583825084614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219650 + }, + { + "epoch": 1.0653113038375408, + "grad_norm": 2.9713401090702973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219660 + }, + { + "epoch": 1.065359802030377, + "grad_norm": 3.2639825349178864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219670 + }, + { + "epoch": 1.0654083002232129, + "grad_norm": 2.66698953055311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219680 + }, + { + "epoch": 1.065456798416049, + "grad_norm": 2.3258892269950593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219690 + }, + { + "epoch": 1.0655052966088852, + "grad_norm": 2.588684765214566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219700 + }, + { + "epoch": 1.0655537948017213, + "grad_norm": 2.617687641759403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219710 + }, + { + "epoch": 1.0656022929945572, + "grad_norm": 2.401868641754845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219720 + }, + { + "epoch": 1.0656507911873934, + "grad_norm": 2.3140100893215276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219730 + }, + { + "epoch": 1.0656992893802295, + "grad_norm": 2.1537348402489442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219740 + }, + { + "epoch": 1.0657477875730657, + "grad_norm": 2.5350645955768414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219750 + }, + { + "epoch": 1.0657962857659016, + "grad_norm": 2.3653321932215476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219760 + }, + { + "epoch": 1.0658447839587377, + "grad_norm": 2.0904105895169778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219770 + }, + { + "epoch": 1.0658932821515739, + "grad_norm": 2.1598189050564542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219780 + }, + { + "epoch": 1.06594178034441, + "grad_norm": 1.911610070237657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219790 + }, + { + "epoch": 1.065990278537246, + "grad_norm": 2.181166337322793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219800 + }, + { + "epoch": 1.066038776730082, + "grad_norm": 2.0427207800821634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219810 + }, + { + "epoch": 1.0660872749229182, + "grad_norm": 2.0817171844100812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219820 + }, + { + "epoch": 1.0661357731157544, + "grad_norm": 1.9402505131438375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219830 + }, + { + "epoch": 1.0661842713085903, + "grad_norm": 1.8720419348028372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219840 + }, + { + "epoch": 1.0662327695014264, + "grad_norm": 1.924648131534923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219850 + }, + { + "epoch": 1.0662812676942626, + "grad_norm": 1.798092284843733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219860 + }, + { + "epoch": 1.0663297658870987, + "grad_norm": 1.7693399740892346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219870 + }, + { + "epoch": 1.0663782640799346, + "grad_norm": 1.76524008566048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219880 + }, + { + "epoch": 1.0664267622727708, + "grad_norm": 1.7322118992524338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219890 + }, + { + "epoch": 1.066475260465607, + "grad_norm": 1.6954035118033062e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219900 + }, + { + "epoch": 1.066523758658443, + "grad_norm": 1.8489612330085947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219910 + }, + { + "epoch": 1.066572256851279, + "grad_norm": 1.6599759646851453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219920 + }, + { + "epoch": 1.0666207550441151, + "grad_norm": 1.632313569643884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219930 + }, + { + "epoch": 1.0666692532369513, + "grad_norm": 1.5171992799878353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219940 + }, + { + "epoch": 1.0667177514297874, + "grad_norm": 1.7343140825687442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219950 + }, + { + "epoch": 1.0667662496226233, + "grad_norm": 1.5607347449986264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219960 + }, + { + "epoch": 1.0668147478154595, + "grad_norm": 2.065939042950049e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219970 + }, + { + "epoch": 1.0668632460082956, + "grad_norm": 1.5107273156900192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219980 + }, + { + "epoch": 1.0669117442011318, + "grad_norm": 1.5052282833494246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 219990 + }, + { + "epoch": 1.0669602423939677, + "grad_norm": 1.4133507875158102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220000 + }, + { + "epoch": 1.0670087405868038, + "grad_norm": 1.4908687262504827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220010 + }, + { + "epoch": 1.06705723877964, + "grad_norm": 1.7265326732740505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220020 + }, + { + "epoch": 1.0671057369724761, + "grad_norm": 5.824128948006546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220030 + }, + { + "epoch": 1.067154235165312, + "grad_norm": 1.295516881327785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220040 + }, + { + "epoch": 1.0672027333581482, + "grad_norm": 1.5066589185153134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220050 + }, + { + "epoch": 1.0672512315509843, + "grad_norm": 1.3767889868177008e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220060 + }, + { + "epoch": 1.0672997297438205, + "grad_norm": 1.3453949350150651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220070 + }, + { + "epoch": 1.0673482279366566, + "grad_norm": 1.3143087471689796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220080 + }, + { + "epoch": 1.0673967261294925, + "grad_norm": 1.2111790965718683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220090 + }, + { + "epoch": 1.0674452243223287, + "grad_norm": 1.336241552962747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220100 + }, + { + "epoch": 1.0674937225151648, + "grad_norm": 1.3035914889769629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220110 + }, + { + "epoch": 1.0675422207080008, + "grad_norm": 1.2141085790062789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220120 + }, + { + "epoch": 1.067590718900837, + "grad_norm": 1.4309506468634936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220130 + }, + { + "epoch": 1.067639217093673, + "grad_norm": 1.2382583918224555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220140 + }, + { + "epoch": 1.0676877152865092, + "grad_norm": 1.2394987152219983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220150 + }, + { + "epoch": 1.0677362134793453, + "grad_norm": 1.1297067885607248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220160 + }, + { + "epoch": 1.0677847116721813, + "grad_norm": 1.5028219877422089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220170 + }, + { + "epoch": 1.0678332098650174, + "grad_norm": 1.2386631169647444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220180 + }, + { + "epoch": 1.0678817080578535, + "grad_norm": 1.2228482546561281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220190 + }, + { + "epoch": 1.0679302062506897, + "grad_norm": 1.1553639751582523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220200 + }, + { + "epoch": 1.0679787044435256, + "grad_norm": 1.1898634966200916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220210 + }, + { + "epoch": 1.0680272026363617, + "grad_norm": 1.1959360790569917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220220 + }, + { + "epoch": 1.068075700829198, + "grad_norm": 1.0633044666974456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220230 + }, + { + "epoch": 1.068124199022034, + "grad_norm": 1.9289893316454254e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220240 + }, + { + "epoch": 1.06817269721487, + "grad_norm": 1.3084176089250832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220250 + }, + { + "epoch": 1.068221195407706, + "grad_norm": 1.1347743793521659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220260 + }, + { + "epoch": 1.0682696936005422, + "grad_norm": 1.1147907343911356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220270 + }, + { + "epoch": 1.0683181917933784, + "grad_norm": 1.0994037893397035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220280 + }, + { + "epoch": 1.0683666899862143, + "grad_norm": 9.79322294369922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220290 + }, + { + "epoch": 1.0684151881790505, + "grad_norm": 1.0338427500755643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220300 + }, + { + "epoch": 1.0684636863718866, + "grad_norm": 1.0460329349371023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220310 + }, + { + "epoch": 1.0685121845647227, + "grad_norm": 1.0148036153623252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220320 + }, + { + "epoch": 1.0685606827575587, + "grad_norm": 1.046800775839074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220330 + }, + { + "epoch": 1.0686091809503948, + "grad_norm": 9.784919257072033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220340 + }, + { + "epoch": 1.068657679143231, + "grad_norm": 9.955729183275253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220350 + }, + { + "epoch": 1.068706177336067, + "grad_norm": 1.0309495337423868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220360 + }, + { + "epoch": 1.068754675528903, + "grad_norm": 9.74497652350692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220370 + }, + { + "epoch": 1.0688031737217392, + "grad_norm": 1.0042365374829387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220380 + }, + { + "epoch": 1.0688516719145753, + "grad_norm": 8.966835025603359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220390 + }, + { + "epoch": 1.0689001701074115, + "grad_norm": 9.337105666418211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220400 + }, + { + "epoch": 1.0689486683002474, + "grad_norm": 2.1042130811110837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220410 + }, + { + "epoch": 1.0689971664930835, + "grad_norm": 9.46697525705531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220420 + }, + { + "epoch": 1.0690456646859197, + "grad_norm": 9.154341000794375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220430 + }, + { + "epoch": 1.0690941628787558, + "grad_norm": 8.651712164464698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220440 + }, + { + "epoch": 1.0691426610715917, + "grad_norm": 8.689830792718567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220450 + }, + { + "epoch": 1.0691911592644279, + "grad_norm": 9.028671001942712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220460 + }, + { + "epoch": 1.069239657457264, + "grad_norm": 1.2509448197306483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220470 + }, + { + "epoch": 1.0692881556501002, + "grad_norm": 8.755013141126256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220480 + }, + { + "epoch": 1.069336653842936, + "grad_norm": 7.427152013406157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220490 + }, + { + "epoch": 1.0693851520357722, + "grad_norm": 8.89375485257915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220500 + }, + { + "epoch": 1.0694336502286084, + "grad_norm": 8.944056162363268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220510 + }, + { + "epoch": 1.0694821484214445, + "grad_norm": 9.326149665866978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220520 + }, + { + "epoch": 1.0695306466142804, + "grad_norm": 7.728083915026218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220530 + }, + { + "epoch": 1.0695791448071166, + "grad_norm": 7.094679972396989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220540 + }, + { + "epoch": 1.0696276429999527, + "grad_norm": 8.552112831239356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220550 + }, + { + "epoch": 1.0696761411927889, + "grad_norm": 8.488631237923983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220560 + }, + { + "epoch": 1.0697246393856248, + "grad_norm": 8.006249458958337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220570 + }, + { + "epoch": 1.069773137578461, + "grad_norm": 8.016727974791138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220580 + }, + { + "epoch": 1.069821635771297, + "grad_norm": 7.275617690538638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220590 + }, + { + "epoch": 1.0698701339641332, + "grad_norm": 8.176087931133225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220600 + }, + { + "epoch": 1.0699186321569694, + "grad_norm": 7.819815550647036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220610 + }, + { + "epoch": 1.0699671303498053, + "grad_norm": 7.615191748300276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220620 + }, + { + "epoch": 1.0700156285426414, + "grad_norm": 7.777164228173206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220630 + }, + { + "epoch": 1.0700641267354776, + "grad_norm": 7.1432401682614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220640 + }, + { + "epoch": 1.0701126249283135, + "grad_norm": 7.641541515113204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220650 + }, + { + "epoch": 1.0701611231211496, + "grad_norm": 7.776974371154211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220660 + }, + { + "epoch": 1.0702096213139858, + "grad_norm": 7.309430429813801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220670 + }, + { + "epoch": 1.070258119506822, + "grad_norm": 7.661162158001389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220680 + }, + { + "epoch": 1.070306617699658, + "grad_norm": 6.670152856713685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220690 + }, + { + "epoch": 1.070355115892494, + "grad_norm": 7.594808266730979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220700 + }, + { + "epoch": 1.0704036140853301, + "grad_norm": 7.536815473940806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220710 + }, + { + "epoch": 1.0704521122781663, + "grad_norm": 1.7180576605824172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220720 + }, + { + "epoch": 1.0705006104710024, + "grad_norm": 6.512002528324956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220730 + }, + { + "epoch": 1.0705491086638383, + "grad_norm": 6.179474212331115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220740 + }, + { + "epoch": 1.0705976068566745, + "grad_norm": 6.840164701316098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220750 + }, + { + "epoch": 1.0706461050495106, + "grad_norm": 7.001271455919778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220760 + }, + { + "epoch": 1.0706946032423468, + "grad_norm": 7.963859047777078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220770 + }, + { + "epoch": 1.0707431014351827, + "grad_norm": 5.939708671576227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220780 + }, + { + "epoch": 1.0707915996280188, + "grad_norm": 5.807226557408285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220790 + }, + { + "epoch": 1.070840097820855, + "grad_norm": 7.047329404485936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220800 + }, + { + "epoch": 1.0708885960136911, + "grad_norm": 6.705395776407386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220810 + }, + { + "epoch": 1.070937094206527, + "grad_norm": 7.319005135286716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220820 + }, + { + "epoch": 1.0709855923993632, + "grad_norm": 6.794244313823583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220830 + }, + { + "epoch": 1.0710340905921993, + "grad_norm": 5.407567869042396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220840 + }, + { + "epoch": 1.0710825887850355, + "grad_norm": 2.5278150133090094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220850 + }, + { + "epoch": 1.0711310869778714, + "grad_norm": 6.68586949359451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220860 + }, + { + "epoch": 1.0711795851707075, + "grad_norm": 7.240824970722315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220870 + }, + { + "epoch": 1.0712280833635437, + "grad_norm": 8.305677852149529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220880 + }, + { + "epoch": 1.0712765815563798, + "grad_norm": 5.563670129049569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220890 + }, + { + "epoch": 1.0713250797492158, + "grad_norm": 6.382077799571562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220900 + }, + { + "epoch": 1.071373577942052, + "grad_norm": 6.070841891414602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220910 + }, + { + "epoch": 1.071422076134888, + "grad_norm": 6.001363885843602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220920 + }, + { + "epoch": 1.0714705743277242, + "grad_norm": 5.926466997152602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220930 + }, + { + "epoch": 1.0715190725205601, + "grad_norm": 4.871677106166317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220940 + }, + { + "epoch": 1.0715675707133963, + "grad_norm": 5.755554752795433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220950 + }, + { + "epoch": 1.0716160689062324, + "grad_norm": 6.07938204666425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220960 + }, + { + "epoch": 1.0716645670990685, + "grad_norm": 5.920953753957292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220970 + }, + { + "epoch": 1.0717130652919045, + "grad_norm": 6.002705390528718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220980 + }, + { + "epoch": 1.0717615634847406, + "grad_norm": 6.160861403259332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 220990 + }, + { + "epoch": 1.0718100616775768, + "grad_norm": 5.535636091735796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221000 + }, + { + "epoch": 1.071858559870413, + "grad_norm": 6.224416893019225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221010 + }, + { + "epoch": 1.0719070580632488, + "grad_norm": 5.70717929804232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221020 + }, + { + "epoch": 1.071955556256085, + "grad_norm": 5.104334377392661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221030 + }, + { + "epoch": 1.072004054448921, + "grad_norm": 4.732279137442674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221040 + }, + { + "epoch": 1.0720525526417573, + "grad_norm": 5.420207571660285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221050 + }, + { + "epoch": 1.0721010508345932, + "grad_norm": 5.667207574333588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221060 + }, + { + "epoch": 1.0721495490274293, + "grad_norm": 5.421775881586655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221070 + }, + { + "epoch": 1.0721980472202655, + "grad_norm": 5.358634780350258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221080 + }, + { + "epoch": 1.0722465454131016, + "grad_norm": 4.600833563017659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221090 + }, + { + "epoch": 1.0722950436059375, + "grad_norm": 6.116475788076059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221100 + }, + { + "epoch": 1.0723435417987737, + "grad_norm": 5.145043928678206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221110 + }, + { + "epoch": 1.0723920399916098, + "grad_norm": 5.286012196847878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221120 + }, + { + "epoch": 1.072440538184446, + "grad_norm": 4.539446081253118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221130 + }, + { + "epoch": 1.072489036377282, + "grad_norm": 4.5401364445751824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221140 + }, + { + "epoch": 1.072537534570118, + "grad_norm": 4.874657975051377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221150 + }, + { + "epoch": 1.0725860327629542, + "grad_norm": 6.452344791796349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221160 + }, + { + "epoch": 1.0726345309557903, + "grad_norm": 4.907254833597108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221170 + }, + { + "epoch": 1.0726830291486262, + "grad_norm": 5.421927653515013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221180 + }, + { + "epoch": 1.0727315273414624, + "grad_norm": 3.935912218366866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221190 + }, + { + "epoch": 1.0727800255342985, + "grad_norm": 5.176878516977013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221200 + }, + { + "epoch": 1.0728285237271347, + "grad_norm": 4.998005920242576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221210 + }, + { + "epoch": 1.0728770219199708, + "grad_norm": 4.626764962267771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221220 + }, + { + "epoch": 1.0729255201128067, + "grad_norm": 4.588880244682514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221230 + }, + { + "epoch": 1.0729740183056429, + "grad_norm": 3.963942276641319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221240 + }, + { + "epoch": 1.073022516498479, + "grad_norm": 4.723670201656205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221250 + }, + { + "epoch": 1.0730710146913152, + "grad_norm": 4.899769123767328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221260 + }, + { + "epoch": 1.073119512884151, + "grad_norm": 8.222835390370165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221270 + }, + { + "epoch": 1.0731680110769872, + "grad_norm": 4.408433653679822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221280 + }, + { + "epoch": 1.0732165092698234, + "grad_norm": 3.6205238984621246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221290 + }, + { + "epoch": 1.0732650074626595, + "grad_norm": 7.808530995134788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221300 + }, + { + "epoch": 1.0733135056554954, + "grad_norm": 4.4917220520801493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221310 + }, + { + "epoch": 1.0733620038483316, + "grad_norm": 4.284696899503615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221320 + }, + { + "epoch": 1.0734105020411677, + "grad_norm": 4.4248429276194656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221330 + }, + { + "epoch": 1.0734590002340039, + "grad_norm": 3.4453736930117884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221340 + }, + { + "epoch": 1.0735074984268398, + "grad_norm": 4.478647497307975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221350 + }, + { + "epoch": 1.073555996619676, + "grad_norm": 4.210853035147011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221360 + }, + { + "epoch": 1.073604494812512, + "grad_norm": 4.3597776766546303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221370 + }, + { + "epoch": 1.0736529930053482, + "grad_norm": 3.8100378674243984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221380 + }, + { + "epoch": 1.0737014911981841, + "grad_norm": 3.502155152546038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221390 + }, + { + "epoch": 1.0737499893910203, + "grad_norm": 4.1962192653954844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221400 + }, + { + "epoch": 1.0737984875838564, + "grad_norm": 4.339570693900896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221410 + }, + { + "epoch": 1.0738469857766926, + "grad_norm": 3.990198251813126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221420 + }, + { + "epoch": 1.0738954839695285, + "grad_norm": 4.0023789438237145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221430 + }, + { + "epoch": 1.0739439821623646, + "grad_norm": 3.208203054327896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221440 + }, + { + "epoch": 1.0739924803552008, + "grad_norm": 4.443695615918841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221450 + }, + { + "epoch": 1.074040978548037, + "grad_norm": 4.053130737702304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221460 + }, + { + "epoch": 1.0740894767408729, + "grad_norm": 3.912610395673255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221470 + }, + { + "epoch": 1.074137974933709, + "grad_norm": 4.366063706129353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221480 + }, + { + "epoch": 1.0741864731265451, + "grad_norm": 3.0658080163448176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221490 + }, + { + "epoch": 1.0742349713193813, + "grad_norm": 4.2699696223280625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221500 + }, + { + "epoch": 1.0742834695122172, + "grad_norm": 4.031175251384411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221510 + }, + { + "epoch": 1.0743319677050533, + "grad_norm": 3.814530202816968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221520 + }, + { + "epoch": 1.0743804658978895, + "grad_norm": 3.725618284988741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221530 + }, + { + "epoch": 1.0744289640907256, + "grad_norm": 3.2534768479308696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221540 + }, + { + "epoch": 1.0744774622835616, + "grad_norm": 3.694273686960514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221550 + }, + { + "epoch": 1.0745259604763977, + "grad_norm": 4.410618430483737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221560 + }, + { + "epoch": 1.0745744586692338, + "grad_norm": 3.753428643449297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221570 + }, + { + "epoch": 1.07462295686207, + "grad_norm": 3.7228684846013493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221580 + }, + { + "epoch": 1.0746714550549061, + "grad_norm": 3.183196213285555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221590 + }, + { + "epoch": 1.074719953247742, + "grad_norm": 4.787621605828463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221600 + }, + { + "epoch": 1.0747684514405782, + "grad_norm": 3.6161489447295025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221610 + }, + { + "epoch": 1.0748169496334143, + "grad_norm": 4.1831862063190783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221620 + }, + { + "epoch": 1.0748654478262503, + "grad_norm": 3.4267571891177795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221630 + }, + { + "epoch": 1.0749139460190864, + "grad_norm": 3.389633889128163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221640 + }, + { + "epoch": 1.0749624442119226, + "grad_norm": 3.5415487786849553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221650 + }, + { + "epoch": 1.0750109424047587, + "grad_norm": 3.798902810103755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221660 + }, + { + "epoch": 1.0750594405975948, + "grad_norm": 3.4010602689704683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221670 + }, + { + "epoch": 1.0751079387904308, + "grad_norm": 3.2994822163345816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221680 + }, + { + "epoch": 1.075156436983267, + "grad_norm": 3.6546927617564506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221690 + }, + { + "epoch": 1.075204935176103, + "grad_norm": 5.542937060454278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221700 + }, + { + "epoch": 1.0752534333689392, + "grad_norm": 3.416448919324466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221710 + }, + { + "epoch": 1.0753019315617751, + "grad_norm": 3.574402285266842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221720 + }, + { + "epoch": 1.0753504297546113, + "grad_norm": 3.3500612062198343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221730 + }, + { + "epoch": 1.0753989279474474, + "grad_norm": 3.2376891567764687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221740 + }, + { + "epoch": 1.0754474261402835, + "grad_norm": 3.5134843301420915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221750 + }, + { + "epoch": 1.0754959243331195, + "grad_norm": 3.6009791415381187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221760 + }, + { + "epoch": 1.0755444225259556, + "grad_norm": 3.3579865998945024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221770 + }, + { + "epoch": 1.0755929207187918, + "grad_norm": 4.808088647223485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221780 + }, + { + "epoch": 1.075641418911628, + "grad_norm": 3.413525462292455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221790 + }, + { + "epoch": 1.0756899171044638, + "grad_norm": 3.850484517897712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221800 + }, + { + "epoch": 1.0757384152973, + "grad_norm": 2.9736355600107345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221810 + }, + { + "epoch": 1.075786913490136, + "grad_norm": 3.11430426336301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221820 + }, + { + "epoch": 1.0758354116829723, + "grad_norm": 3.018487575445761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221830 + }, + { + "epoch": 1.0758839098758082, + "grad_norm": 3.321125916500023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221840 + }, + { + "epoch": 1.0759324080686443, + "grad_norm": 3.3825102718765265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221850 + }, + { + "epoch": 1.0759809062614805, + "grad_norm": 3.2340068401026656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221860 + }, + { + "epoch": 1.0760294044543166, + "grad_norm": 3.630832168255438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221870 + }, + { + "epoch": 1.0760779026471525, + "grad_norm": 3.0682917895319406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221880 + }, + { + "epoch": 1.0761264008399887, + "grad_norm": 2.449003773108416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221890 + }, + { + "epoch": 1.0761748990328248, + "grad_norm": 3.415770777337457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221900 + }, + { + "epoch": 1.076223397225661, + "grad_norm": 3.099484615631809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221910 + }, + { + "epoch": 1.0762718954184969, + "grad_norm": 3.18087671757894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221920 + }, + { + "epoch": 1.076320393611333, + "grad_norm": 3.04304990095261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221930 + }, + { + "epoch": 1.0763688918041692, + "grad_norm": 2.4888169036785257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221940 + }, + { + "epoch": 1.0764173899970053, + "grad_norm": 3.191256041645829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221950 + }, + { + "epoch": 1.0764658881898412, + "grad_norm": 4.806994979844603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221960 + }, + { + "epoch": 1.0765143863826774, + "grad_norm": 3.172448543864448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221970 + }, + { + "epoch": 1.0765628845755135, + "grad_norm": 3.2966627827590855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221980 + }, + { + "epoch": 1.0766113827683497, + "grad_norm": 3.2540179972784244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 221990 + }, + { + "epoch": 1.0766598809611856, + "grad_norm": 2.8660801376645395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222000 + }, + { + "epoch": 1.0767083791540217, + "grad_norm": 3.007214957051474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222010 + }, + { + "epoch": 1.0767568773468579, + "grad_norm": 2.9140019819351437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222020 + }, + { + "epoch": 1.076805375539694, + "grad_norm": 2.7046652917306346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222030 + }, + { + "epoch": 1.07685387373253, + "grad_norm": 2.8645365546253743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222040 + }, + { + "epoch": 1.076902371925366, + "grad_norm": 3.64269283181784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222050 + }, + { + "epoch": 1.0769508701182022, + "grad_norm": 4.0883759311327594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222060 + }, + { + "epoch": 1.0769993683110384, + "grad_norm": 2.7847300998473656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222070 + }, + { + "epoch": 1.0770478665038743, + "grad_norm": 2.6907000005849113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222080 + }, + { + "epoch": 1.0770963646967104, + "grad_norm": 2.4882072580112435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222090 + }, + { + "epoch": 1.0771448628895466, + "grad_norm": 2.831055496699264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222100 + }, + { + "epoch": 1.0771933610823827, + "grad_norm": 2.711420279410959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222110 + }, + { + "epoch": 1.0772418592752189, + "grad_norm": 3.303649975805456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222120 + }, + { + "epoch": 1.0772903574680548, + "grad_norm": 2.9073132168377924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222130 + }, + { + "epoch": 1.077338855660891, + "grad_norm": 2.044822394964285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222140 + }, + { + "epoch": 1.077387353853727, + "grad_norm": 0.00030599013553000987, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222150 + }, + { + "epoch": 1.077435852046563, + "grad_norm": 4.166789040027652e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 222160 + }, + { + "epoch": 1.0774843502393991, + "grad_norm": 0.03942210227251053, + "learning_rate": 0.0002, + "loss": 0.0034, + "step": 222170 + }, + { + "epoch": 1.0775328484322353, + "grad_norm": 0.0022171405144035816, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 222180 + }, + { + "epoch": 1.0775813466250714, + "grad_norm": 0.00029710010858252645, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222190 + }, + { + "epoch": 1.0776298448179076, + "grad_norm": 0.0002574262907728553, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 222200 + }, + { + "epoch": 1.0776783430107435, + "grad_norm": 4.54651344625745e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 222210 + }, + { + "epoch": 1.0777268412035796, + "grad_norm": 4.164616984780878e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222220 + }, + { + "epoch": 1.0777753393964158, + "grad_norm": 3.783087959163822e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222230 + }, + { + "epoch": 1.077823837589252, + "grad_norm": 0.09992565959692001, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 222240 + }, + { + "epoch": 1.0778723357820879, + "grad_norm": 0.004522096365690231, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 222250 + }, + { + "epoch": 1.077920833974924, + "grad_norm": 2.6688248908612877e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 222260 + }, + { + "epoch": 1.0779693321677601, + "grad_norm": 2.3331487682298757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222270 + }, + { + "epoch": 1.0780178303605963, + "grad_norm": 1.4011128769197967e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222280 + }, + { + "epoch": 1.0780663285534322, + "grad_norm": 1.3641541954712011e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222290 + }, + { + "epoch": 1.0781148267462684, + "grad_norm": 1.7649050278123468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222300 + }, + { + "epoch": 1.0781633249391045, + "grad_norm": 1.0359554835304152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222310 + }, + { + "epoch": 1.0782118231319406, + "grad_norm": 1.2137626072217245e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222320 + }, + { + "epoch": 1.0782603213247766, + "grad_norm": 8.972290743258782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222330 + }, + { + "epoch": 1.0783088195176127, + "grad_norm": 7.124267085600877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222340 + }, + { + "epoch": 1.0783573177104488, + "grad_norm": 1.1477694897621404e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222350 + }, + { + "epoch": 1.078405815903285, + "grad_norm": 7.491678388760192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222360 + }, + { + "epoch": 1.078454314096121, + "grad_norm": 7.4762410804396495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222370 + }, + { + "epoch": 1.078502812288957, + "grad_norm": 5.978063200018369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222380 + }, + { + "epoch": 1.0785513104817932, + "grad_norm": 5.483756012836238e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222390 + }, + { + "epoch": 1.0785998086746293, + "grad_norm": 6.364679393300321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222400 + }, + { + "epoch": 1.0786483068674653, + "grad_norm": 5.667909135809168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222410 + }, + { + "epoch": 1.0786968050603014, + "grad_norm": 5.666091055900324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222420 + }, + { + "epoch": 1.0787453032531376, + "grad_norm": 5.6047761063382495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222430 + }, + { + "epoch": 1.0787938014459737, + "grad_norm": 4.8832662287168205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222440 + }, + { + "epoch": 1.0788422996388096, + "grad_norm": 5.121897629578598e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222450 + }, + { + "epoch": 1.0788907978316458, + "grad_norm": 5.29219596501207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222460 + }, + { + "epoch": 1.078939296024482, + "grad_norm": 1.6796126146800816e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222470 + }, + { + "epoch": 1.078987794217318, + "grad_norm": 4.564574282994727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222480 + }, + { + "epoch": 1.079036292410154, + "grad_norm": 4.494327640713891e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222490 + }, + { + "epoch": 1.0790847906029901, + "grad_norm": 4.54146584161208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222500 + }, + { + "epoch": 1.0791332887958263, + "grad_norm": 4.631280717148911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222510 + }, + { + "epoch": 1.0791817869886624, + "grad_norm": 5.957233497611014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222520 + }, + { + "epoch": 1.0792302851814983, + "grad_norm": 4.3391637518652715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222530 + }, + { + "epoch": 1.0792787833743345, + "grad_norm": 4.174551577307284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222540 + }, + { + "epoch": 1.0793272815671706, + "grad_norm": 4.096435077372007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222550 + }, + { + "epoch": 1.0793757797600068, + "grad_norm": 4.212520252622198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222560 + }, + { + "epoch": 1.0794242779528427, + "grad_norm": 3.9164906411315314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222570 + }, + { + "epoch": 1.0794727761456788, + "grad_norm": 3.705838707901421e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222580 + }, + { + "epoch": 1.079521274338515, + "grad_norm": 3.761160542126163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222590 + }, + { + "epoch": 1.0795697725313511, + "grad_norm": 3.8014136407582555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222600 + }, + { + "epoch": 1.079618270724187, + "grad_norm": 3.830073183053173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222610 + }, + { + "epoch": 1.0796667689170232, + "grad_norm": 3.7082793369336287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222620 + }, + { + "epoch": 1.0797152671098593, + "grad_norm": 3.6476608329394367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222630 + }, + { + "epoch": 1.0797637653026955, + "grad_norm": 3.467451733740745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222640 + }, + { + "epoch": 1.0798122634955316, + "grad_norm": 3.3730816539900843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222650 + }, + { + "epoch": 1.0798607616883675, + "grad_norm": 3.6052481391379843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222660 + }, + { + "epoch": 1.0799092598812037, + "grad_norm": 9.956575922842603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222670 + }, + { + "epoch": 1.0799577580740398, + "grad_norm": 3.1984250199457165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222680 + }, + { + "epoch": 1.0800062562668757, + "grad_norm": 3.2097591429192107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222690 + }, + { + "epoch": 1.0800547544597119, + "grad_norm": 3.3285373319813516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222700 + }, + { + "epoch": 1.080103252652548, + "grad_norm": 3.4413685625622747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222710 + }, + { + "epoch": 1.0801517508453842, + "grad_norm": 3.2204886792897014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222720 + }, + { + "epoch": 1.0802002490382203, + "grad_norm": 3.0750052246730775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222730 + }, + { + "epoch": 1.0802487472310562, + "grad_norm": 3.047553946089465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222740 + }, + { + "epoch": 1.0802972454238924, + "grad_norm": 3.109641056653345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222750 + }, + { + "epoch": 1.0803457436167285, + "grad_norm": 3.0167429940775037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222760 + }, + { + "epoch": 1.0803942418095647, + "grad_norm": 3.0649161999463104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222770 + }, + { + "epoch": 1.0804427400024006, + "grad_norm": 2.9621653538924875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222780 + }, + { + "epoch": 1.0804912381952367, + "grad_norm": 2.869958279916318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222790 + }, + { + "epoch": 1.0805397363880729, + "grad_norm": 2.9933914902358083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222800 + }, + { + "epoch": 1.080588234580909, + "grad_norm": 2.9945072128612082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222810 + }, + { + "epoch": 1.080636732773745, + "grad_norm": 2.8458412089094054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222820 + }, + { + "epoch": 1.080685230966581, + "grad_norm": 2.7692581170413177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222830 + }, + { + "epoch": 1.0807337291594172, + "grad_norm": 2.7299424800730776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222840 + }, + { + "epoch": 1.0807822273522534, + "grad_norm": 2.8521096737677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222850 + }, + { + "epoch": 1.0808307255450893, + "grad_norm": 2.7586766009335406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222860 + }, + { + "epoch": 1.0808792237379254, + "grad_norm": 2.663056875462644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222870 + }, + { + "epoch": 1.0809277219307616, + "grad_norm": 2.791396354950848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222880 + }, + { + "epoch": 1.0809762201235977, + "grad_norm": 2.5820852442848263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222890 + }, + { + "epoch": 1.0810247183164337, + "grad_norm": 2.654765921761282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222900 + }, + { + "epoch": 1.0810732165092698, + "grad_norm": 2.6176535357080866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222910 + }, + { + "epoch": 1.081121714702106, + "grad_norm": 2.5254485080949962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222920 + }, + { + "epoch": 1.081170212894942, + "grad_norm": 2.4541777747799642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222930 + }, + { + "epoch": 1.081218711087778, + "grad_norm": 2.4667660909472033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222940 + }, + { + "epoch": 1.0812672092806141, + "grad_norm": 2.445171503495658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222950 + }, + { + "epoch": 1.0813157074734503, + "grad_norm": 2.4828509594954085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222960 + }, + { + "epoch": 1.0813642056662864, + "grad_norm": 2.4697005756024737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222970 + }, + { + "epoch": 1.0814127038591224, + "grad_norm": 2.4721991849219194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222980 + }, + { + "epoch": 1.0814612020519585, + "grad_norm": 2.3583395432069665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 222990 + }, + { + "epoch": 1.0815097002447946, + "grad_norm": 2.4046557882684283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223000 + }, + { + "epoch": 1.0815581984376308, + "grad_norm": 2.413696165604051e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223010 + }, + { + "epoch": 1.0816066966304667, + "grad_norm": 2.4274943370983237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223020 + }, + { + "epoch": 1.0816551948233029, + "grad_norm": 2.3497618713008706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223030 + }, + { + "epoch": 1.081703693016139, + "grad_norm": 2.317485268577002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223040 + }, + { + "epoch": 1.0817521912089751, + "grad_norm": 2.2562026060768403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223050 + }, + { + "epoch": 1.081800689401811, + "grad_norm": 2.283344201714499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223060 + }, + { + "epoch": 1.0818491875946472, + "grad_norm": 2.1622229269269155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223070 + }, + { + "epoch": 1.0818976857874834, + "grad_norm": 2.141684490197804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223080 + }, + { + "epoch": 1.0819461839803195, + "grad_norm": 2.145714688595035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223090 + }, + { + "epoch": 1.0819946821731554, + "grad_norm": 2.260224846395431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223100 + }, + { + "epoch": 1.0820431803659916, + "grad_norm": 2.1157259197934764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223110 + }, + { + "epoch": 1.0820916785588277, + "grad_norm": 2.622067540869466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223120 + }, + { + "epoch": 1.0821401767516639, + "grad_norm": 2.1048922462796327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223130 + }, + { + "epoch": 1.0821886749444998, + "grad_norm": 2.139703838111018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223140 + }, + { + "epoch": 1.082237173137336, + "grad_norm": 2.040274694081745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223150 + }, + { + "epoch": 1.082285671330172, + "grad_norm": 2.021314458033885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223160 + }, + { + "epoch": 1.0823341695230082, + "grad_norm": 1.9590363535826327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223170 + }, + { + "epoch": 1.0823826677158443, + "grad_norm": 1.9809531295322813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223180 + }, + { + "epoch": 1.0824311659086803, + "grad_norm": 2.285272557855933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223190 + }, + { + "epoch": 1.0824796641015164, + "grad_norm": 1.926074901348329e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223200 + }, + { + "epoch": 1.0825281622943526, + "grad_norm": 1.9918675207009073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223210 + }, + { + "epoch": 1.0825766604871885, + "grad_norm": 1.9456472273304826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223220 + }, + { + "epoch": 1.0826251586800246, + "grad_norm": 1.8740648783932556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223230 + }, + { + "epoch": 1.0826736568728608, + "grad_norm": 2.004631596719264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223240 + }, + { + "epoch": 1.082722155065697, + "grad_norm": 1.7884352701003081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223250 + }, + { + "epoch": 1.082770653258533, + "grad_norm": 1.882468382063962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223260 + }, + { + "epoch": 1.082819151451369, + "grad_norm": 1.8584150893730111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223270 + }, + { + "epoch": 1.0828676496442051, + "grad_norm": 1.787555675036856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223280 + }, + { + "epoch": 1.0829161478370413, + "grad_norm": 1.8829263126463047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223290 + }, + { + "epoch": 1.0829646460298774, + "grad_norm": 1.6437987824247102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223300 + }, + { + "epoch": 1.0830131442227133, + "grad_norm": 1.6277138001896674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223310 + }, + { + "epoch": 1.0830616424155495, + "grad_norm": 1.5730797713331413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223320 + }, + { + "epoch": 1.0831101406083856, + "grad_norm": 1.6102554809549474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223330 + }, + { + "epoch": 1.0831586388012218, + "grad_norm": 1.7642345255808323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223340 + }, + { + "epoch": 1.0832071369940577, + "grad_norm": 1.5315108612412587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223350 + }, + { + "epoch": 1.0832556351868938, + "grad_norm": 1.525493189546978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223360 + }, + { + "epoch": 1.08330413337973, + "grad_norm": 1.554849291096616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223370 + }, + { + "epoch": 1.0833526315725661, + "grad_norm": 1.5033997442515101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223380 + }, + { + "epoch": 1.083401129765402, + "grad_norm": 1.7071038200811017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223390 + }, + { + "epoch": 1.0834496279582382, + "grad_norm": 1.418212264070462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223400 + }, + { + "epoch": 1.0834981261510743, + "grad_norm": 1.4182884342517355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223410 + }, + { + "epoch": 1.0835466243439105, + "grad_norm": 2.139864591299556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223420 + }, + { + "epoch": 1.0835951225367464, + "grad_norm": 1.3980295534565812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223430 + }, + { + "epoch": 1.0836436207295825, + "grad_norm": 1.6565445548621938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223440 + }, + { + "epoch": 1.0836921189224187, + "grad_norm": 1.2887097682323656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223450 + }, + { + "epoch": 1.0837406171152548, + "grad_norm": 1.4731582496096962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223460 + }, + { + "epoch": 1.0837891153080907, + "grad_norm": 1.3173709021430113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223470 + }, + { + "epoch": 1.0838376135009269, + "grad_norm": 1.265683408746554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223480 + }, + { + "epoch": 1.083886111693763, + "grad_norm": 1.4924369224900147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223490 + }, + { + "epoch": 1.0839346098865992, + "grad_norm": 1.2241063132023555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223500 + }, + { + "epoch": 1.083983108079435, + "grad_norm": 1.2024810303046252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223510 + }, + { + "epoch": 1.0840316062722712, + "grad_norm": 1.2209347914904356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223520 + }, + { + "epoch": 1.0840801044651074, + "grad_norm": 1.176501086774806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223530 + }, + { + "epoch": 1.0841286026579435, + "grad_norm": 1.449796059205255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223540 + }, + { + "epoch": 1.0841771008507795, + "grad_norm": 1.1527920378284762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223550 + }, + { + "epoch": 1.0842255990436156, + "grad_norm": 1.1457530035841046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223560 + }, + { + "epoch": 1.0842740972364517, + "grad_norm": 1.1587792414502474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223570 + }, + { + "epoch": 1.0843225954292879, + "grad_norm": 1.0933596286122338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223580 + }, + { + "epoch": 1.0843710936221238, + "grad_norm": 1.3491611525751068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223590 + }, + { + "epoch": 1.08441959181496, + "grad_norm": 1.0655330697773024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223600 + }, + { + "epoch": 1.084468090007796, + "grad_norm": 1.1216596931262757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223610 + }, + { + "epoch": 1.0845165882006322, + "grad_norm": 1.0497564062461606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223620 + }, + { + "epoch": 1.0845650863934684, + "grad_norm": 1.1275952829237212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223630 + }, + { + "epoch": 1.0846135845863043, + "grad_norm": 1.2489146001826157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223640 + }, + { + "epoch": 1.0846620827791404, + "grad_norm": 1.0347280294809025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223650 + }, + { + "epoch": 1.0847105809719766, + "grad_norm": 1.0635462786012795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223660 + }, + { + "epoch": 1.0847590791648125, + "grad_norm": 1.0567434856056934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223670 + }, + { + "epoch": 1.0848075773576487, + "grad_norm": 1.4170424265103065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223680 + }, + { + "epoch": 1.0848560755504848, + "grad_norm": 1.1667380022117868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223690 + }, + { + "epoch": 1.084904573743321, + "grad_norm": 9.286145541409496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223700 + }, + { + "epoch": 1.084953071936157, + "grad_norm": 9.82728579401737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223710 + }, + { + "epoch": 1.085001570128993, + "grad_norm": 9.485984264756553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223720 + }, + { + "epoch": 1.0850500683218292, + "grad_norm": 9.017920206133567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223730 + }, + { + "epoch": 1.0850985665146653, + "grad_norm": 1.061395551005262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223740 + }, + { + "epoch": 1.0851470647075012, + "grad_norm": 8.792101766630367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223750 + }, + { + "epoch": 1.0851955629003374, + "grad_norm": 8.323211204697145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223760 + }, + { + "epoch": 1.0852440610931735, + "grad_norm": 8.048021982176579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223770 + }, + { + "epoch": 1.0852925592860097, + "grad_norm": 8.620124845037935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223780 + }, + { + "epoch": 1.0853410574788458, + "grad_norm": 1.0074502370116534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223790 + }, + { + "epoch": 1.0853895556716817, + "grad_norm": 8.122837584778608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223800 + }, + { + "epoch": 1.0854380538645179, + "grad_norm": 9.645488717069384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223810 + }, + { + "epoch": 1.085486552057354, + "grad_norm": 8.102591095848766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223820 + }, + { + "epoch": 1.0855350502501901, + "grad_norm": 8.460872322757496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223830 + }, + { + "epoch": 1.085583548443026, + "grad_norm": 8.906307016331994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223840 + }, + { + "epoch": 1.0856320466358622, + "grad_norm": 8.226676300182589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223850 + }, + { + "epoch": 1.0856805448286984, + "grad_norm": 7.967890951476875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223860 + }, + { + "epoch": 1.0857290430215345, + "grad_norm": 7.715575520705897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223870 + }, + { + "epoch": 1.0857775412143704, + "grad_norm": 8.112342015920149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223880 + }, + { + "epoch": 1.0858260394072066, + "grad_norm": 9.141931514022872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223890 + }, + { + "epoch": 1.0858745376000427, + "grad_norm": 7.317765380321362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223900 + }, + { + "epoch": 1.0859230357928789, + "grad_norm": 8.073681101450347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223910 + }, + { + "epoch": 1.0859715339857148, + "grad_norm": 6.995962280598178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223920 + }, + { + "epoch": 1.086020032178551, + "grad_norm": 7.289116297215514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223930 + }, + { + "epoch": 1.086068530371387, + "grad_norm": 8.306457175422111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223940 + }, + { + "epoch": 1.0861170285642232, + "grad_norm": 6.979416298236174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223950 + }, + { + "epoch": 1.0861655267570591, + "grad_norm": 7.449988288499299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223960 + }, + { + "epoch": 1.0862140249498953, + "grad_norm": 6.630035045418481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223970 + }, + { + "epoch": 1.0862625231427314, + "grad_norm": 6.762325597264862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223980 + }, + { + "epoch": 1.0863110213355676, + "grad_norm": 7.458175446117821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 223990 + }, + { + "epoch": 1.0863595195284035, + "grad_norm": 7.00771863648697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224000 + }, + { + "epoch": 1.0864080177212396, + "grad_norm": 6.529846245939552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224010 + }, + { + "epoch": 1.0864565159140758, + "grad_norm": 6.18803596807993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224020 + }, + { + "epoch": 1.086505014106912, + "grad_norm": 6.301473263192747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224030 + }, + { + "epoch": 1.0865535122997478, + "grad_norm": 7.128537617973052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224040 + }, + { + "epoch": 1.086602010492584, + "grad_norm": 6.674160317743372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224050 + }, + { + "epoch": 1.0866505086854201, + "grad_norm": 7.40515133657027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224060 + }, + { + "epoch": 1.0866990068782563, + "grad_norm": 6.39915299416316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224070 + }, + { + "epoch": 1.0867475050710922, + "grad_norm": 6.318724672382814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224080 + }, + { + "epoch": 1.0867960032639283, + "grad_norm": 6.973568815737963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224090 + }, + { + "epoch": 1.0868445014567645, + "grad_norm": 6.320780698843009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224100 + }, + { + "epoch": 1.0868929996496006, + "grad_norm": 6.673297434645065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224110 + }, + { + "epoch": 1.0869414978424365, + "grad_norm": 6.068106017664832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224120 + }, + { + "epoch": 1.0869899960352727, + "grad_norm": 6.459380301748752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224130 + }, + { + "epoch": 1.0870384942281088, + "grad_norm": 6.494483386632055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224140 + }, + { + "epoch": 1.087086992420945, + "grad_norm": 5.769466042693239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224150 + }, + { + "epoch": 1.0871354906137811, + "grad_norm": 6.221995363375754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224160 + }, + { + "epoch": 1.087183988806617, + "grad_norm": 5.863382170900877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224170 + }, + { + "epoch": 1.0872324869994532, + "grad_norm": 5.620115075544163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224180 + }, + { + "epoch": 1.0872809851922893, + "grad_norm": 5.695191021004575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224190 + }, + { + "epoch": 1.0873294833851253, + "grad_norm": 5.800538929179311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224200 + }, + { + "epoch": 1.0873779815779614, + "grad_norm": 5.601177122116496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224210 + }, + { + "epoch": 1.0874264797707975, + "grad_norm": 5.6154527783292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224220 + }, + { + "epoch": 1.0874749779636337, + "grad_norm": 5.139302743373264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224230 + }, + { + "epoch": 1.0875234761564698, + "grad_norm": 5.790732302557444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224240 + }, + { + "epoch": 1.0875719743493057, + "grad_norm": 7.570527031930396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224250 + }, + { + "epoch": 1.087620472542142, + "grad_norm": 5.067246888756927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224260 + }, + { + "epoch": 1.087668970734978, + "grad_norm": 5.387026931202854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224270 + }, + { + "epoch": 1.0877174689278142, + "grad_norm": 5.126751716488798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224280 + }, + { + "epoch": 1.08776596712065, + "grad_norm": 4.951534151587111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224290 + }, + { + "epoch": 1.0878144653134862, + "grad_norm": 5.993366585244075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224300 + }, + { + "epoch": 1.0878629635063224, + "grad_norm": 5.502504905052774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224310 + }, + { + "epoch": 1.0879114616991585, + "grad_norm": 5.205928914620017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224320 + }, + { + "epoch": 1.0879599598919945, + "grad_norm": 4.977407570549985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224330 + }, + { + "epoch": 1.0880084580848306, + "grad_norm": 5.077434934719349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224340 + }, + { + "epoch": 1.0880569562776667, + "grad_norm": 5.698816494259518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224350 + }, + { + "epoch": 1.0881054544705029, + "grad_norm": 4.7438987849091063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224360 + }, + { + "epoch": 1.0881539526633388, + "grad_norm": 4.679145604313817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224370 + }, + { + "epoch": 1.088202450856175, + "grad_norm": 4.667351163334388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224380 + }, + { + "epoch": 1.088250949049011, + "grad_norm": 4.904540560346504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224390 + }, + { + "epoch": 1.0882994472418472, + "grad_norm": 5.244609155852231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224400 + }, + { + "epoch": 1.0883479454346832, + "grad_norm": 4.687079524501314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224410 + }, + { + "epoch": 1.0883964436275193, + "grad_norm": 4.652455061204819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224420 + }, + { + "epoch": 1.0884449418203554, + "grad_norm": 4.4262691289986833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224430 + }, + { + "epoch": 1.0884934400131916, + "grad_norm": 5.033390948483429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224440 + }, + { + "epoch": 1.0885419382060275, + "grad_norm": 5.007975687476574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224450 + }, + { + "epoch": 1.0885904363988637, + "grad_norm": 4.5223023903417925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224460 + }, + { + "epoch": 1.0886389345916998, + "grad_norm": 4.6973886469459103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224470 + }, + { + "epoch": 1.088687432784536, + "grad_norm": 4.661601451516617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224480 + }, + { + "epoch": 1.0887359309773719, + "grad_norm": 4.788165028912772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224490 + }, + { + "epoch": 1.088784429170208, + "grad_norm": 4.34716895370002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224500 + }, + { + "epoch": 1.0888329273630442, + "grad_norm": 4.817550234292867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224510 + }, + { + "epoch": 1.0888814255558803, + "grad_norm": 4.931048351863865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224520 + }, + { + "epoch": 1.0889299237487162, + "grad_norm": 2.7509708161232993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224530 + }, + { + "epoch": 1.0889784219415524, + "grad_norm": 4.527311432411807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224540 + }, + { + "epoch": 1.0890269201343885, + "grad_norm": 4.470116721222439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224550 + }, + { + "epoch": 1.0890754183272247, + "grad_norm": 4.70588929601945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224560 + }, + { + "epoch": 1.0891239165200606, + "grad_norm": 4.787110583492904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224570 + }, + { + "epoch": 1.0891724147128967, + "grad_norm": 4.650226799185475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224580 + }, + { + "epoch": 1.0892209129057329, + "grad_norm": 4.686315833168919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224590 + }, + { + "epoch": 1.089269411098569, + "grad_norm": 4.4860732373308565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224600 + }, + { + "epoch": 1.089317909291405, + "grad_norm": 4.0288057334691985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224610 + }, + { + "epoch": 1.089366407484241, + "grad_norm": 3.8288263226604613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224620 + }, + { + "epoch": 1.0894149056770772, + "grad_norm": 4.0663266531737463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224630 + }, + { + "epoch": 1.0894634038699134, + "grad_norm": 4.0635961795487674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224640 + }, + { + "epoch": 1.0895119020627493, + "grad_norm": 4.5539312054643233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224650 + }, + { + "epoch": 1.0895604002555854, + "grad_norm": 4.2514923848102626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224660 + }, + { + "epoch": 1.0896088984484216, + "grad_norm": 4.1150477159135335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224670 + }, + { + "epoch": 1.0896573966412577, + "grad_norm": 3.9007082364150847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224680 + }, + { + "epoch": 1.0897058948340939, + "grad_norm": 4.231661989706481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224690 + }, + { + "epoch": 1.0897543930269298, + "grad_norm": 4.179358086275897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224700 + }, + { + "epoch": 1.089802891219766, + "grad_norm": 3.784788304983522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224710 + }, + { + "epoch": 1.089851389412602, + "grad_norm": 5.325883307705226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224720 + }, + { + "epoch": 1.089899887605438, + "grad_norm": 3.9154238606897707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224730 + }, + { + "epoch": 1.0899483857982741, + "grad_norm": 3.8070109553700604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224740 + }, + { + "epoch": 1.0899968839911103, + "grad_norm": 4.0913010934673366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224750 + }, + { + "epoch": 1.0900453821839464, + "grad_norm": 3.684965008687868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224760 + }, + { + "epoch": 1.0900938803767826, + "grad_norm": 1.083467282114725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224770 + }, + { + "epoch": 1.0901423785696185, + "grad_norm": 4.2228174379488337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224780 + }, + { + "epoch": 1.0901908767624546, + "grad_norm": 3.525921385971742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224790 + }, + { + "epoch": 1.0902393749552908, + "grad_norm": 5.441402208816726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224800 + }, + { + "epoch": 1.090287873148127, + "grad_norm": 3.7504051420000906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224810 + }, + { + "epoch": 1.0903363713409628, + "grad_norm": 3.793415714881121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224820 + }, + { + "epoch": 1.090384869533799, + "grad_norm": 3.797439944719372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224830 + }, + { + "epoch": 1.0904333677266351, + "grad_norm": 3.347653034779796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224840 + }, + { + "epoch": 1.0904818659194713, + "grad_norm": 3.5612151805253234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224850 + }, + { + "epoch": 1.0905303641123072, + "grad_norm": 3.6835740502283443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224860 + }, + { + "epoch": 1.0905788623051433, + "grad_norm": 3.4618028621480335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224870 + }, + { + "epoch": 1.0906273604979795, + "grad_norm": 3.4751761290863215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224880 + }, + { + "epoch": 1.0906758586908156, + "grad_norm": 3.7725916968156525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224890 + }, + { + "epoch": 1.0907243568836515, + "grad_norm": 0.00021150846441742033, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 224900 + }, + { + "epoch": 1.0907728550764877, + "grad_norm": 4.43468161392957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224910 + }, + { + "epoch": 1.0908213532693238, + "grad_norm": 1.385951600241242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224920 + }, + { + "epoch": 1.09086985146216, + "grad_norm": 9.902333658828866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224930 + }, + { + "epoch": 1.090918349654996, + "grad_norm": 1.0222779565083329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224940 + }, + { + "epoch": 1.090966847847832, + "grad_norm": 5.997327207296621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224950 + }, + { + "epoch": 1.0910153460406682, + "grad_norm": 5.332386990630766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224960 + }, + { + "epoch": 1.0910638442335043, + "grad_norm": 5.031486580264755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224970 + }, + { + "epoch": 1.0911123424263403, + "grad_norm": 5.45481407243642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224980 + }, + { + "epoch": 1.0911608406191764, + "grad_norm": 5.975054591544904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 224990 + }, + { + "epoch": 1.0912093388120125, + "grad_norm": 4.033428467664635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225000 + }, + { + "epoch": 1.0912578370048487, + "grad_norm": 3.7288855310180224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225010 + }, + { + "epoch": 1.0913063351976846, + "grad_norm": 3.424674559937557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225020 + }, + { + "epoch": 1.0913548333905208, + "grad_norm": 3.923732947441749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225030 + }, + { + "epoch": 1.091403331583357, + "grad_norm": 4.416324827616336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225040 + }, + { + "epoch": 1.091451829776193, + "grad_norm": 3.037788019355503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225050 + }, + { + "epoch": 1.091500327969029, + "grad_norm": 2.740015816016239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225060 + }, + { + "epoch": 1.091548826161865, + "grad_norm": 2.710681883399957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225070 + }, + { + "epoch": 1.0915973243547012, + "grad_norm": 2.7572900762606878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225080 + }, + { + "epoch": 1.0916458225475374, + "grad_norm": 3.2961193028313573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225090 + }, + { + "epoch": 1.0916943207403733, + "grad_norm": 2.2413223632611334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225100 + }, + { + "epoch": 1.0917428189332095, + "grad_norm": 2.207347279181704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225110 + }, + { + "epoch": 1.0917913171260456, + "grad_norm": 1.9886874724761583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225120 + }, + { + "epoch": 1.0918398153188817, + "grad_norm": 2.1312284843588714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225130 + }, + { + "epoch": 1.0918883135117177, + "grad_norm": 2.799722096824553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225140 + }, + { + "epoch": 1.0919368117045538, + "grad_norm": 1.8338137124374043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225150 + }, + { + "epoch": 1.09198530989739, + "grad_norm": 1.6912007367864135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225160 + }, + { + "epoch": 1.092033808090226, + "grad_norm": 1.6238979014815413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225170 + }, + { + "epoch": 1.092082306283062, + "grad_norm": 1.877246404546895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225180 + }, + { + "epoch": 1.0921308044758982, + "grad_norm": 2.1432649646158097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225190 + }, + { + "epoch": 1.0921793026687343, + "grad_norm": 1.6082873344203108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225200 + }, + { + "epoch": 1.0922278008615705, + "grad_norm": 1.489028818468796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225210 + }, + { + "epoch": 1.0922762990544066, + "grad_norm": 1.4942837651688023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225220 + }, + { + "epoch": 1.0923247972472425, + "grad_norm": 1.5238720152410679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225230 + }, + { + "epoch": 1.0923732954400787, + "grad_norm": 1.9057557665291824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225240 + }, + { + "epoch": 1.0924217936329148, + "grad_norm": 1.4363226910063531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225250 + }, + { + "epoch": 1.0924702918257507, + "grad_norm": 1.3214194041211158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225260 + }, + { + "epoch": 1.0925187900185869, + "grad_norm": 1.3577141544374172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225270 + }, + { + "epoch": 1.092567288211423, + "grad_norm": 1.3557761349147768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225280 + }, + { + "epoch": 1.0926157864042592, + "grad_norm": 1.7028668253260548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225290 + }, + { + "epoch": 1.0926642845970953, + "grad_norm": 1.317763121733151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225300 + }, + { + "epoch": 1.0927127827899312, + "grad_norm": 1.1325194009259576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225310 + }, + { + "epoch": 1.0927612809827674, + "grad_norm": 1.1480674402264412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225320 + }, + { + "epoch": 1.0928097791756035, + "grad_norm": 1.1451024874986615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225330 + }, + { + "epoch": 1.0928582773684397, + "grad_norm": 1.6047774806793313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225340 + }, + { + "epoch": 1.0929067755612756, + "grad_norm": 1.1279944374109618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225350 + }, + { + "epoch": 1.0929552737541117, + "grad_norm": 1.0658102382876677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225360 + }, + { + "epoch": 1.0930037719469479, + "grad_norm": 1.077227693713212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225370 + }, + { + "epoch": 1.093052270139784, + "grad_norm": 1.0497282119104057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225380 + }, + { + "epoch": 1.09310076833262, + "grad_norm": 1.4371884162756032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225390 + }, + { + "epoch": 1.093149266525456, + "grad_norm": 1.0327526069886517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225400 + }, + { + "epoch": 1.0931977647182922, + "grad_norm": 9.923845709636225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225410 + }, + { + "epoch": 1.0932462629111284, + "grad_norm": 9.605811328583513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225420 + }, + { + "epoch": 1.0932947611039643, + "grad_norm": 9.8411510407459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225430 + }, + { + "epoch": 1.0933432592968004, + "grad_norm": 1.3165456493879901e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225440 + }, + { + "epoch": 1.0933917574896366, + "grad_norm": 8.683132364240009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225450 + }, + { + "epoch": 1.0934402556824727, + "grad_norm": 9.336511652691115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225460 + }, + { + "epoch": 1.0934887538753086, + "grad_norm": 8.847780463838717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225470 + }, + { + "epoch": 1.0935372520681448, + "grad_norm": 9.935847629094496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225480 + }, + { + "epoch": 1.093585750260981, + "grad_norm": 1.1924074669877882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225490 + }, + { + "epoch": 1.093634248453817, + "grad_norm": 8.376902655982121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225500 + }, + { + "epoch": 1.093682746646653, + "grad_norm": 8.482824682687351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225510 + }, + { + "epoch": 1.0937312448394891, + "grad_norm": 8.276552421193628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225520 + }, + { + "epoch": 1.0937797430323253, + "grad_norm": 1.0362663260821137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225530 + }, + { + "epoch": 1.0938282412251614, + "grad_norm": 1.0557959058132838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225540 + }, + { + "epoch": 1.0938767394179973, + "grad_norm": 7.657329206267605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225550 + }, + { + "epoch": 1.0939252376108335, + "grad_norm": 7.880892098910408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225560 + }, + { + "epoch": 1.0939737358036696, + "grad_norm": 7.735503118055931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225570 + }, + { + "epoch": 1.0940222339965058, + "grad_norm": 7.411423439407372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225580 + }, + { + "epoch": 1.0940707321893417, + "grad_norm": 9.812223424887634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225590 + }, + { + "epoch": 1.0941192303821778, + "grad_norm": 7.223539455480932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225600 + }, + { + "epoch": 1.094167728575014, + "grad_norm": 6.872478479635902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225610 + }, + { + "epoch": 1.0942162267678501, + "grad_norm": 6.983437401686388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225620 + }, + { + "epoch": 1.094264724960686, + "grad_norm": 7.8134553405107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225630 + }, + { + "epoch": 1.0943132231535222, + "grad_norm": 9.709045798445004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225640 + }, + { + "epoch": 1.0943617213463583, + "grad_norm": 6.556508083122026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225650 + }, + { + "epoch": 1.0944102195391945, + "grad_norm": 6.378902526193997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225660 + }, + { + "epoch": 1.0944587177320306, + "grad_norm": 6.702890118504001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225670 + }, + { + "epoch": 1.0945072159248665, + "grad_norm": 6.751723731213133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225680 + }, + { + "epoch": 1.0945557141177027, + "grad_norm": 8.748415325499082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225690 + }, + { + "epoch": 1.0946042123105388, + "grad_norm": 6.286369398367242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225700 + }, + { + "epoch": 1.0946527105033748, + "grad_norm": 6.026998562447261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225710 + }, + { + "epoch": 1.094701208696211, + "grad_norm": 5.949348178546643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225720 + }, + { + "epoch": 1.094749706889047, + "grad_norm": 6.449340048675367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225730 + }, + { + "epoch": 1.0947982050818832, + "grad_norm": 8.219413416554744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225740 + }, + { + "epoch": 1.0948467032747193, + "grad_norm": 6.004155466143857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225750 + }, + { + "epoch": 1.0948952014675553, + "grad_norm": 5.795282049803063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225760 + }, + { + "epoch": 1.0949436996603914, + "grad_norm": 5.761568218076718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225770 + }, + { + "epoch": 1.0949921978532275, + "grad_norm": 5.789463557448471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225780 + }, + { + "epoch": 1.0950406960460635, + "grad_norm": 8.068846568676236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225790 + }, + { + "epoch": 1.0950891942388996, + "grad_norm": 5.679410151060438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225800 + }, + { + "epoch": 1.0951376924317358, + "grad_norm": 5.481671223606099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225810 + }, + { + "epoch": 1.095186190624572, + "grad_norm": 5.405343017628184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225820 + }, + { + "epoch": 1.095234688817408, + "grad_norm": 5.844223096573842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225830 + }, + { + "epoch": 1.095283187010244, + "grad_norm": 7.275699545061798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225840 + }, + { + "epoch": 1.09533168520308, + "grad_norm": 5.253730819276825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225850 + }, + { + "epoch": 1.0953801833959163, + "grad_norm": 5.314710165293945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225860 + }, + { + "epoch": 1.0954286815887524, + "grad_norm": 5.326594987309363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225870 + }, + { + "epoch": 1.0954771797815883, + "grad_norm": 5.174074431124609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225880 + }, + { + "epoch": 1.0955256779744245, + "grad_norm": 6.931686584721319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225890 + }, + { + "epoch": 1.0955741761672606, + "grad_norm": 5.294695597513055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225900 + }, + { + "epoch": 1.0956226743600967, + "grad_norm": 5.026207645641989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225910 + }, + { + "epoch": 1.0956711725529327, + "grad_norm": 5.091344519314589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225920 + }, + { + "epoch": 1.0957196707457688, + "grad_norm": 5.123656592331827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225930 + }, + { + "epoch": 1.095768168938605, + "grad_norm": 5.97466510043887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225940 + }, + { + "epoch": 1.095816667131441, + "grad_norm": 4.792924528373987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225950 + }, + { + "epoch": 1.095865165324277, + "grad_norm": 4.7017132942528406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225960 + }, + { + "epoch": 1.0959136635171132, + "grad_norm": 4.839528742195398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225970 + }, + { + "epoch": 1.0959621617099493, + "grad_norm": 5.540410938920104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225980 + }, + { + "epoch": 1.0960106599027855, + "grad_norm": 5.623763854600838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 225990 + }, + { + "epoch": 1.0960591580956214, + "grad_norm": 4.6803586428723065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226000 + }, + { + "epoch": 1.0961076562884575, + "grad_norm": 4.5843350449104037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226010 + }, + { + "epoch": 1.0961561544812937, + "grad_norm": 4.484431315177062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226020 + }, + { + "epoch": 1.0962046526741298, + "grad_norm": 4.6029740019548626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226030 + }, + { + "epoch": 1.0962531508669657, + "grad_norm": 5.638349307446333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226040 + }, + { + "epoch": 1.0963016490598019, + "grad_norm": 4.5023415395917255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226050 + }, + { + "epoch": 1.096350147252638, + "grad_norm": 4.5061696596349066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226060 + }, + { + "epoch": 1.0963986454454742, + "grad_norm": 4.4464570692070993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226070 + }, + { + "epoch": 1.09644714363831, + "grad_norm": 4.4846504465567705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226080 + }, + { + "epoch": 1.0964956418311462, + "grad_norm": 5.057296448285342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226090 + }, + { + "epoch": 1.0965441400239824, + "grad_norm": 4.4073217964069045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226100 + }, + { + "epoch": 1.0965926382168185, + "grad_norm": 4.249506275755266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226110 + }, + { + "epoch": 1.0966411364096544, + "grad_norm": 4.2731235794235545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226120 + }, + { + "epoch": 1.0966896346024906, + "grad_norm": 4.2178430703643244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226130 + }, + { + "epoch": 1.0967381327953267, + "grad_norm": 5.206005084801291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226140 + }, + { + "epoch": 1.0967866309881629, + "grad_norm": 4.137725397868053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226150 + }, + { + "epoch": 1.0968351291809988, + "grad_norm": 4.162811819696799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226160 + }, + { + "epoch": 1.096883627373835, + "grad_norm": 4.165811731127178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226170 + }, + { + "epoch": 1.096932125566671, + "grad_norm": 4.283039629626728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226180 + }, + { + "epoch": 1.0969806237595072, + "grad_norm": 4.746379431708192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226190 + }, + { + "epoch": 1.0970291219523434, + "grad_norm": 4.012067620351445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226200 + }, + { + "epoch": 1.0970776201451793, + "grad_norm": 4.258960188963101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226210 + }, + { + "epoch": 1.0971261183380154, + "grad_norm": 4.070901411523664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226220 + }, + { + "epoch": 1.0971746165308516, + "grad_norm": 3.790799212310958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226230 + }, + { + "epoch": 1.0972231147236875, + "grad_norm": 4.5223734446153685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226240 + }, + { + "epoch": 1.0972716129165236, + "grad_norm": 3.952261806716706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226250 + }, + { + "epoch": 1.0973201111093598, + "grad_norm": 3.8083507547526096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226260 + }, + { + "epoch": 1.097368609302196, + "grad_norm": 3.7657855500583537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226270 + }, + { + "epoch": 1.097417107495032, + "grad_norm": 3.8156045434334374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226280 + }, + { + "epoch": 1.097465605687868, + "grad_norm": 4.5138142468204023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226290 + }, + { + "epoch": 1.0975141038807041, + "grad_norm": 3.6791206525776943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226300 + }, + { + "epoch": 1.0975626020735403, + "grad_norm": 3.678384814520541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226310 + }, + { + "epoch": 1.0976111002663764, + "grad_norm": 3.7065677815917297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226320 + }, + { + "epoch": 1.0976595984592123, + "grad_norm": 3.606709810810571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226330 + }, + { + "epoch": 1.0977080966520485, + "grad_norm": 4.19067930579331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226340 + }, + { + "epoch": 1.0977565948448846, + "grad_norm": 3.567218982425402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226350 + }, + { + "epoch": 1.0978050930377208, + "grad_norm": 3.6439311656977225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226360 + }, + { + "epoch": 1.0978535912305567, + "grad_norm": 3.6741465692102793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226370 + }, + { + "epoch": 1.0979020894233928, + "grad_norm": 3.442516742779844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226380 + }, + { + "epoch": 1.097950587616229, + "grad_norm": 4.2429661561982357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226390 + }, + { + "epoch": 1.0979990858090651, + "grad_norm": 4.68699340672174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226400 + }, + { + "epoch": 1.098047584001901, + "grad_norm": 3.459593358456914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226410 + }, + { + "epoch": 1.0980960821947372, + "grad_norm": 3.410255828839581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226420 + }, + { + "epoch": 1.0981445803875733, + "grad_norm": 3.3018613976310007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226430 + }, + { + "epoch": 1.0981930785804095, + "grad_norm": 4.070195700478507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226440 + }, + { + "epoch": 1.0982415767732454, + "grad_norm": 3.407168094327062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226450 + }, + { + "epoch": 1.0982900749660816, + "grad_norm": 3.271029243023804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226460 + }, + { + "epoch": 1.0983385731589177, + "grad_norm": 3.37966440611126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226470 + }, + { + "epoch": 1.0983870713517538, + "grad_norm": 3.368752459209645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226480 + }, + { + "epoch": 1.0984355695445898, + "grad_norm": 4.0497872078049113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226490 + }, + { + "epoch": 1.098484067737426, + "grad_norm": 3.2394854088124703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226500 + }, + { + "epoch": 1.098532565930262, + "grad_norm": 3.3600093729546643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226510 + }, + { + "epoch": 1.0985810641230982, + "grad_norm": 3.309837381948455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226520 + }, + { + "epoch": 1.0986295623159341, + "grad_norm": 3.1410118594976666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226530 + }, + { + "epoch": 1.0986780605087703, + "grad_norm": 3.818057052740187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226540 + }, + { + "epoch": 1.0987265587016064, + "grad_norm": 3.137811290798709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226550 + }, + { + "epoch": 1.0987750568944425, + "grad_norm": 2.995629984070547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226560 + }, + { + "epoch": 1.0988235550872785, + "grad_norm": 3.0981865961621224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226570 + }, + { + "epoch": 1.0988720532801146, + "grad_norm": 3.132295205432456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226580 + }, + { + "epoch": 1.0989205514729508, + "grad_norm": 3.662274536964105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226590 + }, + { + "epoch": 1.098969049665787, + "grad_norm": 3.0204850531845295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226600 + }, + { + "epoch": 1.0990175478586228, + "grad_norm": 2.9554061597991677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226610 + }, + { + "epoch": 1.099066046051459, + "grad_norm": 2.7552013648346474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226620 + }, + { + "epoch": 1.099114544244295, + "grad_norm": 2.9565129011643876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226630 + }, + { + "epoch": 1.0991630424371313, + "grad_norm": 3.5816464105664636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226640 + }, + { + "epoch": 1.0992115406299672, + "grad_norm": 2.922813564509852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226650 + }, + { + "epoch": 1.0992600388228033, + "grad_norm": 2.8370405402711185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226660 + }, + { + "epoch": 1.0993085370156395, + "grad_norm": 6.307683361228555e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 226670 + }, + { + "epoch": 1.0993570352084756, + "grad_norm": 6.70210356474854e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 226680 + }, + { + "epoch": 1.0994055334013115, + "grad_norm": 0.029514243826270103, + "learning_rate": 0.0002, + "loss": 0.0032, + "step": 226690 + }, + { + "epoch": 1.0994540315941477, + "grad_norm": 0.0003725166607182473, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 226700 + }, + { + "epoch": 1.0995025297869838, + "grad_norm": 0.00013449341349769384, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 226710 + }, + { + "epoch": 1.09955102797982, + "grad_norm": 0.052828460931777954, + "learning_rate": 0.0002, + "loss": 0.0035, + "step": 226720 + }, + { + "epoch": 1.099599526172656, + "grad_norm": 0.0010816901922225952, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 226730 + }, + { + "epoch": 1.099648024365492, + "grad_norm": 0.15484163165092468, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 226740 + }, + { + "epoch": 1.0996965225583282, + "grad_norm": 0.0005932878120802343, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 226750 + }, + { + "epoch": 1.0997450207511643, + "grad_norm": 8.450189488939941e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 226760 + }, + { + "epoch": 1.0997935189440002, + "grad_norm": 0.0001517674681963399, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226770 + }, + { + "epoch": 1.0998420171368364, + "grad_norm": 0.00015082875324878842, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226780 + }, + { + "epoch": 1.0998905153296725, + "grad_norm": 9.717250213725492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226790 + }, + { + "epoch": 1.0999390135225087, + "grad_norm": 7.393447594949976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226800 + }, + { + "epoch": 1.0999875117153448, + "grad_norm": 5.237320510786958e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226810 + }, + { + "epoch": 1.1000360099081807, + "grad_norm": 4.223829091642983e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226820 + }, + { + "epoch": 1.1000845081010169, + "grad_norm": 3.46647939295508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226830 + }, + { + "epoch": 1.100133006293853, + "grad_norm": 2.9684884793823585e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226840 + }, + { + "epoch": 1.1001815044866892, + "grad_norm": 2.6590822017169558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226850 + }, + { + "epoch": 1.100230002679525, + "grad_norm": 2.3684402549406514e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226860 + }, + { + "epoch": 1.1002785008723612, + "grad_norm": 2.0749284885823727e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226870 + }, + { + "epoch": 1.1003269990651974, + "grad_norm": 2.0698513253591955e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226880 + }, + { + "epoch": 1.1003754972580335, + "grad_norm": 1.846610030042939e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226890 + }, + { + "epoch": 1.1004239954508694, + "grad_norm": 1.7566862879903056e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226900 + }, + { + "epoch": 1.1004724936437056, + "grad_norm": 1.648815486987587e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226910 + }, + { + "epoch": 1.1005209918365417, + "grad_norm": 1.5161252122197766e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226920 + }, + { + "epoch": 1.1005694900293779, + "grad_norm": 1.4061923138797283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226930 + }, + { + "epoch": 1.1006179882222138, + "grad_norm": 1.335347224085126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226940 + }, + { + "epoch": 1.10066648641505, + "grad_norm": 1.316159341513412e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226950 + }, + { + "epoch": 1.100714984607886, + "grad_norm": 1.1931262633879669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226960 + }, + { + "epoch": 1.1007634828007222, + "grad_norm": 1.1915529285033699e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226970 + }, + { + "epoch": 1.1008119809935581, + "grad_norm": 1.1228345101699233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226980 + }, + { + "epoch": 1.1008604791863943, + "grad_norm": 1.0273208317812532e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 226990 + }, + { + "epoch": 1.1009089773792304, + "grad_norm": 1.064947537088301e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227000 + }, + { + "epoch": 1.1009574755720666, + "grad_norm": 9.593637514626607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227010 + }, + { + "epoch": 1.1010059737649025, + "grad_norm": 9.552034498483408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227020 + }, + { + "epoch": 1.1010544719577386, + "grad_norm": 9.066113307198975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227030 + }, + { + "epoch": 1.1011029701505748, + "grad_norm": 8.430032721662428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227040 + }, + { + "epoch": 1.101151468343411, + "grad_norm": 8.52745597512694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227050 + }, + { + "epoch": 1.1011999665362469, + "grad_norm": 8.155003342835698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227060 + }, + { + "epoch": 1.101248464729083, + "grad_norm": 8.172452908183914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227070 + }, + { + "epoch": 1.1012969629219191, + "grad_norm": 7.705379175604321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227080 + }, + { + "epoch": 1.1013454611147553, + "grad_norm": 7.106286375346826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227090 + }, + { + "epoch": 1.1013939593075912, + "grad_norm": 7.342361641349271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227100 + }, + { + "epoch": 1.1014424575004274, + "grad_norm": 7.1949175435293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227110 + }, + { + "epoch": 1.1014909556932635, + "grad_norm": 6.918789949850179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227120 + }, + { + "epoch": 1.1015394538860996, + "grad_norm": 6.7557325564848725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227130 + }, + { + "epoch": 1.1015879520789356, + "grad_norm": 6.328759809548501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227140 + }, + { + "epoch": 1.1016364502717717, + "grad_norm": 6.502276846731547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227150 + }, + { + "epoch": 1.1016849484646078, + "grad_norm": 6.383424988598563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227160 + }, + { + "epoch": 1.101733446657444, + "grad_norm": 6.34742718830239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227170 + }, + { + "epoch": 1.10178194485028, + "grad_norm": 5.865963430551346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227180 + }, + { + "epoch": 1.101830443043116, + "grad_norm": 5.521187176782405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227190 + }, + { + "epoch": 1.1018789412359522, + "grad_norm": 5.689398221875308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227200 + }, + { + "epoch": 1.1019274394287883, + "grad_norm": 5.66273411095608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227210 + }, + { + "epoch": 1.1019759376216243, + "grad_norm": 5.39850816494436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227220 + }, + { + "epoch": 1.1020244358144604, + "grad_norm": 5.22988830198301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227230 + }, + { + "epoch": 1.1020729340072966, + "grad_norm": 5.109539870318258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227240 + }, + { + "epoch": 1.1021214322001327, + "grad_norm": 5.308920663082972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227250 + }, + { + "epoch": 1.1021699303929688, + "grad_norm": 4.882331722910749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227260 + }, + { + "epoch": 1.1022184285858048, + "grad_norm": 4.949947651766706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227270 + }, + { + "epoch": 1.102266926778641, + "grad_norm": 4.891719527222449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227280 + }, + { + "epoch": 1.102315424971477, + "grad_norm": 4.615175384969916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227290 + }, + { + "epoch": 1.102363923164313, + "grad_norm": 4.82292807646445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227300 + }, + { + "epoch": 1.1024124213571491, + "grad_norm": 4.751369033328956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227310 + }, + { + "epoch": 1.1024609195499853, + "grad_norm": 4.615397301677149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227320 + }, + { + "epoch": 1.1025094177428214, + "grad_norm": 4.565306426229654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227330 + }, + { + "epoch": 1.1025579159356576, + "grad_norm": 4.522693416220136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227340 + }, + { + "epoch": 1.1026064141284935, + "grad_norm": 4.188079401501454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227350 + }, + { + "epoch": 1.1026549123213296, + "grad_norm": 4.314556463214103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227360 + }, + { + "epoch": 1.1027034105141658, + "grad_norm": 4.209962753520813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227370 + }, + { + "epoch": 1.102751908707002, + "grad_norm": 4.17821729570278e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227380 + }, + { + "epoch": 1.1028004068998378, + "grad_norm": 3.9172755350591615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227390 + }, + { + "epoch": 1.102848905092674, + "grad_norm": 4.072748197359033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227400 + }, + { + "epoch": 1.1028974032855101, + "grad_norm": 3.962440132454503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227410 + }, + { + "epoch": 1.1029459014783463, + "grad_norm": 3.748544031623169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227420 + }, + { + "epoch": 1.1029943996711822, + "grad_norm": 3.6392318634170806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227430 + }, + { + "epoch": 1.1030428978640183, + "grad_norm": 3.6950132198398933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227440 + }, + { + "epoch": 1.1030913960568545, + "grad_norm": 3.853272573905997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227450 + }, + { + "epoch": 1.1031398942496906, + "grad_norm": 3.7325025914469734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227460 + }, + { + "epoch": 1.1031883924425265, + "grad_norm": 3.455320893408498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227470 + }, + { + "epoch": 1.1032368906353627, + "grad_norm": 3.365980774105992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227480 + }, + { + "epoch": 1.1032853888281988, + "grad_norm": 3.1564920845994493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227490 + }, + { + "epoch": 1.103333887021035, + "grad_norm": 3.332301957925665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227500 + }, + { + "epoch": 1.1033823852138709, + "grad_norm": 3.197325440851273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227510 + }, + { + "epoch": 1.103430883406707, + "grad_norm": 3.1240892894857097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227520 + }, + { + "epoch": 1.1034793815995432, + "grad_norm": 3.078753479712759e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227530 + }, + { + "epoch": 1.1035278797923793, + "grad_norm": 2.8751160243700724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227540 + }, + { + "epoch": 1.1035763779852152, + "grad_norm": 3.1171582577371737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227550 + }, + { + "epoch": 1.1036248761780514, + "grad_norm": 2.9955933769088006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227560 + }, + { + "epoch": 1.1036733743708875, + "grad_norm": 2.9764557893940946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227570 + }, + { + "epoch": 1.1037218725637237, + "grad_norm": 2.8738027140207123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227580 + }, + { + "epoch": 1.1037703707565596, + "grad_norm": 2.6056193291879026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227590 + }, + { + "epoch": 1.1038188689493957, + "grad_norm": 2.7401156330597587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227600 + }, + { + "epoch": 1.1038673671422319, + "grad_norm": 2.8247418413229752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227610 + }, + { + "epoch": 1.103915865335068, + "grad_norm": 2.7938681341765914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227620 + }, + { + "epoch": 1.103964363527904, + "grad_norm": 2.613499646031414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227630 + }, + { + "epoch": 1.10401286172074, + "grad_norm": 2.4995524654514156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227640 + }, + { + "epoch": 1.1040613599135762, + "grad_norm": 2.5559777441230835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227650 + }, + { + "epoch": 1.1041098581064124, + "grad_norm": 2.450781039442518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227660 + }, + { + "epoch": 1.1041583562992483, + "grad_norm": 2.4930795916588977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227670 + }, + { + "epoch": 1.1042068544920844, + "grad_norm": 2.3823038191039814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227680 + }, + { + "epoch": 1.1042553526849206, + "grad_norm": 2.247714064651518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227690 + }, + { + "epoch": 1.1043038508777567, + "grad_norm": 2.5222450403816765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227700 + }, + { + "epoch": 1.1043523490705927, + "grad_norm": 2.374771383983898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227710 + }, + { + "epoch": 1.1044008472634288, + "grad_norm": 2.3746881652186858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227720 + }, + { + "epoch": 1.104449345456265, + "grad_norm": 2.251106479889131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227730 + }, + { + "epoch": 1.104497843649101, + "grad_norm": 2.0741113075928297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227740 + }, + { + "epoch": 1.104546341841937, + "grad_norm": 2.307994463990326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227750 + }, + { + "epoch": 1.1045948400347732, + "grad_norm": 2.193160526076099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227760 + }, + { + "epoch": 1.1046433382276093, + "grad_norm": 2.15244767787226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227770 + }, + { + "epoch": 1.1046918364204454, + "grad_norm": 2.0943123217875836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227780 + }, + { + "epoch": 1.1047403346132816, + "grad_norm": 1.9398307813389692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227790 + }, + { + "epoch": 1.1047888328061175, + "grad_norm": 2.128856294802972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227800 + }, + { + "epoch": 1.1048373309989536, + "grad_norm": 1.98805219042697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227810 + }, + { + "epoch": 1.1048858291917898, + "grad_norm": 2.0272768779250327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227820 + }, + { + "epoch": 1.1049343273846257, + "grad_norm": 2.050580633294885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227830 + }, + { + "epoch": 1.1049828255774619, + "grad_norm": 1.8857726900023408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227840 + }, + { + "epoch": 1.105031323770298, + "grad_norm": 1.924164507727255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227850 + }, + { + "epoch": 1.1050798219631341, + "grad_norm": 1.8137936876883032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227860 + }, + { + "epoch": 1.1051283201559703, + "grad_norm": 1.867756850515434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227870 + }, + { + "epoch": 1.1051768183488062, + "grad_norm": 1.7560854530529468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227880 + }, + { + "epoch": 1.1052253165416424, + "grad_norm": 1.688762040430447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227890 + }, + { + "epoch": 1.1052738147344785, + "grad_norm": 1.8708288962443476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227900 + }, + { + "epoch": 1.1053223129273146, + "grad_norm": 1.6764946622060961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227910 + }, + { + "epoch": 1.1053708111201506, + "grad_norm": 1.861824330262607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227920 + }, + { + "epoch": 1.1054193093129867, + "grad_norm": 1.694041998234752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227930 + }, + { + "epoch": 1.1054678075058229, + "grad_norm": 1.730851067804906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227940 + }, + { + "epoch": 1.105516305698659, + "grad_norm": 1.7281997770624002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227950 + }, + { + "epoch": 1.105564803891495, + "grad_norm": 1.6909565374589874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227960 + }, + { + "epoch": 1.105613302084331, + "grad_norm": 1.6573476386838593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227970 + }, + { + "epoch": 1.1056618002771672, + "grad_norm": 1.603938017069595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227980 + }, + { + "epoch": 1.1057102984700033, + "grad_norm": 1.5655632523703389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 227990 + }, + { + "epoch": 1.1057587966628393, + "grad_norm": 1.5660506278436515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228000 + }, + { + "epoch": 1.1058072948556754, + "grad_norm": 1.5194182196864858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228010 + }, + { + "epoch": 1.1058557930485116, + "grad_norm": 1.4923233493391308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228020 + }, + { + "epoch": 1.1059042912413477, + "grad_norm": 1.5209441244223854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228030 + }, + { + "epoch": 1.1059527894341836, + "grad_norm": 1.5928707171042333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228040 + }, + { + "epoch": 1.1060012876270198, + "grad_norm": 1.5169705420703394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228050 + }, + { + "epoch": 1.106049785819856, + "grad_norm": 1.5025461834738962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228060 + }, + { + "epoch": 1.106098284012692, + "grad_norm": 1.4223628568288404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228070 + }, + { + "epoch": 1.106146782205528, + "grad_norm": 1.732022724354465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228080 + }, + { + "epoch": 1.1061952803983641, + "grad_norm": 1.4320846730697667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228090 + }, + { + "epoch": 1.1062437785912003, + "grad_norm": 1.4377715160662774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228100 + }, + { + "epoch": 1.1062922767840364, + "grad_norm": 1.4006322999193799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228110 + }, + { + "epoch": 1.1063407749768723, + "grad_norm": 1.456888867323869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228120 + }, + { + "epoch": 1.1063892731697085, + "grad_norm": 1.328922962784418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228130 + }, + { + "epoch": 1.1064377713625446, + "grad_norm": 1.3745050182478735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228140 + }, + { + "epoch": 1.1064862695553808, + "grad_norm": 1.3271960597194266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228150 + }, + { + "epoch": 1.1065347677482167, + "grad_norm": 1.3347765843718662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228160 + }, + { + "epoch": 1.1065832659410528, + "grad_norm": 1.2881920383733814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228170 + }, + { + "epoch": 1.106631764133889, + "grad_norm": 1.3386073760557338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228180 + }, + { + "epoch": 1.1066802623267251, + "grad_norm": 1.3470233852785896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228190 + }, + { + "epoch": 1.106728760519561, + "grad_norm": 1.3163863741283421e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228200 + }, + { + "epoch": 1.1067772587123972, + "grad_norm": 1.3417229638434947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228210 + }, + { + "epoch": 1.1068257569052333, + "grad_norm": 1.2691353958871332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228220 + }, + { + "epoch": 1.1068742550980695, + "grad_norm": 1.2795958355127368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228230 + }, + { + "epoch": 1.1069227532909056, + "grad_norm": 1.2408980865075137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228240 + }, + { + "epoch": 1.1069712514837415, + "grad_norm": 1.339048367299256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228250 + }, + { + "epoch": 1.1070197496765777, + "grad_norm": 1.2098394108761568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228260 + }, + { + "epoch": 1.1070682478694138, + "grad_norm": 1.232064278156031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228270 + }, + { + "epoch": 1.1071167460622497, + "grad_norm": 1.212145889439853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228280 + }, + { + "epoch": 1.107165244255086, + "grad_norm": 1.152374352386687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228290 + }, + { + "epoch": 1.107213742447922, + "grad_norm": 2.836053681676276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228300 + }, + { + "epoch": 1.1072622406407582, + "grad_norm": 1.1285671916994033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228310 + }, + { + "epoch": 1.1073107388335943, + "grad_norm": 1.1241922948102001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228320 + }, + { + "epoch": 1.1073592370264302, + "grad_norm": 1.1635464716164279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228330 + }, + { + "epoch": 1.1074077352192664, + "grad_norm": 1.1312139349684003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228340 + }, + { + "epoch": 1.1074562334121025, + "grad_norm": 1.140394033427583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228350 + }, + { + "epoch": 1.1075047316049387, + "grad_norm": 1.207568743666343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228360 + }, + { + "epoch": 1.1075532297977746, + "grad_norm": 1.0568627430984634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228370 + }, + { + "epoch": 1.1076017279906107, + "grad_norm": 1.087922782971873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228380 + }, + { + "epoch": 1.1076502261834469, + "grad_norm": 1.1083325262006838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228390 + }, + { + "epoch": 1.107698724376283, + "grad_norm": 1.1157451353938086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228400 + }, + { + "epoch": 1.107747222569119, + "grad_norm": 1.0960865211018245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228410 + }, + { + "epoch": 1.107795720761955, + "grad_norm": 1.0473107749930932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228420 + }, + { + "epoch": 1.1078442189547912, + "grad_norm": 1.069102950168599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228430 + }, + { + "epoch": 1.1078927171476274, + "grad_norm": 1.0363561386839137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228440 + }, + { + "epoch": 1.1079412153404633, + "grad_norm": 1.0160816827919916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228450 + }, + { + "epoch": 1.1079897135332994, + "grad_norm": 1.091572016775899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228460 + }, + { + "epoch": 1.1080382117261356, + "grad_norm": 1.0710867854868411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228470 + }, + { + "epoch": 1.1080867099189717, + "grad_norm": 9.985847100324463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228480 + }, + { + "epoch": 1.1081352081118077, + "grad_norm": 1.0314156497770455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228490 + }, + { + "epoch": 1.1081837063046438, + "grad_norm": 9.980267350329086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228500 + }, + { + "epoch": 1.10823220449748, + "grad_norm": 1.1017596079909708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228510 + }, + { + "epoch": 1.108280702690316, + "grad_norm": 9.430403906662832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228520 + }, + { + "epoch": 1.108329200883152, + "grad_norm": 9.497711630501726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228530 + }, + { + "epoch": 1.1083776990759882, + "grad_norm": 1.018557554743893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228540 + }, + { + "epoch": 1.1084261972688243, + "grad_norm": 2.5025467493833276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228550 + }, + { + "epoch": 1.1084746954616604, + "grad_norm": 9.949113746188232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228560 + }, + { + "epoch": 1.1085231936544964, + "grad_norm": 9.9110081919207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228570 + }, + { + "epoch": 1.1085716918473325, + "grad_norm": 9.779440688362229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228580 + }, + { + "epoch": 1.1086201900401687, + "grad_norm": 9.566817880113376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228590 + }, + { + "epoch": 1.1086686882330048, + "grad_norm": 9.178515938401688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228600 + }, + { + "epoch": 1.1087171864258407, + "grad_norm": 9.77358013187768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228610 + }, + { + "epoch": 1.1087656846186769, + "grad_norm": 9.891476793200127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228620 + }, + { + "epoch": 1.108814182811513, + "grad_norm": 9.421376603313547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228630 + }, + { + "epoch": 1.1088626810043491, + "grad_norm": 9.641717042541131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228640 + }, + { + "epoch": 1.108911179197185, + "grad_norm": 9.310319342148432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228650 + }, + { + "epoch": 1.1089596773900212, + "grad_norm": 8.968952442955924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228660 + }, + { + "epoch": 1.1090081755828574, + "grad_norm": 9.788552688405616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228670 + }, + { + "epoch": 1.1090566737756935, + "grad_norm": 8.608653843111824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228680 + }, + { + "epoch": 1.1091051719685294, + "grad_norm": 9.311731332672935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228690 + }, + { + "epoch": 1.1091536701613656, + "grad_norm": 9.53321887209313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228700 + }, + { + "epoch": 1.1092021683542017, + "grad_norm": 8.944817864176002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228710 + }, + { + "epoch": 1.1092506665470379, + "grad_norm": 8.35711716717924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228720 + }, + { + "epoch": 1.1092991647398738, + "grad_norm": 8.677343998897413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228730 + }, + { + "epoch": 1.10934766293271, + "grad_norm": 8.237298629865109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228740 + }, + { + "epoch": 1.109396161125546, + "grad_norm": 8.484931299790333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228750 + }, + { + "epoch": 1.1094446593183822, + "grad_norm": 8.406381084569148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228760 + }, + { + "epoch": 1.1094931575112184, + "grad_norm": 8.784799092609319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228770 + }, + { + "epoch": 1.1095416557040543, + "grad_norm": 8.375507718483277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228780 + }, + { + "epoch": 1.1095901538968904, + "grad_norm": 8.306168979288486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228790 + }, + { + "epoch": 1.1096386520897266, + "grad_norm": 8.410960390392574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228800 + }, + { + "epoch": 1.1096871502825625, + "grad_norm": 8.192844802579202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228810 + }, + { + "epoch": 1.1097356484753986, + "grad_norm": 8.915644684748258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228820 + }, + { + "epoch": 1.1097841466682348, + "grad_norm": 7.788852371959365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228830 + }, + { + "epoch": 1.109832644861071, + "grad_norm": 7.906971859483747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228840 + }, + { + "epoch": 1.109881143053907, + "grad_norm": 7.64664378039015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228850 + }, + { + "epoch": 1.109929641246743, + "grad_norm": 8.428770570390043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228860 + }, + { + "epoch": 1.1099781394395791, + "grad_norm": 8.263992299362144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228870 + }, + { + "epoch": 1.1100266376324153, + "grad_norm": 7.99304530119116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228880 + }, + { + "epoch": 1.1100751358252514, + "grad_norm": 7.481079364879406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228890 + }, + { + "epoch": 1.1101236340180873, + "grad_norm": 8.007058909242915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228900 + }, + { + "epoch": 1.1101721322109235, + "grad_norm": 2.0894563022011425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228910 + }, + { + "epoch": 1.1102206304037596, + "grad_norm": 7.448118140018778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228920 + }, + { + "epoch": 1.1102691285965958, + "grad_norm": 7.599481364195526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228930 + }, + { + "epoch": 1.1103176267894317, + "grad_norm": 7.601360607623064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228940 + }, + { + "epoch": 1.1103661249822678, + "grad_norm": 7.598003435305145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228950 + }, + { + "epoch": 1.110414623175104, + "grad_norm": 7.540079423051793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228960 + }, + { + "epoch": 1.1104631213679401, + "grad_norm": 7.592985298288113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228970 + }, + { + "epoch": 1.110511619560776, + "grad_norm": 6.654786375293043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228980 + }, + { + "epoch": 1.1105601177536122, + "grad_norm": 7.343803076764743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 228990 + }, + { + "epoch": 1.1106086159464483, + "grad_norm": 7.082755359988369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229000 + }, + { + "epoch": 1.1106571141392845, + "grad_norm": 6.557623919434263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229010 + }, + { + "epoch": 1.1107056123321204, + "grad_norm": 6.714013238706684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229020 + }, + { + "epoch": 1.1107541105249565, + "grad_norm": 6.625062951570726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229030 + }, + { + "epoch": 1.1108026087177927, + "grad_norm": 6.921684416738572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229040 + }, + { + "epoch": 1.1108511069106288, + "grad_norm": 6.578549687219493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229050 + }, + { + "epoch": 1.1108996051034647, + "grad_norm": 6.613158802792896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229060 + }, + { + "epoch": 1.110948103296301, + "grad_norm": 6.414690574274573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229070 + }, + { + "epoch": 1.110996601489137, + "grad_norm": 6.167128390188736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229080 + }, + { + "epoch": 1.1110450996819732, + "grad_norm": 6.792931230847898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229090 + }, + { + "epoch": 1.111093597874809, + "grad_norm": 6.494577746707364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229100 + }, + { + "epoch": 1.1111420960676452, + "grad_norm": 6.38708513633901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229110 + }, + { + "epoch": 1.1111905942604814, + "grad_norm": 5.782549123978242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229120 + }, + { + "epoch": 1.1112390924533175, + "grad_norm": 6.512635764011065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229130 + }, + { + "epoch": 1.1112875906461535, + "grad_norm": 6.024124559189659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229140 + }, + { + "epoch": 1.1113360888389896, + "grad_norm": 6.081053243178758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229150 + }, + { + "epoch": 1.1113845870318257, + "grad_norm": 6.403575412150531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229160 + }, + { + "epoch": 1.1114330852246619, + "grad_norm": 5.956346171842597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229170 + }, + { + "epoch": 1.1114815834174978, + "grad_norm": 6.120916964391654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229180 + }, + { + "epoch": 1.111530081610334, + "grad_norm": 5.998429060127819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229190 + }, + { + "epoch": 1.11157857980317, + "grad_norm": 6.140475647953281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229200 + }, + { + "epoch": 1.1116270779960062, + "grad_norm": 5.525612323253881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229210 + }, + { + "epoch": 1.1116755761888422, + "grad_norm": 6.407751129700046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229220 + }, + { + "epoch": 1.1117240743816783, + "grad_norm": 5.564338607655372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229230 + }, + { + "epoch": 1.1117725725745145, + "grad_norm": 6.14906412010896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229240 + }, + { + "epoch": 1.1118210707673506, + "grad_norm": 5.683144763679593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229250 + }, + { + "epoch": 1.1118695689601865, + "grad_norm": 6.524861078105459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229260 + }, + { + "epoch": 1.1119180671530227, + "grad_norm": 5.386651764638373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229270 + }, + { + "epoch": 1.1119665653458588, + "grad_norm": 6.049078820069553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229280 + }, + { + "epoch": 1.112015063538695, + "grad_norm": 5.344359124137554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229290 + }, + { + "epoch": 1.112063561731531, + "grad_norm": 5.303073749018949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229300 + }, + { + "epoch": 1.112112059924367, + "grad_norm": 5.182438940209977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229310 + }, + { + "epoch": 1.1121605581172032, + "grad_norm": 4.998708504899696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229320 + }, + { + "epoch": 1.1122090563100393, + "grad_norm": 4.854850885749329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229330 + }, + { + "epoch": 1.1122575545028752, + "grad_norm": 5.466467314363399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229340 + }, + { + "epoch": 1.1123060526957114, + "grad_norm": 5.030790930504736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229350 + }, + { + "epoch": 1.1123545508885475, + "grad_norm": 5.710004415959702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229360 + }, + { + "epoch": 1.1124030490813837, + "grad_norm": 5.05757896007708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229370 + }, + { + "epoch": 1.1124515472742198, + "grad_norm": 5.258762598714384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229380 + }, + { + "epoch": 1.1125000454670557, + "grad_norm": 5.101036890664545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229390 + }, + { + "epoch": 1.1125485436598919, + "grad_norm": 5.108630034555972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229400 + }, + { + "epoch": 1.112597041852728, + "grad_norm": 4.750734490244213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229410 + }, + { + "epoch": 1.1126455400455642, + "grad_norm": 4.816492946702056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229420 + }, + { + "epoch": 1.1126940382384, + "grad_norm": 5.023010771765257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229430 + }, + { + "epoch": 1.1127425364312362, + "grad_norm": 5.150783408680581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229440 + }, + { + "epoch": 1.1127910346240724, + "grad_norm": 4.7006690806483675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229450 + }, + { + "epoch": 1.1128395328169085, + "grad_norm": 8.50528431328712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229460 + }, + { + "epoch": 1.1128880310097444, + "grad_norm": 4.773033879246213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229470 + }, + { + "epoch": 1.1129365292025806, + "grad_norm": 4.4400204046723957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229480 + }, + { + "epoch": 1.1129850273954167, + "grad_norm": 4.61759924519356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229490 + }, + { + "epoch": 1.1130335255882529, + "grad_norm": 4.398523856252723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229500 + }, + { + "epoch": 1.1130820237810888, + "grad_norm": 4.749964261918649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229510 + }, + { + "epoch": 1.113130521973925, + "grad_norm": 4.4428171008803474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229520 + }, + { + "epoch": 1.113179020166761, + "grad_norm": 4.540718521184317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229530 + }, + { + "epoch": 1.1132275183595972, + "grad_norm": 4.1992987576122687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229540 + }, + { + "epoch": 1.1132760165524331, + "grad_norm": 4.5140464521864487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229550 + }, + { + "epoch": 1.1133245147452693, + "grad_norm": 4.4206885263520235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229560 + }, + { + "epoch": 1.1133730129381054, + "grad_norm": 0.006481776479631662, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 229570 + }, + { + "epoch": 1.1134215111309416, + "grad_norm": 0.027538303285837173, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229580 + }, + { + "epoch": 1.1134700093237775, + "grad_norm": 9.701584531285334e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 229590 + }, + { + "epoch": 1.1135185075166136, + "grad_norm": 7.53082858864218e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229600 + }, + { + "epoch": 1.1135670057094498, + "grad_norm": 3.210749491699971e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229610 + }, + { + "epoch": 1.113615503902286, + "grad_norm": 3.1204166589304805e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229620 + }, + { + "epoch": 1.1136640020951218, + "grad_norm": 6.9626830736524425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229630 + }, + { + "epoch": 1.113712500287958, + "grad_norm": 6.409111392713385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229640 + }, + { + "epoch": 1.1137609984807941, + "grad_norm": 7.143550192267867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229650 + }, + { + "epoch": 1.1138094966736303, + "grad_norm": 1.2554173736134544e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229660 + }, + { + "epoch": 1.1138579948664662, + "grad_norm": 2.4427367861790117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229670 + }, + { + "epoch": 1.1139064930593023, + "grad_norm": 3.2153973279491765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229680 + }, + { + "epoch": 1.1139549912521385, + "grad_norm": 8.182948477042373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229690 + }, + { + "epoch": 1.1140034894449746, + "grad_norm": 6.943646894796984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229700 + }, + { + "epoch": 1.1140519876378105, + "grad_norm": 2.8777722036466002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229710 + }, + { + "epoch": 1.1141004858306467, + "grad_norm": 2.181725449190708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229720 + }, + { + "epoch": 1.1141489840234828, + "grad_norm": 3.0163987503328826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229730 + }, + { + "epoch": 1.114197482216319, + "grad_norm": 3.0674239042127738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229740 + }, + { + "epoch": 1.114245980409155, + "grad_norm": 0.00012780244287569076, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229750 + }, + { + "epoch": 1.114294478601991, + "grad_norm": 1.7716403135636938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229760 + }, + { + "epoch": 1.1143429767948272, + "grad_norm": 1.804745238587202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229770 + }, + { + "epoch": 1.1143914749876633, + "grad_norm": 3.387681772437645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229780 + }, + { + "epoch": 1.1144399731804993, + "grad_norm": 2.0474089978961274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229790 + }, + { + "epoch": 1.1144884713733354, + "grad_norm": 2.2992317099124193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229800 + }, + { + "epoch": 1.1145369695661715, + "grad_norm": 1.7285423155044555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229810 + }, + { + "epoch": 1.1145854677590077, + "grad_norm": 2.1206669771345332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229820 + }, + { + "epoch": 1.1146339659518438, + "grad_norm": 2.477367843312095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229830 + }, + { + "epoch": 1.1146824641446798, + "grad_norm": 1.6524606962775579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229840 + }, + { + "epoch": 1.114730962337516, + "grad_norm": 1.0391432851974969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229850 + }, + { + "epoch": 1.114779460530352, + "grad_norm": 1.1912369473066065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229860 + }, + { + "epoch": 1.114827958723188, + "grad_norm": 1.5036429203973967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229870 + }, + { + "epoch": 1.114876456916024, + "grad_norm": 4.344699391367612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229880 + }, + { + "epoch": 1.1149249551088602, + "grad_norm": 1.3733052810493973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229890 + }, + { + "epoch": 1.1149734533016964, + "grad_norm": 1.0767759022201062e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229900 + }, + { + "epoch": 1.1150219514945325, + "grad_norm": 1.1477912948976154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229910 + }, + { + "epoch": 1.1150704496873685, + "grad_norm": 5.952212632109877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229920 + }, + { + "epoch": 1.1151189478802046, + "grad_norm": 1.245398266291886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229930 + }, + { + "epoch": 1.1151674460730407, + "grad_norm": 9.685199984232895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229940 + }, + { + "epoch": 1.115215944265877, + "grad_norm": 1.255377924280765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229950 + }, + { + "epoch": 1.1152644424587128, + "grad_norm": 1.6813563661344233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229960 + }, + { + "epoch": 1.115312940651549, + "grad_norm": 9.096856956603006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229970 + }, + { + "epoch": 1.115361438844385, + "grad_norm": 8.673280262883054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229980 + }, + { + "epoch": 1.1154099370372212, + "grad_norm": 8.255285024461045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 229990 + }, + { + "epoch": 1.1154584352300572, + "grad_norm": 1.320010369454394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230000 + }, + { + "epoch": 1.1155069334228933, + "grad_norm": 8.59686963394779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230010 + }, + { + "epoch": 1.1155554316157295, + "grad_norm": 7.794002385708154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230020 + }, + { + "epoch": 1.1156039298085656, + "grad_norm": 1.1419913334975718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230030 + }, + { + "epoch": 1.1156524280014015, + "grad_norm": 8.104200333036715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230040 + }, + { + "epoch": 1.1157009261942377, + "grad_norm": 7.563050417047634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230050 + }, + { + "epoch": 1.1157494243870738, + "grad_norm": 7.97454561052291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230060 + }, + { + "epoch": 1.11579792257991, + "grad_norm": 8.479912025904923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230070 + }, + { + "epoch": 1.1158464207727459, + "grad_norm": 8.893513268048991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230080 + }, + { + "epoch": 1.115894918965582, + "grad_norm": 9.086492127607926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230090 + }, + { + "epoch": 1.1159434171584182, + "grad_norm": 1.0427467032059212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230100 + }, + { + "epoch": 1.1159919153512543, + "grad_norm": 8.883277587301563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230110 + }, + { + "epoch": 1.1160404135440902, + "grad_norm": 7.080102477630135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230120 + }, + { + "epoch": 1.1160889117369264, + "grad_norm": 8.709014309715712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230130 + }, + { + "epoch": 1.1161374099297625, + "grad_norm": 8.388918786295108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230140 + }, + { + "epoch": 1.1161859081225987, + "grad_norm": 6.288653366937069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230150 + }, + { + "epoch": 1.1162344063154346, + "grad_norm": 5.960517341918603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230160 + }, + { + "epoch": 1.1162829045082707, + "grad_norm": 6.3653550341769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230170 + }, + { + "epoch": 1.1163314027011069, + "grad_norm": 7.499601224481012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230180 + }, + { + "epoch": 1.116379900893943, + "grad_norm": 6.804582426411798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230190 + }, + { + "epoch": 1.116428399086779, + "grad_norm": 1.661376813899551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230200 + }, + { + "epoch": 1.116476897279615, + "grad_norm": 6.108285219852405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230210 + }, + { + "epoch": 1.1165253954724512, + "grad_norm": 5.880544335923332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230220 + }, + { + "epoch": 1.1165738936652874, + "grad_norm": 5.880103230992972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230230 + }, + { + "epoch": 1.1166223918581233, + "grad_norm": 1.2394086752465228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230240 + }, + { + "epoch": 1.1166708900509594, + "grad_norm": 1.4186341559252469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230250 + }, + { + "epoch": 1.1167193882437956, + "grad_norm": 1.3956333759779227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230260 + }, + { + "epoch": 1.1167678864366317, + "grad_norm": 5.582844551099697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230270 + }, + { + "epoch": 1.1168163846294679, + "grad_norm": 5.395795596996322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230280 + }, + { + "epoch": 1.1168648828223038, + "grad_norm": 1.4348214563142392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230290 + }, + { + "epoch": 1.11691338101514, + "grad_norm": 4.499421493164846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230300 + }, + { + "epoch": 1.116961879207976, + "grad_norm": 4.702047533555742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230310 + }, + { + "epoch": 1.117010377400812, + "grad_norm": 5.214826614974299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230320 + }, + { + "epoch": 1.1170588755936481, + "grad_norm": 2.035397073996137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230330 + }, + { + "epoch": 1.1171073737864843, + "grad_norm": 5.234368813944457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230340 + }, + { + "epoch": 1.1171558719793204, + "grad_norm": 5.851119340150035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230350 + }, + { + "epoch": 1.1172043701721566, + "grad_norm": 4.789009722117044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230360 + }, + { + "epoch": 1.1172528683649925, + "grad_norm": 4.768678536493098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230370 + }, + { + "epoch": 1.1173013665578286, + "grad_norm": 4.3208629563196155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230380 + }, + { + "epoch": 1.1173498647506648, + "grad_norm": 5.281428911985131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230390 + }, + { + "epoch": 1.1173983629435007, + "grad_norm": 6.138028538771323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230400 + }, + { + "epoch": 1.1174468611363368, + "grad_norm": 4.6338141146406997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230410 + }, + { + "epoch": 1.117495359329173, + "grad_norm": 4.656410510506248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230420 + }, + { + "epoch": 1.1175438575220091, + "grad_norm": 4.780626454703452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230430 + }, + { + "epoch": 1.1175923557148453, + "grad_norm": 4.5294862616174214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230440 + }, + { + "epoch": 1.1176408539076812, + "grad_norm": 4.229870853578177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230450 + }, + { + "epoch": 1.1176893521005173, + "grad_norm": 8.462832283839816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230460 + }, + { + "epoch": 1.1177378502933535, + "grad_norm": 4.346823914147535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230470 + }, + { + "epoch": 1.1177863484861896, + "grad_norm": 4.6415604515459563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230480 + }, + { + "epoch": 1.1178348466790256, + "grad_norm": 3.8138892932693125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230490 + }, + { + "epoch": 1.1178833448718617, + "grad_norm": 4.9193255335922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230500 + }, + { + "epoch": 1.1179318430646978, + "grad_norm": 5.154842597221432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230510 + }, + { + "epoch": 1.117980341257534, + "grad_norm": 3.366636747159646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230520 + }, + { + "epoch": 1.11802883945037, + "grad_norm": 4.0937550238595577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230530 + }, + { + "epoch": 1.118077337643206, + "grad_norm": 4.6303784984047525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230540 + }, + { + "epoch": 1.1181258358360422, + "grad_norm": 5.07407378336211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230550 + }, + { + "epoch": 1.1181743340288783, + "grad_norm": 4.055581541706488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230560 + }, + { + "epoch": 1.1182228322217143, + "grad_norm": 3.5966348832516815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230570 + }, + { + "epoch": 1.1182713304145504, + "grad_norm": 4.046769390697591e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230580 + }, + { + "epoch": 1.1183198286073865, + "grad_norm": 4.4519455855152046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230590 + }, + { + "epoch": 1.1183683268002227, + "grad_norm": 3.5169048828720406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230600 + }, + { + "epoch": 1.1184168249930586, + "grad_norm": 3.6002001024826313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230610 + }, + { + "epoch": 1.1184653231858948, + "grad_norm": 5.998635401738284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230620 + }, + { + "epoch": 1.118513821378731, + "grad_norm": 3.9744395508023445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230630 + }, + { + "epoch": 1.118562319571567, + "grad_norm": 4.1286443774879444e-07, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 230640 + }, + { + "epoch": 1.118610817764403, + "grad_norm": 1.3017217952437932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230650 + }, + { + "epoch": 1.118659315957239, + "grad_norm": 3.5366949759918498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230660 + }, + { + "epoch": 1.1187078141500753, + "grad_norm": 2.949767122117919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230670 + }, + { + "epoch": 1.1187563123429114, + "grad_norm": 6.874501650599996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230680 + }, + { + "epoch": 1.1188048105357473, + "grad_norm": 4.388564411783591e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230690 + }, + { + "epoch": 1.1188533087285835, + "grad_norm": 1.6812513194963685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230700 + }, + { + "epoch": 1.1189018069214196, + "grad_norm": 1.3718546370000695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230710 + }, + { + "epoch": 1.1189503051142557, + "grad_norm": 4.06128629037994e-06, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 230720 + }, + { + "epoch": 1.1189988033070917, + "grad_norm": 0.0004396587028168142, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230730 + }, + { + "epoch": 1.1190473014999278, + "grad_norm": 6.647716509178281e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230740 + }, + { + "epoch": 1.119095799692764, + "grad_norm": 0.00013619971286971122, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 230750 + }, + { + "epoch": 1.1191442978856, + "grad_norm": 2.6215826437692158e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230760 + }, + { + "epoch": 1.119192796078436, + "grad_norm": 1.605574470886495e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230770 + }, + { + "epoch": 1.1192412942712722, + "grad_norm": 2.8057793315383606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230780 + }, + { + "epoch": 1.1192897924641083, + "grad_norm": 1.6219615645240992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230790 + }, + { + "epoch": 1.1193382906569445, + "grad_norm": 1.1996650755463634e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230800 + }, + { + "epoch": 1.1193867888497806, + "grad_norm": 1.0307426236977335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230810 + }, + { + "epoch": 1.1194352870426165, + "grad_norm": 9.823649634199683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230820 + }, + { + "epoch": 1.1194837852354527, + "grad_norm": 8.498417628288735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230830 + }, + { + "epoch": 1.1195322834282888, + "grad_norm": 8.173081369022839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230840 + }, + { + "epoch": 1.1195807816211247, + "grad_norm": 7.293721409951104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230850 + }, + { + "epoch": 1.1196292798139609, + "grad_norm": 6.746251528966241e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230860 + }, + { + "epoch": 1.119677778006797, + "grad_norm": 5.771375981566962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230870 + }, + { + "epoch": 1.1197262761996332, + "grad_norm": 5.711764970328659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230880 + }, + { + "epoch": 1.1197747743924693, + "grad_norm": 5.810931270389119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230890 + }, + { + "epoch": 1.1198232725853052, + "grad_norm": 5.408406195783755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230900 + }, + { + "epoch": 1.1198717707781414, + "grad_norm": 4.360020284366328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230910 + }, + { + "epoch": 1.1199202689709775, + "grad_norm": 4.397381871967809e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230920 + }, + { + "epoch": 1.1199687671638137, + "grad_norm": 4.4506195990834385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230930 + }, + { + "epoch": 1.1200172653566496, + "grad_norm": 4.531656031758757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230940 + }, + { + "epoch": 1.1200657635494857, + "grad_norm": 4.118537162867142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230950 + }, + { + "epoch": 1.1201142617423219, + "grad_norm": 4.165818154433509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230960 + }, + { + "epoch": 1.120162759935158, + "grad_norm": 3.387154947631643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230970 + }, + { + "epoch": 1.120211258127994, + "grad_norm": 3.5591438063420355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 230980 + }, + { + "epoch": 1.12025975632083, + "grad_norm": 0.00048046198207885027, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 230990 + }, + { + "epoch": 1.1203082545136662, + "grad_norm": 0.0002171523228753358, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 231000 + }, + { + "epoch": 1.1203567527065024, + "grad_norm": 1.8453874872648157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231010 + }, + { + "epoch": 1.1204052508993383, + "grad_norm": 9.581823178450577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231020 + }, + { + "epoch": 1.1204537490921744, + "grad_norm": 7.619517418788746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231030 + }, + { + "epoch": 1.1205022472850106, + "grad_norm": 7.193704732344486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231040 + }, + { + "epoch": 1.1205507454778467, + "grad_norm": 6.146566192910541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231050 + }, + { + "epoch": 1.1205992436706826, + "grad_norm": 6.0239126469241455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231060 + }, + { + "epoch": 1.1206477418635188, + "grad_norm": 6.058783583284821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231070 + }, + { + "epoch": 1.120696240056355, + "grad_norm": 5.70236261410173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231080 + }, + { + "epoch": 1.120744738249191, + "grad_norm": 5.427158157544909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231090 + }, + { + "epoch": 1.120793236442027, + "grad_norm": 5.102564500703011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231100 + }, + { + "epoch": 1.1208417346348631, + "grad_norm": 4.9445993681729306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231110 + }, + { + "epoch": 1.1208902328276993, + "grad_norm": 4.636319317796733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231120 + }, + { + "epoch": 1.1209387310205354, + "grad_norm": 4.392966729938053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231130 + }, + { + "epoch": 1.1209872292133714, + "grad_norm": 4.776096830028109e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231140 + }, + { + "epoch": 1.1210357274062075, + "grad_norm": 4.406902462505968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231150 + }, + { + "epoch": 1.1210842255990436, + "grad_norm": 3.851759629469598e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231160 + }, + { + "epoch": 1.1211327237918798, + "grad_norm": 4.002888090326451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231170 + }, + { + "epoch": 1.1211812219847157, + "grad_norm": 3.679590008687228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231180 + }, + { + "epoch": 1.1212297201775518, + "grad_norm": 3.7758231883344706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231190 + }, + { + "epoch": 1.121278218370388, + "grad_norm": 3.5592418043961516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231200 + }, + { + "epoch": 1.1213267165632241, + "grad_norm": 3.158205799991265e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 231210 + }, + { + "epoch": 1.12137521475606, + "grad_norm": 3.065637429244816e-05, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 231220 + }, + { + "epoch": 1.1214237129488962, + "grad_norm": 0.00012412549403961748, + "learning_rate": 0.0002, + "loss": 0.0024, + "step": 231230 + }, + { + "epoch": 1.1214722111417323, + "grad_norm": 0.012353068217635155, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 231240 + }, + { + "epoch": 1.1215207093345685, + "grad_norm": 3.809969712165184e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 231250 + }, + { + "epoch": 1.1215692075274044, + "grad_norm": 0.0002815985935740173, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231260 + }, + { + "epoch": 1.1216177057202406, + "grad_norm": 1.950149999174755e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231270 + }, + { + "epoch": 1.1216662039130767, + "grad_norm": 1.2634656741283834e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231280 + }, + { + "epoch": 1.1217147021059128, + "grad_norm": 8.487694867653772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231290 + }, + { + "epoch": 1.1217632002987488, + "grad_norm": 0.00406273128464818, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231300 + }, + { + "epoch": 1.121811698491585, + "grad_norm": 1.6680558474035934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231310 + }, + { + "epoch": 1.121860196684421, + "grad_norm": 9.940052223100793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231320 + }, + { + "epoch": 1.1219086948772572, + "grad_norm": 1.0227034181298222e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231330 + }, + { + "epoch": 1.1219571930700933, + "grad_norm": 7.213554908958031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231340 + }, + { + "epoch": 1.1220056912629293, + "grad_norm": 8.345556125277653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231350 + }, + { + "epoch": 1.1220541894557654, + "grad_norm": 1.1037317563022953e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231360 + }, + { + "epoch": 1.1221026876486015, + "grad_norm": 9.927962310030125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231370 + }, + { + "epoch": 1.1221511858414375, + "grad_norm": 8.124410669552162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231380 + }, + { + "epoch": 1.1221996840342736, + "grad_norm": 6.144907729321858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231390 + }, + { + "epoch": 1.1222481822271098, + "grad_norm": 2.24612213060027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231400 + }, + { + "epoch": 1.122296680419946, + "grad_norm": 6.732598194503225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231410 + }, + { + "epoch": 1.122345178612782, + "grad_norm": 6.691352155030472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231420 + }, + { + "epoch": 1.122393676805618, + "grad_norm": 6.677171313640429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231430 + }, + { + "epoch": 1.1224421749984541, + "grad_norm": 4.936836376145948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231440 + }, + { + "epoch": 1.1224906731912903, + "grad_norm": 6.177159775688779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231450 + }, + { + "epoch": 1.1225391713841264, + "grad_norm": 5.454985966935055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231460 + }, + { + "epoch": 1.1225876695769623, + "grad_norm": 5.4641614042338915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231470 + }, + { + "epoch": 1.1226361677697985, + "grad_norm": 4.787209036294371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231480 + }, + { + "epoch": 1.1226846659626346, + "grad_norm": 4.3149516386620235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231490 + }, + { + "epoch": 1.1227331641554708, + "grad_norm": 5.074352884548716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231500 + }, + { + "epoch": 1.1227816623483067, + "grad_norm": 4.490819264901802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231510 + }, + { + "epoch": 1.1228301605411428, + "grad_norm": 5.3227176977088675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231520 + }, + { + "epoch": 1.122878658733979, + "grad_norm": 7.801627361914143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231530 + }, + { + "epoch": 1.122927156926815, + "grad_norm": 3.598735702325939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231540 + }, + { + "epoch": 1.122975655119651, + "grad_norm": 4.493514097703155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231550 + }, + { + "epoch": 1.1230241533124872, + "grad_norm": 4.309323685447453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231560 + }, + { + "epoch": 1.1230726515053233, + "grad_norm": 1.4507072592095938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231570 + }, + { + "epoch": 1.1231211496981595, + "grad_norm": 3.713203795996378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231580 + }, + { + "epoch": 1.1231696478909954, + "grad_norm": 3.3433204862376442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231590 + }, + { + "epoch": 1.1232181460838315, + "grad_norm": 3.7107843127159867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231600 + }, + { + "epoch": 1.1232666442766677, + "grad_norm": 4.218204594508279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231610 + }, + { + "epoch": 1.1233151424695038, + "grad_norm": 3.6625847315008286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231620 + }, + { + "epoch": 1.1233636406623397, + "grad_norm": 3.495558303256985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231630 + }, + { + "epoch": 1.1234121388551759, + "grad_norm": 3.147913275824976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231640 + }, + { + "epoch": 1.123460637048012, + "grad_norm": 3.889141225954518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231650 + }, + { + "epoch": 1.1235091352408482, + "grad_norm": 3.221127144570346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231660 + }, + { + "epoch": 1.123557633433684, + "grad_norm": 3.3808682928793132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231670 + }, + { + "epoch": 1.1236061316265202, + "grad_norm": 1.3037832104600966e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231680 + }, + { + "epoch": 1.1236546298193564, + "grad_norm": 2.5419844860152807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231690 + }, + { + "epoch": 1.1237031280121925, + "grad_norm": 3.565155338947079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231700 + }, + { + "epoch": 1.1237516262050284, + "grad_norm": 2.776334440568462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231710 + }, + { + "epoch": 1.1238001243978646, + "grad_norm": 1.2903273272968363e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231720 + }, + { + "epoch": 1.1238486225907007, + "grad_norm": 2.594591251181555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231730 + }, + { + "epoch": 1.1238971207835369, + "grad_norm": 2.337705382160493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231740 + }, + { + "epoch": 1.1239456189763728, + "grad_norm": 2.5948982056434033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231750 + }, + { + "epoch": 1.123994117169209, + "grad_norm": 2.7428313842392527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231760 + }, + { + "epoch": 1.124042615362045, + "grad_norm": 2.7216269700147677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231770 + }, + { + "epoch": 1.1240911135548812, + "grad_norm": 2.4657936137373326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231780 + }, + { + "epoch": 1.1241396117477171, + "grad_norm": 2.263629994558869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231790 + }, + { + "epoch": 1.1241881099405533, + "grad_norm": 5.249307378107915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231800 + }, + { + "epoch": 1.1242366081333894, + "grad_norm": 2.1201673007453792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231810 + }, + { + "epoch": 1.1242851063262256, + "grad_norm": 2.3945285647641867e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 231820 + }, + { + "epoch": 1.1243336045190615, + "grad_norm": 0.001094431383535266, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231830 + }, + { + "epoch": 1.1243821027118976, + "grad_norm": 5.151602817932144e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231840 + }, + { + "epoch": 1.1244306009047338, + "grad_norm": 6.407625278370688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231850 + }, + { + "epoch": 1.12447909909757, + "grad_norm": 7.281471425812924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231860 + }, + { + "epoch": 1.124527597290406, + "grad_norm": 3.6030367027706234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231870 + }, + { + "epoch": 1.124576095483242, + "grad_norm": 3.629914090197417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231880 + }, + { + "epoch": 1.1246245936760781, + "grad_norm": 5.317197974363808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231890 + }, + { + "epoch": 1.1246730918689143, + "grad_norm": 3.16513887810288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231900 + }, + { + "epoch": 1.1247215900617502, + "grad_norm": 3.230279389754287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231910 + }, + { + "epoch": 1.1247700882545864, + "grad_norm": 3.133867494398146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231920 + }, + { + "epoch": 1.1248185864474225, + "grad_norm": 3.994214694102993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231930 + }, + { + "epoch": 1.1248670846402586, + "grad_norm": 0.0007896709721535444, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231940 + }, + { + "epoch": 1.1249155828330948, + "grad_norm": 2.9503537462005625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231950 + }, + { + "epoch": 1.1249640810259307, + "grad_norm": 2.9982686555740656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231960 + }, + { + "epoch": 1.1250125792187669, + "grad_norm": 2.798360583255999e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231970 + }, + { + "epoch": 1.125061077411603, + "grad_norm": 2.879110525100259e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231980 + }, + { + "epoch": 1.125109575604439, + "grad_norm": 4.282329882698832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 231990 + }, + { + "epoch": 1.125158073797275, + "grad_norm": 2.5844476567726815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232000 + }, + { + "epoch": 1.1252065719901112, + "grad_norm": 2.4788823793642223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232010 + }, + { + "epoch": 1.1252550701829473, + "grad_norm": 2.5056656340893824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232020 + }, + { + "epoch": 1.1253035683757835, + "grad_norm": 2.64466416410869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232030 + }, + { + "epoch": 1.1253520665686194, + "grad_norm": 4.045719833811745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232040 + }, + { + "epoch": 1.1254005647614556, + "grad_norm": 2.427990466458141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232050 + }, + { + "epoch": 1.1254490629542917, + "grad_norm": 2.30722184824117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232060 + }, + { + "epoch": 1.1254975611471278, + "grad_norm": 2.445999143674271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232070 + }, + { + "epoch": 1.1255460593399638, + "grad_norm": 2.47522098106856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232080 + }, + { + "epoch": 1.1255945575328, + "grad_norm": 3.7352526760514593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232090 + }, + { + "epoch": 1.125643055725636, + "grad_norm": 2.2662393348582555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232100 + }, + { + "epoch": 1.1256915539184722, + "grad_norm": 2.228541006843443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232110 + }, + { + "epoch": 1.1257400521113081, + "grad_norm": 2.1294197267707204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232120 + }, + { + "epoch": 1.1257885503041443, + "grad_norm": 2.281252818647772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232130 + }, + { + "epoch": 1.1258370484969804, + "grad_norm": 3.5004254641535226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232140 + }, + { + "epoch": 1.1258855466898166, + "grad_norm": 2.0607108126569074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232150 + }, + { + "epoch": 1.1259340448826525, + "grad_norm": 2.060906581391464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232160 + }, + { + "epoch": 1.1259825430754886, + "grad_norm": 1.90851824299898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232170 + }, + { + "epoch": 1.1260310412683248, + "grad_norm": 2.227017830591649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232180 + }, + { + "epoch": 1.126079539461161, + "grad_norm": 2.9699351671297336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232190 + }, + { + "epoch": 1.1261280376539968, + "grad_norm": 1.9226292806706624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232200 + }, + { + "epoch": 1.126176535846833, + "grad_norm": 1.9351195987837855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232210 + }, + { + "epoch": 1.1262250340396691, + "grad_norm": 2.2294466361927334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232220 + }, + { + "epoch": 1.1262735322325053, + "grad_norm": 2.0823374597966904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232230 + }, + { + "epoch": 1.1263220304253412, + "grad_norm": 2.878381565096788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232240 + }, + { + "epoch": 1.1263705286181773, + "grad_norm": 1.774846850821632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232250 + }, + { + "epoch": 1.1264190268110135, + "grad_norm": 1.7752181520336308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232260 + }, + { + "epoch": 1.1264675250038496, + "grad_norm": 1.6577240558035555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232270 + }, + { + "epoch": 1.1265160231966855, + "grad_norm": 1.7523608448755112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232280 + }, + { + "epoch": 1.1265645213895217, + "grad_norm": 2.511421143935877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232290 + }, + { + "epoch": 1.1266130195823578, + "grad_norm": 1.5887362678768113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232300 + }, + { + "epoch": 1.126661517775194, + "grad_norm": 1.8871575093726278e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232310 + }, + { + "epoch": 1.12671001596803, + "grad_norm": 1.7409312249583309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232320 + }, + { + "epoch": 1.126758514160866, + "grad_norm": 1.6681154875186621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232330 + }, + { + "epoch": 1.1268070123537022, + "grad_norm": 2.4165199192793807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232340 + }, + { + "epoch": 1.1268555105465383, + "grad_norm": 1.659786221352988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232350 + }, + { + "epoch": 1.1269040087393742, + "grad_norm": 1.6528304058738286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232360 + }, + { + "epoch": 1.1269525069322104, + "grad_norm": 1.6127621620398713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232370 + }, + { + "epoch": 1.1270010051250465, + "grad_norm": 1.6505149460499524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232380 + }, + { + "epoch": 1.1270495033178827, + "grad_norm": 2.487308165655122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232390 + }, + { + "epoch": 1.1270980015107188, + "grad_norm": 1.48333924698818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232400 + }, + { + "epoch": 1.1271464997035547, + "grad_norm": 1.6659741959301755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232410 + }, + { + "epoch": 1.1271949978963909, + "grad_norm": 1.4829004157945747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232420 + }, + { + "epoch": 1.127243496089227, + "grad_norm": 1.525191805740178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232430 + }, + { + "epoch": 1.127291994282063, + "grad_norm": 2.617713334984728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232440 + }, + { + "epoch": 1.127340492474899, + "grad_norm": 1.4239876691135578e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232450 + }, + { + "epoch": 1.1273889906677352, + "grad_norm": 1.4161493027131655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232460 + }, + { + "epoch": 1.1274374888605714, + "grad_norm": 2.9507457384170266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232470 + }, + { + "epoch": 1.1274859870534075, + "grad_norm": 1.401991312377504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232480 + }, + { + "epoch": 1.1275344852462434, + "grad_norm": 2.1545054096350214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232490 + }, + { + "epoch": 1.1275829834390796, + "grad_norm": 1.2793489077012055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232500 + }, + { + "epoch": 1.1276314816319157, + "grad_norm": 1.2725329270324437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232510 + }, + { + "epoch": 1.1276799798247519, + "grad_norm": 1.3526316706702346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232520 + }, + { + "epoch": 1.1277284780175878, + "grad_norm": 1.24556822811428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232530 + }, + { + "epoch": 1.127776976210424, + "grad_norm": 2.0718093765026424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232540 + }, + { + "epoch": 1.12782547440326, + "grad_norm": 1.2242579714438762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232550 + }, + { + "epoch": 1.1278739725960962, + "grad_norm": 1.211547555612924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232560 + }, + { + "epoch": 1.1279224707889322, + "grad_norm": 1.2866759107055259e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232570 + }, + { + "epoch": 1.1279709689817683, + "grad_norm": 1.660276666370919e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232580 + }, + { + "epoch": 1.1280194671746044, + "grad_norm": 1.7268826013605576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232590 + }, + { + "epoch": 1.1280679653674406, + "grad_norm": 1.1971080766670639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232600 + }, + { + "epoch": 1.1281164635602765, + "grad_norm": 1.2358964340819512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232610 + }, + { + "epoch": 1.1281649617531126, + "grad_norm": 1.3934744629295892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232620 + }, + { + "epoch": 1.1282134599459488, + "grad_norm": 1.081059053831268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232630 + }, + { + "epoch": 1.128261958138785, + "grad_norm": 1.8244321609017788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232640 + }, + { + "epoch": 1.1283104563316209, + "grad_norm": 1.089278612198541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232650 + }, + { + "epoch": 1.128358954524457, + "grad_norm": 1.161335035249067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232660 + }, + { + "epoch": 1.1284074527172931, + "grad_norm": 1.139059463639569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232670 + }, + { + "epoch": 1.1284559509101293, + "grad_norm": 1.1151860235258937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232680 + }, + { + "epoch": 1.1285044491029652, + "grad_norm": 1.548119598737685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232690 + }, + { + "epoch": 1.1285529472958014, + "grad_norm": 1.5293209116862272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232700 + }, + { + "epoch": 1.1286014454886375, + "grad_norm": 1.0477307341716369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232710 + }, + { + "epoch": 1.1286499436814736, + "grad_norm": 1.0456784593770863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232720 + }, + { + "epoch": 1.1286984418743096, + "grad_norm": 1.278563331652549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232730 + }, + { + "epoch": 1.1287469400671457, + "grad_norm": 1.470619849897048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232740 + }, + { + "epoch": 1.1287954382599819, + "grad_norm": 1.012307279779634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232750 + }, + { + "epoch": 1.128843936452818, + "grad_norm": 9.58779082793626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232760 + }, + { + "epoch": 1.128892434645654, + "grad_norm": 1.0810847470565932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232770 + }, + { + "epoch": 1.12894093283849, + "grad_norm": 9.672279475125833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232780 + }, + { + "epoch": 1.1289894310313262, + "grad_norm": 1.428535711056611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232790 + }, + { + "epoch": 1.1290379292241624, + "grad_norm": 9.455368399358122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232800 + }, + { + "epoch": 1.1290864274169983, + "grad_norm": 9.535090725876216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232810 + }, + { + "epoch": 1.1291349256098344, + "grad_norm": 9.494094683759613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232820 + }, + { + "epoch": 1.1291834238026706, + "grad_norm": 9.161192906503857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232830 + }, + { + "epoch": 1.1292319219955067, + "grad_norm": 1.416607346982346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232840 + }, + { + "epoch": 1.1292804201883428, + "grad_norm": 8.83453537880996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232850 + }, + { + "epoch": 1.1293289183811788, + "grad_norm": 1.1321067177050281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232860 + }, + { + "epoch": 1.129377416574015, + "grad_norm": 1.0955657216982218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232870 + }, + { + "epoch": 1.129425914766851, + "grad_norm": 8.961090429693286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232880 + }, + { + "epoch": 1.129474412959687, + "grad_norm": 1.422108198312344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232890 + }, + { + "epoch": 1.1295229111525231, + "grad_norm": 7.962493668856041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232900 + }, + { + "epoch": 1.1295714093453593, + "grad_norm": 9.290683919971343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232910 + }, + { + "epoch": 1.1296199075381954, + "grad_norm": 8.505674600201019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232920 + }, + { + "epoch": 1.1296684057310316, + "grad_norm": 8.520994470018195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232930 + }, + { + "epoch": 1.1297169039238675, + "grad_norm": 1.3779363143839873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232940 + }, + { + "epoch": 1.1297654021167036, + "grad_norm": 8.464565439680882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232950 + }, + { + "epoch": 1.1298139003095398, + "grad_norm": 8.859527724780492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232960 + }, + { + "epoch": 1.1298623985023757, + "grad_norm": 8.084866180979589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232970 + }, + { + "epoch": 1.1299108966952118, + "grad_norm": 1.004068622023624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232980 + }, + { + "epoch": 1.129959394888048, + "grad_norm": 1.096025471269968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 232990 + }, + { + "epoch": 1.1300078930808841, + "grad_norm": 8.025574516068446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233000 + }, + { + "epoch": 1.1300563912737203, + "grad_norm": 7.188588142525987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233010 + }, + { + "epoch": 1.1301048894665562, + "grad_norm": 7.760011158097768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233020 + }, + { + "epoch": 1.1301533876593923, + "grad_norm": 7.73883584770374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233030 + }, + { + "epoch": 1.1302018858522285, + "grad_norm": 1.2316352240304695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233040 + }, + { + "epoch": 1.1302503840450646, + "grad_norm": 7.792967835484887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233050 + }, + { + "epoch": 1.1302988822379005, + "grad_norm": 1.2708236454272992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233060 + }, + { + "epoch": 1.1303473804307367, + "grad_norm": 7.109698003660014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233070 + }, + { + "epoch": 1.1303958786235728, + "grad_norm": 8.670202191751741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233080 + }, + { + "epoch": 1.130444376816409, + "grad_norm": 9.82933215709636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233090 + }, + { + "epoch": 1.130492875009245, + "grad_norm": 7.09347887095646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233100 + }, + { + "epoch": 1.130541373202081, + "grad_norm": 7.062019449222134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233110 + }, + { + "epoch": 1.1305898713949172, + "grad_norm": 7.060424991323089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233120 + }, + { + "epoch": 1.1306383695877533, + "grad_norm": 6.55650637781946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233130 + }, + { + "epoch": 1.1306868677805892, + "grad_norm": 9.999228041124297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233140 + }, + { + "epoch": 1.1307353659734254, + "grad_norm": 7.528969945269637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233150 + }, + { + "epoch": 1.1307838641662615, + "grad_norm": 6.426793675018416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233160 + }, + { + "epoch": 1.1308323623590977, + "grad_norm": 6.92685091507883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233170 + }, + { + "epoch": 1.1308808605519336, + "grad_norm": 8.18948876712966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233180 + }, + { + "epoch": 1.1309293587447697, + "grad_norm": 9.686350495030638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233190 + }, + { + "epoch": 1.1309778569376059, + "grad_norm": 6.739333571204043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233200 + }, + { + "epoch": 1.131026355130442, + "grad_norm": 6.434216857087449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233210 + }, + { + "epoch": 1.131074853323278, + "grad_norm": 8.071822890087788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233220 + }, + { + "epoch": 1.131123351516114, + "grad_norm": 6.415293114514498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233230 + }, + { + "epoch": 1.1311718497089502, + "grad_norm": 9.140099450632988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233240 + }, + { + "epoch": 1.1312203479017864, + "grad_norm": 6.53732456612488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233250 + }, + { + "epoch": 1.1312688460946223, + "grad_norm": 6.040481821401045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233260 + }, + { + "epoch": 1.1313173442874584, + "grad_norm": 6.199312565513537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233270 + }, + { + "epoch": 1.1313658424802946, + "grad_norm": 5.997732159812585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233280 + }, + { + "epoch": 1.1314143406731307, + "grad_norm": 8.515733043168439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233290 + }, + { + "epoch": 1.1314628388659669, + "grad_norm": 5.541250516216678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233300 + }, + { + "epoch": 1.1315113370588028, + "grad_norm": 6.079420131754887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233310 + }, + { + "epoch": 1.131559835251639, + "grad_norm": 0.00026488120784051716, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233320 + }, + { + "epoch": 1.131608333444475, + "grad_norm": 5.78180674892792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233330 + }, + { + "epoch": 1.131656831637311, + "grad_norm": 7.930823358037742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233340 + }, + { + "epoch": 1.1317053298301472, + "grad_norm": 6.048680916137528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233350 + }, + { + "epoch": 1.1317538280229833, + "grad_norm": 5.971851919639448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233360 + }, + { + "epoch": 1.1318023262158194, + "grad_norm": 5.890459533475223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233370 + }, + { + "epoch": 1.1318508244086556, + "grad_norm": 5.937280889156682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233380 + }, + { + "epoch": 1.1318993226014915, + "grad_norm": 8.360938181795063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233390 + }, + { + "epoch": 1.1319478207943277, + "grad_norm": 5.571276915361523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233400 + }, + { + "epoch": 1.1319963189871638, + "grad_norm": 5.408948595686525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233410 + }, + { + "epoch": 1.1320448171799997, + "grad_norm": 5.825787070534716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233420 + }, + { + "epoch": 1.1320933153728359, + "grad_norm": 5.247793524176814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233430 + }, + { + "epoch": 1.132141813565672, + "grad_norm": 7.168323463702109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233440 + }, + { + "epoch": 1.1321903117585081, + "grad_norm": 5.164096137377783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233450 + }, + { + "epoch": 1.1322388099513443, + "grad_norm": 5.312879807206627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233460 + }, + { + "epoch": 1.1322873081441802, + "grad_norm": 5.446981958812103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233470 + }, + { + "epoch": 1.1323358063370164, + "grad_norm": 5.596952519226761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233480 + }, + { + "epoch": 1.1323843045298525, + "grad_norm": 8.682419547767495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233490 + }, + { + "epoch": 1.1324328027226884, + "grad_norm": 5.187253009353299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233500 + }, + { + "epoch": 1.1324813009155246, + "grad_norm": 5.491820616043697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233510 + }, + { + "epoch": 1.1325297991083607, + "grad_norm": 5.021147444495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233520 + }, + { + "epoch": 1.1325782973011969, + "grad_norm": 6.14853718161612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233530 + }, + { + "epoch": 1.132626795494033, + "grad_norm": 6.924723834345059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233540 + }, + { + "epoch": 1.132675293686869, + "grad_norm": 4.934556727675954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233550 + }, + { + "epoch": 1.132723791879705, + "grad_norm": 4.772632564709056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233560 + }, + { + "epoch": 1.1327722900725412, + "grad_norm": 4.925955749968125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233570 + }, + { + "epoch": 1.1328207882653774, + "grad_norm": 5.20988919561205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233580 + }, + { + "epoch": 1.1328692864582133, + "grad_norm": 6.820215503466898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233590 + }, + { + "epoch": 1.1329177846510494, + "grad_norm": 4.759394585107657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233600 + }, + { + "epoch": 1.1329662828438856, + "grad_norm": 0.0001328234648099169, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233610 + }, + { + "epoch": 1.1330147810367217, + "grad_norm": 5.281247013044776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233620 + }, + { + "epoch": 1.1330632792295576, + "grad_norm": 4.988781370229844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233630 + }, + { + "epoch": 1.1331117774223938, + "grad_norm": 5.864686727363733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233640 + }, + { + "epoch": 1.13316027561523, + "grad_norm": 4.489435241339379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233650 + }, + { + "epoch": 1.133208773808066, + "grad_norm": 4.3063593579972803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233660 + }, + { + "epoch": 1.133257272000902, + "grad_norm": 4.948105356561427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233670 + }, + { + "epoch": 1.1333057701937381, + "grad_norm": 5.000131295673782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233680 + }, + { + "epoch": 1.1333542683865743, + "grad_norm": 6.055703352103592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233690 + }, + { + "epoch": 1.1334027665794104, + "grad_norm": 4.77829132705665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233700 + }, + { + "epoch": 1.1334512647722463, + "grad_norm": 4.877309152107046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233710 + }, + { + "epoch": 1.1334997629650825, + "grad_norm": 4.765572043652355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233720 + }, + { + "epoch": 1.1335482611579186, + "grad_norm": 4.340652424161817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233730 + }, + { + "epoch": 1.1335967593507548, + "grad_norm": 5.928242217123625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233740 + }, + { + "epoch": 1.1336452575435907, + "grad_norm": 4.5950665139571356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233750 + }, + { + "epoch": 1.1336937557364268, + "grad_norm": 4.272296791896224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233760 + }, + { + "epoch": 1.133742253929263, + "grad_norm": 4.148422476646374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233770 + }, + { + "epoch": 1.1337907521220991, + "grad_norm": 4.2521352838775783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233780 + }, + { + "epoch": 1.133839250314935, + "grad_norm": 6.062387001293246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233790 + }, + { + "epoch": 1.1338877485077712, + "grad_norm": 4.5091499600857787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233800 + }, + { + "epoch": 1.1339362467006073, + "grad_norm": 4.781150551025348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233810 + }, + { + "epoch": 1.1339847448934435, + "grad_norm": 4.266021562671085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233820 + }, + { + "epoch": 1.1340332430862796, + "grad_norm": 4.4096290707784647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233830 + }, + { + "epoch": 1.1340817412791155, + "grad_norm": 6.505255782940367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233840 + }, + { + "epoch": 1.1341302394719517, + "grad_norm": 4.289080095531972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233850 + }, + { + "epoch": 1.1341787376647878, + "grad_norm": 4.2359931740065804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233860 + }, + { + "epoch": 1.1342272358576238, + "grad_norm": 1.0918581665464444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233870 + }, + { + "epoch": 1.13427573405046, + "grad_norm": 4.6329529368449585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233880 + }, + { + "epoch": 1.134324232243296, + "grad_norm": 5.816536372549308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233890 + }, + { + "epoch": 1.1343727304361322, + "grad_norm": 3.931690741865168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233900 + }, + { + "epoch": 1.1344212286289683, + "grad_norm": 4.026253179745254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233910 + }, + { + "epoch": 1.1344697268218042, + "grad_norm": 3.8891633380444546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233920 + }, + { + "epoch": 1.1345182250146404, + "grad_norm": 9.424265385860053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233930 + }, + { + "epoch": 1.1345667232074765, + "grad_norm": 5.367572839531931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233940 + }, + { + "epoch": 1.1346152214003125, + "grad_norm": 3.877634924265294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233950 + }, + { + "epoch": 1.1346637195931486, + "grad_norm": 3.843470608444477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233960 + }, + { + "epoch": 1.1347122177859847, + "grad_norm": 4.172734691110236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233970 + }, + { + "epoch": 1.1347607159788209, + "grad_norm": 3.8063390661591257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233980 + }, + { + "epoch": 1.134809214171657, + "grad_norm": 4.6957995891716564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 233990 + }, + { + "epoch": 1.134857712364493, + "grad_norm": 3.905198582287994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234000 + }, + { + "epoch": 1.134906210557329, + "grad_norm": 3.856185060158168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234010 + }, + { + "epoch": 1.1349547087501652, + "grad_norm": 3.8159092241585313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234020 + }, + { + "epoch": 1.1350032069430012, + "grad_norm": 3.9425515296898084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234030 + }, + { + "epoch": 1.1350517051358373, + "grad_norm": 5.668911740031035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234040 + }, + { + "epoch": 1.1351002033286735, + "grad_norm": 4.441997702997469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234050 + }, + { + "epoch": 1.1351487015215096, + "grad_norm": 4.3143023731317953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234060 + }, + { + "epoch": 1.1351971997143457, + "grad_norm": 4.5059974240757583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234070 + }, + { + "epoch": 1.1352456979071817, + "grad_norm": 4.2846730252676934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234080 + }, + { + "epoch": 1.1352941961000178, + "grad_norm": 5.591019771600259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234090 + }, + { + "epoch": 1.135342694292854, + "grad_norm": 4.451204631550354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234100 + }, + { + "epoch": 1.13539119248569, + "grad_norm": 4.153617112478969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234110 + }, + { + "epoch": 1.135439690678526, + "grad_norm": 3.8604855490120826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234120 + }, + { + "epoch": 1.1354881888713622, + "grad_norm": 3.7917672557341575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234130 + }, + { + "epoch": 1.1355366870641983, + "grad_norm": 5.786662882201199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234140 + }, + { + "epoch": 1.1355851852570344, + "grad_norm": 4.0616734509058006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234150 + }, + { + "epoch": 1.1356336834498704, + "grad_norm": 3.810010582583345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234160 + }, + { + "epoch": 1.1356821816427065, + "grad_norm": 4.6086759653007903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234170 + }, + { + "epoch": 1.1357306798355427, + "grad_norm": 4.642395765586116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234180 + }, + { + "epoch": 1.1357791780283788, + "grad_norm": 5.169235919311177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234190 + }, + { + "epoch": 1.1358276762212147, + "grad_norm": 4.1212493329112476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234200 + }, + { + "epoch": 1.1358761744140509, + "grad_norm": 4.0502715137336054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234210 + }, + { + "epoch": 1.135924672606887, + "grad_norm": 3.9104787674659747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234220 + }, + { + "epoch": 1.1359731707997232, + "grad_norm": 3.981957945597969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234230 + }, + { + "epoch": 1.136021668992559, + "grad_norm": 4.477204527120193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234240 + }, + { + "epoch": 1.1360701671853952, + "grad_norm": 3.7441932931869815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234250 + }, + { + "epoch": 1.1361186653782314, + "grad_norm": 4.067443342137267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234260 + }, + { + "epoch": 1.1361671635710675, + "grad_norm": 4.1483528434582695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234270 + }, + { + "epoch": 1.1362156617639034, + "grad_norm": 4.2420126078468456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234280 + }, + { + "epoch": 1.1362641599567396, + "grad_norm": 4.859105047216872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234290 + }, + { + "epoch": 1.1363126581495757, + "grad_norm": 3.993150130554568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234300 + }, + { + "epoch": 1.1363611563424119, + "grad_norm": 4.992562026018277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234310 + }, + { + "epoch": 1.1364096545352478, + "grad_norm": 3.617752497575566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234320 + }, + { + "epoch": 1.136458152728084, + "grad_norm": 4.069060821620951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234330 + }, + { + "epoch": 1.13650665092092, + "grad_norm": 4.2270406197530974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234340 + }, + { + "epoch": 1.1365551491137562, + "grad_norm": 3.9043726474119467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234350 + }, + { + "epoch": 1.1366036473065924, + "grad_norm": 3.290226118224382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234360 + }, + { + "epoch": 1.1366521454994283, + "grad_norm": 3.376096913143556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234370 + }, + { + "epoch": 1.1367006436922644, + "grad_norm": 3.3896250783982396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234380 + }, + { + "epoch": 1.1367491418851006, + "grad_norm": 4.5926665848128323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234390 + }, + { + "epoch": 1.1367976400779365, + "grad_norm": 3.599547540034109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234400 + }, + { + "epoch": 1.1368461382707726, + "grad_norm": 3.326984199247818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234410 + }, + { + "epoch": 1.1368946364636088, + "grad_norm": 3.2128858151736495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234420 + }, + { + "epoch": 1.136943134656445, + "grad_norm": 3.3717429914759123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234430 + }, + { + "epoch": 1.136991632849281, + "grad_norm": 4.67433977746623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234440 + }, + { + "epoch": 1.137040131042117, + "grad_norm": 3.1937494782141584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234450 + }, + { + "epoch": 1.1370886292349531, + "grad_norm": 3.3236784702239675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234460 + }, + { + "epoch": 1.1371371274277893, + "grad_norm": 3.449381722475664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234470 + }, + { + "epoch": 1.1371856256206252, + "grad_norm": 3.133827419787849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234480 + }, + { + "epoch": 1.1372341238134613, + "grad_norm": 4.2454254867152486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234490 + }, + { + "epoch": 1.1372826220062975, + "grad_norm": 3.1243919806911435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234500 + }, + { + "epoch": 1.1373311201991336, + "grad_norm": 3.0688769925291126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234510 + }, + { + "epoch": 1.1373796183919698, + "grad_norm": 3.0626014790868794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234520 + }, + { + "epoch": 1.1374281165848057, + "grad_norm": 3.03420449654368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234530 + }, + { + "epoch": 1.1374766147776418, + "grad_norm": 3.5897210182156414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234540 + }, + { + "epoch": 1.137525112970478, + "grad_norm": 3.333253459913976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234550 + }, + { + "epoch": 1.137573611163314, + "grad_norm": 2.9862152928217256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234560 + }, + { + "epoch": 1.13762210935615, + "grad_norm": 3.162710981996497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234570 + }, + { + "epoch": 1.1376706075489862, + "grad_norm": 3.1731127592138364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234580 + }, + { + "epoch": 1.1377191057418223, + "grad_norm": 3.717315451012837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234590 + }, + { + "epoch": 1.1377676039346585, + "grad_norm": 3.139508919502987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234600 + }, + { + "epoch": 1.1378161021274944, + "grad_norm": 3.373801007455768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234610 + }, + { + "epoch": 1.1378646003203305, + "grad_norm": 3.0266670592027367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234620 + }, + { + "epoch": 1.1379130985131667, + "grad_norm": 2.870811499633419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234630 + }, + { + "epoch": 1.1379615967060028, + "grad_norm": 3.422279917231208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234640 + }, + { + "epoch": 1.1380100948988388, + "grad_norm": 3.957804324272729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234650 + }, + { + "epoch": 1.138058593091675, + "grad_norm": 2.8681964181487274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234660 + }, + { + "epoch": 1.138107091284511, + "grad_norm": 3.997144517597917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234670 + }, + { + "epoch": 1.1381555894773472, + "grad_norm": 3.25291097169611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234680 + }, + { + "epoch": 1.138204087670183, + "grad_norm": 3.4553488603705773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234690 + }, + { + "epoch": 1.1382525858630193, + "grad_norm": 3.0999422051536385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234700 + }, + { + "epoch": 1.1383010840558554, + "grad_norm": 4.4142731780993927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234710 + }, + { + "epoch": 1.1383495822486915, + "grad_norm": 2.8658519113378134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234720 + }, + { + "epoch": 1.1383980804415275, + "grad_norm": 2.7444932015896484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234730 + }, + { + "epoch": 1.1384465786343636, + "grad_norm": 3.249809878980159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234740 + }, + { + "epoch": 1.1384950768271997, + "grad_norm": 3.600395359626418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234750 + }, + { + "epoch": 1.138543575020036, + "grad_norm": 3.153212162487762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234760 + }, + { + "epoch": 1.1385920732128718, + "grad_norm": 2.7824566473100276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234770 + }, + { + "epoch": 1.138640571405708, + "grad_norm": 2.671919503427489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234780 + }, + { + "epoch": 1.138689069598544, + "grad_norm": 3.2739319522079313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234790 + }, + { + "epoch": 1.1387375677913802, + "grad_norm": 2.4839306433932506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234800 + }, + { + "epoch": 1.1387860659842162, + "grad_norm": 1.074657347999164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234810 + }, + { + "epoch": 1.1388345641770523, + "grad_norm": 2.6805614083968976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234820 + }, + { + "epoch": 1.1388830623698885, + "grad_norm": 2.594366605990217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234830 + }, + { + "epoch": 1.1389315605627246, + "grad_norm": 3.071923799780052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234840 + }, + { + "epoch": 1.1389800587555605, + "grad_norm": 2.9812429147568764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234850 + }, + { + "epoch": 1.1390285569483967, + "grad_norm": 2.9258896461215045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234860 + }, + { + "epoch": 1.1390770551412328, + "grad_norm": 2.5662978941909387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234870 + }, + { + "epoch": 1.139125553334069, + "grad_norm": 2.619929659886111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234880 + }, + { + "epoch": 1.139174051526905, + "grad_norm": 3.196572890828975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234890 + }, + { + "epoch": 1.139222549719741, + "grad_norm": 3.332380913434463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234900 + }, + { + "epoch": 1.1392710479125772, + "grad_norm": 2.593780550341762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234910 + }, + { + "epoch": 1.1393195461054133, + "grad_norm": 2.4059670522547094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234920 + }, + { + "epoch": 1.1393680442982492, + "grad_norm": 2.625634749620076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234930 + }, + { + "epoch": 1.1394165424910854, + "grad_norm": 3.1390356980409706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234940 + }, + { + "epoch": 1.1394650406839215, + "grad_norm": 2.477858629390539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234950 + }, + { + "epoch": 1.1395135388767577, + "grad_norm": 3.005905284680921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234960 + }, + { + "epoch": 1.1395620370695938, + "grad_norm": 2.403812118245696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234970 + }, + { + "epoch": 1.1396105352624297, + "grad_norm": 2.5628023081480933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234980 + }, + { + "epoch": 1.1396590334552659, + "grad_norm": 2.722166811963689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 234990 + }, + { + "epoch": 1.139707531648102, + "grad_norm": 2.348710950172972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235000 + }, + { + "epoch": 1.139756029840938, + "grad_norm": 2.3611825383795804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235010 + }, + { + "epoch": 1.139804528033774, + "grad_norm": 2.375189325221072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235020 + }, + { + "epoch": 1.1398530262266102, + "grad_norm": 2.394307898612169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235030 + }, + { + "epoch": 1.1399015244194464, + "grad_norm": 2.4763303940744663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235040 + }, + { + "epoch": 1.1399500226122825, + "grad_norm": 2.3798480697223567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235050 + }, + { + "epoch": 1.1399985208051184, + "grad_norm": 2.2665754784156888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235060 + }, + { + "epoch": 1.1400470189979546, + "grad_norm": 2.2925816267616028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235070 + }, + { + "epoch": 1.1400955171907907, + "grad_norm": 2.2431341051287745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235080 + }, + { + "epoch": 1.1401440153836269, + "grad_norm": 2.476794804806559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235090 + }, + { + "epoch": 1.1401925135764628, + "grad_norm": 2.3477896604617854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235100 + }, + { + "epoch": 1.140241011769299, + "grad_norm": 2.2856463033349428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235110 + }, + { + "epoch": 1.140289509962135, + "grad_norm": 2.242227736815039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235120 + }, + { + "epoch": 1.1403380081549712, + "grad_norm": 2.0996516525428888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235130 + }, + { + "epoch": 1.1403865063478071, + "grad_norm": 2.387778010870534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235140 + }, + { + "epoch": 1.1404350045406433, + "grad_norm": 2.348365200077751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235150 + }, + { + "epoch": 1.1404835027334794, + "grad_norm": 2.3021908646114753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235160 + }, + { + "epoch": 1.1405320009263156, + "grad_norm": 2.1375676340085192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235170 + }, + { + "epoch": 1.1405804991191515, + "grad_norm": 2.2890047546297865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235180 + }, + { + "epoch": 1.1406289973119876, + "grad_norm": 2.443022140141693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235190 + }, + { + "epoch": 1.1406774955048238, + "grad_norm": 2.1214815149050992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235200 + }, + { + "epoch": 1.14072599369766, + "grad_norm": 2.153180389541376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235210 + }, + { + "epoch": 1.1407744918904958, + "grad_norm": 2.1425431384614058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235220 + }, + { + "epoch": 1.140822990083332, + "grad_norm": 2.114936705766013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235230 + }, + { + "epoch": 1.1408714882761681, + "grad_norm": 2.643014624936768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235240 + }, + { + "epoch": 1.1409199864690043, + "grad_norm": 2.0770229980371369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235250 + }, + { + "epoch": 1.1409684846618402, + "grad_norm": 2.124633624589478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235260 + }, + { + "epoch": 1.1410169828546763, + "grad_norm": 2.0854871252140583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235270 + }, + { + "epoch": 1.1410654810475125, + "grad_norm": 2.2794483811594546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235280 + }, + { + "epoch": 1.1411139792403486, + "grad_norm": 2.499921833987173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235290 + }, + { + "epoch": 1.1411624774331846, + "grad_norm": 1.9704029341482965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235300 + }, + { + "epoch": 1.1412109756260207, + "grad_norm": 2.0799747346700315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235310 + }, + { + "epoch": 1.1412594738188568, + "grad_norm": 1.947906582699943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235320 + }, + { + "epoch": 1.141307972011693, + "grad_norm": 2.2434578283991868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235330 + }, + { + "epoch": 1.1413564702045291, + "grad_norm": 2.5550107807248423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235340 + }, + { + "epoch": 1.141404968397365, + "grad_norm": 1.9955201935317746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235350 + }, + { + "epoch": 1.1414534665902012, + "grad_norm": 2.107475154389249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235360 + }, + { + "epoch": 1.1415019647830373, + "grad_norm": 2.0447684789814957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235370 + }, + { + "epoch": 1.1415504629758733, + "grad_norm": 1.922706047707834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235380 + }, + { + "epoch": 1.1415989611687094, + "grad_norm": 2.5061555675165437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235390 + }, + { + "epoch": 1.1416474593615455, + "grad_norm": 2.048353735517594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235400 + }, + { + "epoch": 1.1416959575543817, + "grad_norm": 1.9232429337989743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235410 + }, + { + "epoch": 1.1417444557472178, + "grad_norm": 1.7671277419140097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235420 + }, + { + "epoch": 1.1417929539400538, + "grad_norm": 2.1711619524467096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235430 + }, + { + "epoch": 1.14184145213289, + "grad_norm": 2.102739529163955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235440 + }, + { + "epoch": 1.141889950325726, + "grad_norm": 1.8082486974435596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235450 + }, + { + "epoch": 1.141938448518562, + "grad_norm": 1.779704774662605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235460 + }, + { + "epoch": 1.141986946711398, + "grad_norm": 1.763648782571181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235470 + }, + { + "epoch": 1.1420354449042343, + "grad_norm": 1.8021685832536605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235480 + }, + { + "epoch": 1.1420839430970704, + "grad_norm": 2.2906931462784996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235490 + }, + { + "epoch": 1.1421324412899065, + "grad_norm": 1.761980144010522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235500 + }, + { + "epoch": 1.1421809394827425, + "grad_norm": 1.8623015307639434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235510 + }, + { + "epoch": 1.1422294376755786, + "grad_norm": 1.755234677602857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235520 + }, + { + "epoch": 1.1422779358684148, + "grad_norm": 2.0321127180977783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235530 + }, + { + "epoch": 1.1423264340612507, + "grad_norm": 2.125908480365979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235540 + }, + { + "epoch": 1.1423749322540868, + "grad_norm": 1.8191903450315294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235550 + }, + { + "epoch": 1.142423430446923, + "grad_norm": 1.8406788626634807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235560 + }, + { + "epoch": 1.142471928639759, + "grad_norm": 1.780676228690936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235570 + }, + { + "epoch": 1.1425204268325952, + "grad_norm": 1.7075898028906522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235580 + }, + { + "epoch": 1.1425689250254312, + "grad_norm": 1.887203353589939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235590 + }, + { + "epoch": 1.1426174232182673, + "grad_norm": 1.6826925275381655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235600 + }, + { + "epoch": 1.1426659214111035, + "grad_norm": 1.706385575062086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235610 + }, + { + "epoch": 1.1427144196039396, + "grad_norm": 1.6896802890187246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235620 + }, + { + "epoch": 1.1427629177967755, + "grad_norm": 1.7914261718487978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235630 + }, + { + "epoch": 1.1428114159896117, + "grad_norm": 2.2263172638758988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235640 + }, + { + "epoch": 1.1428599141824478, + "grad_norm": 1.88806154710619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235650 + }, + { + "epoch": 1.142908412375284, + "grad_norm": 1.7150308906366263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235660 + }, + { + "epoch": 1.1429569105681199, + "grad_norm": 1.7071869251594762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235670 + }, + { + "epoch": 1.143005408760956, + "grad_norm": 1.790870243212339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235680 + }, + { + "epoch": 1.1430539069537922, + "grad_norm": 1.8593978268199862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235690 + }, + { + "epoch": 1.1431024051466283, + "grad_norm": 1.6332369057181495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235700 + }, + { + "epoch": 1.1431509033394642, + "grad_norm": 1.5881629167324718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235710 + }, + { + "epoch": 1.1431994015323004, + "grad_norm": 1.7791684570056532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235720 + }, + { + "epoch": 1.1432478997251365, + "grad_norm": 1.5345173665082257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235730 + }, + { + "epoch": 1.1432963979179727, + "grad_norm": 1.8389360434412083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235740 + }, + { + "epoch": 1.1433448961108086, + "grad_norm": 1.5922695695280709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235750 + }, + { + "epoch": 1.1433933943036447, + "grad_norm": 1.620831540094514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235760 + }, + { + "epoch": 1.1434418924964809, + "grad_norm": 1.638077833376883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235770 + }, + { + "epoch": 1.143490390689317, + "grad_norm": 1.6728708374103007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235780 + }, + { + "epoch": 1.143538888882153, + "grad_norm": 1.9214763824493275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235790 + }, + { + "epoch": 1.143587387074989, + "grad_norm": 1.6428768390142068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235800 + }, + { + "epoch": 1.1436358852678252, + "grad_norm": 1.68830283087118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235810 + }, + { + "epoch": 1.1436843834606614, + "grad_norm": 1.663750310854084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235820 + }, + { + "epoch": 1.1437328816534973, + "grad_norm": 1.743479316473895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235830 + }, + { + "epoch": 1.1437813798463334, + "grad_norm": 1.6918393441756052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235840 + }, + { + "epoch": 1.1438298780391696, + "grad_norm": 1.5468583569600014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235850 + }, + { + "epoch": 1.1438783762320057, + "grad_norm": 1.5503208317113604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235860 + }, + { + "epoch": 1.1439268744248419, + "grad_norm": 1.6407797431838844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235870 + }, + { + "epoch": 1.1439753726176778, + "grad_norm": 1.519141932249113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235880 + }, + { + "epoch": 1.144023870810514, + "grad_norm": 1.7077627489925362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235890 + }, + { + "epoch": 1.14407236900335, + "grad_norm": 1.544348151583108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235900 + }, + { + "epoch": 1.144120867196186, + "grad_norm": 1.6555038939713995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235910 + }, + { + "epoch": 1.1441693653890221, + "grad_norm": 1.5571424683002988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235920 + }, + { + "epoch": 1.1442178635818583, + "grad_norm": 1.508934843741372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235930 + }, + { + "epoch": 1.1442663617746944, + "grad_norm": 1.5844349832150328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235940 + }, + { + "epoch": 1.1443148599675306, + "grad_norm": 1.5030126121473586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235950 + }, + { + "epoch": 1.1443633581603665, + "grad_norm": 1.486233287550931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235960 + }, + { + "epoch": 1.1444118563532026, + "grad_norm": 1.5606468650730676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235970 + }, + { + "epoch": 1.1444603545460388, + "grad_norm": 1.5166119737841655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235980 + }, + { + "epoch": 1.1445088527388747, + "grad_norm": 1.728290897062834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 235990 + }, + { + "epoch": 1.1445573509317108, + "grad_norm": 1.423991449200912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236000 + }, + { + "epoch": 1.144605849124547, + "grad_norm": 1.5831682276257197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236010 + }, + { + "epoch": 1.1446543473173831, + "grad_norm": 1.5040049561321212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236020 + }, + { + "epoch": 1.1447028455102193, + "grad_norm": 1.450087410148626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236030 + }, + { + "epoch": 1.1447513437030552, + "grad_norm": 1.5707929890140804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236040 + }, + { + "epoch": 1.1447998418958913, + "grad_norm": 1.3775050433650904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236050 + }, + { + "epoch": 1.1448483400887275, + "grad_norm": 1.3986382896291616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236060 + }, + { + "epoch": 1.1448968382815634, + "grad_norm": 1.5161248256845283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236070 + }, + { + "epoch": 1.1449453364743996, + "grad_norm": 1.5649956708330137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236080 + }, + { + "epoch": 1.1449938346672357, + "grad_norm": 1.727775753579408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236090 + }, + { + "epoch": 1.1450423328600718, + "grad_norm": 1.709138643946062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236100 + }, + { + "epoch": 1.145090831052908, + "grad_norm": 1.617078311255682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236110 + }, + { + "epoch": 1.145139329245744, + "grad_norm": 1.3930132070072432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236120 + }, + { + "epoch": 1.14518782743858, + "grad_norm": 1.3835875734002911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236130 + }, + { + "epoch": 1.1452363256314162, + "grad_norm": 1.6606320230039273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236140 + }, + { + "epoch": 1.1452848238242523, + "grad_norm": 1.6143948755598103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236150 + }, + { + "epoch": 1.1453333220170883, + "grad_norm": 1.4769398148928303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236160 + }, + { + "epoch": 1.1453818202099244, + "grad_norm": 4.6118580598886183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236170 + }, + { + "epoch": 1.1454303184027606, + "grad_norm": 1.3966585754587868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236180 + }, + { + "epoch": 1.1454788165955967, + "grad_norm": 1.7118070161359356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236190 + }, + { + "epoch": 1.1455273147884326, + "grad_norm": 1.364925026337005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236200 + }, + { + "epoch": 1.1455758129812688, + "grad_norm": 1.3932319120613101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236210 + }, + { + "epoch": 1.145624311174105, + "grad_norm": 1.375852036744618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236220 + }, + { + "epoch": 1.145672809366941, + "grad_norm": 1.3946385024610208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236230 + }, + { + "epoch": 1.145721307559777, + "grad_norm": 1.6598363572484232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236240 + }, + { + "epoch": 1.1457698057526131, + "grad_norm": 1.4484378141332854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236250 + }, + { + "epoch": 1.1458183039454493, + "grad_norm": 1.3253982444894064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236260 + }, + { + "epoch": 1.1458668021382854, + "grad_norm": 1.3542286581014196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236270 + }, + { + "epoch": 1.1459153003311213, + "grad_norm": 1.3995558845181222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236280 + }, + { + "epoch": 1.1459637985239575, + "grad_norm": 1.420432624854584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236290 + }, + { + "epoch": 1.1460122967167936, + "grad_norm": 1.4545609872129717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236300 + }, + { + "epoch": 1.1460607949096298, + "grad_norm": 1.3370227236464416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236310 + }, + { + "epoch": 1.1461092931024657, + "grad_norm": 1.3821480138176412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236320 + }, + { + "epoch": 1.1461577912953018, + "grad_norm": 1.296856453336659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236330 + }, + { + "epoch": 1.146206289488138, + "grad_norm": 1.4773603140838532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236340 + }, + { + "epoch": 1.146254787680974, + "grad_norm": 1.32658414031539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236350 + }, + { + "epoch": 1.14630328587381, + "grad_norm": 1.5714437040514895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236360 + }, + { + "epoch": 1.1463517840666462, + "grad_norm": 1.3193070458328293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236370 + }, + { + "epoch": 1.1464002822594823, + "grad_norm": 1.2632574453164125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236380 + }, + { + "epoch": 1.1464487804523185, + "grad_norm": 1.4626495214997703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236390 + }, + { + "epoch": 1.1464972786451546, + "grad_norm": 1.3946596766345465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236400 + }, + { + "epoch": 1.1465457768379905, + "grad_norm": 2.0141546031027246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236410 + }, + { + "epoch": 1.1465942750308267, + "grad_norm": 1.317791316068906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236420 + }, + { + "epoch": 1.1466427732236628, + "grad_norm": 1.3406395282800077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236430 + }, + { + "epoch": 1.1466912714164987, + "grad_norm": 1.5385978713311488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236440 + }, + { + "epoch": 1.1467397696093349, + "grad_norm": 1.3015245770020556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236450 + }, + { + "epoch": 1.146788267802171, + "grad_norm": 1.3410583221684647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236460 + }, + { + "epoch": 1.1468367659950072, + "grad_norm": 1.2522147585514176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236470 + }, + { + "epoch": 1.1468852641878433, + "grad_norm": 1.322046614404826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236480 + }, + { + "epoch": 1.1469337623806792, + "grad_norm": 1.4760227884380583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236490 + }, + { + "epoch": 1.1469822605735154, + "grad_norm": 1.3078948768452392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236500 + }, + { + "epoch": 1.1470307587663515, + "grad_norm": 1.3063190351658704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236510 + }, + { + "epoch": 1.1470792569591874, + "grad_norm": 1.262325923789831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236520 + }, + { + "epoch": 1.1471277551520236, + "grad_norm": 1.316225564096385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236530 + }, + { + "epoch": 1.1471762533448597, + "grad_norm": 1.3578699054050958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236540 + }, + { + "epoch": 1.1472247515376959, + "grad_norm": 1.262993265527257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236550 + }, + { + "epoch": 1.147273249730532, + "grad_norm": 1.3702671708415437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236560 + }, + { + "epoch": 1.147321747923368, + "grad_norm": 1.2441189767287142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236570 + }, + { + "epoch": 1.147370246116204, + "grad_norm": 1.1927721743631992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236580 + }, + { + "epoch": 1.1474187443090402, + "grad_norm": 1.4390910507700028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236590 + }, + { + "epoch": 1.1474672425018762, + "grad_norm": 1.4002164050452848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236600 + }, + { + "epoch": 1.1475157406947123, + "grad_norm": 1.291726476893018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236610 + }, + { + "epoch": 1.1475642388875484, + "grad_norm": 1.219699896637394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236620 + }, + { + "epoch": 1.1476127370803846, + "grad_norm": 1.252879826552089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236630 + }, + { + "epoch": 1.1476612352732207, + "grad_norm": 1.4018294791640074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236640 + }, + { + "epoch": 1.1477097334660566, + "grad_norm": 1.3260957132388285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236650 + }, + { + "epoch": 1.1477582316588928, + "grad_norm": 1.2343453192897869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236660 + }, + { + "epoch": 1.147806729851729, + "grad_norm": 1.2196571219647012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236670 + }, + { + "epoch": 1.147855228044565, + "grad_norm": 1.359794907784817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236680 + }, + { + "epoch": 1.147903726237401, + "grad_norm": 1.3706765855658887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236690 + }, + { + "epoch": 1.1479522244302371, + "grad_norm": 1.3153766076356987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236700 + }, + { + "epoch": 1.1480007226230733, + "grad_norm": 1.237545319554556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236710 + }, + { + "epoch": 1.1480492208159094, + "grad_norm": 1.2307495467211993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236720 + }, + { + "epoch": 1.1480977190087454, + "grad_norm": 1.234041633324523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236730 + }, + { + "epoch": 1.1481462172015815, + "grad_norm": 1.3773043860965117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236740 + }, + { + "epoch": 1.1481947153944176, + "grad_norm": 1.2132122151342628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236750 + }, + { + "epoch": 1.1482432135872538, + "grad_norm": 1.2549602956823946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236760 + }, + { + "epoch": 1.1482917117800897, + "grad_norm": 1.2548632355446898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236770 + }, + { + "epoch": 1.1483402099729259, + "grad_norm": 1.2326417220265284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236780 + }, + { + "epoch": 1.148388708165762, + "grad_norm": 1.2477995880999515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236790 + }, + { + "epoch": 1.1484372063585981, + "grad_norm": 1.3412612531737977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236800 + }, + { + "epoch": 1.148485704551434, + "grad_norm": 1.1703865254730772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236810 + }, + { + "epoch": 1.1485342027442702, + "grad_norm": 1.1695428980829092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236820 + }, + { + "epoch": 1.1485827009371063, + "grad_norm": 1.3395177234087896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236830 + }, + { + "epoch": 1.1486311991299425, + "grad_norm": 1.2927245052196668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236840 + }, + { + "epoch": 1.1486796973227784, + "grad_norm": 1.2535903692878492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236850 + }, + { + "epoch": 1.1487281955156146, + "grad_norm": 1.203830635176928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236860 + }, + { + "epoch": 1.1487766937084507, + "grad_norm": 1.2278395900011674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236870 + }, + { + "epoch": 1.1488251919012868, + "grad_norm": 1.14483910351737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236880 + }, + { + "epoch": 1.1488736900941228, + "grad_norm": 1.33604331153947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236890 + }, + { + "epoch": 1.148922188286959, + "grad_norm": 1.5730550728676462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236900 + }, + { + "epoch": 1.148970686479795, + "grad_norm": 1.2851464248342381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236910 + }, + { + "epoch": 1.1490191846726312, + "grad_norm": 1.194721903630125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236920 + }, + { + "epoch": 1.1490676828654673, + "grad_norm": 1.1679806277697935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236930 + }, + { + "epoch": 1.1491161810583033, + "grad_norm": 1.249244405698846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236940 + }, + { + "epoch": 1.1491646792511394, + "grad_norm": 1.222815626533702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236950 + }, + { + "epoch": 1.1492131774439756, + "grad_norm": 1.1849027004018353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236960 + }, + { + "epoch": 1.1492616756368115, + "grad_norm": 1.2688799699844822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236970 + }, + { + "epoch": 1.1493101738296476, + "grad_norm": 1.1248974374211684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236980 + }, + { + "epoch": 1.1493586720224838, + "grad_norm": 1.2603354093698727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 236990 + }, + { + "epoch": 1.14940717021532, + "grad_norm": 1.1632716478970906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237000 + }, + { + "epoch": 1.149455668408156, + "grad_norm": 1.1339826500034178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237010 + }, + { + "epoch": 1.149504166600992, + "grad_norm": 1.1875932415250645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237020 + }, + { + "epoch": 1.1495526647938281, + "grad_norm": 1.0960555840711095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237030 + }, + { + "epoch": 1.1496011629866643, + "grad_norm": 1.2143027561251074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237040 + }, + { + "epoch": 1.1496496611795002, + "grad_norm": 1.2030767493342864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237050 + }, + { + "epoch": 1.1496981593723363, + "grad_norm": 1.2217252276514046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237060 + }, + { + "epoch": 1.1497466575651725, + "grad_norm": 1.1411285782969571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237070 + }, + { + "epoch": 1.1497951557580086, + "grad_norm": 1.1383234976847234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237080 + }, + { + "epoch": 1.1498436539508448, + "grad_norm": 1.2108107227959408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237090 + }, + { + "epoch": 1.1498921521436807, + "grad_norm": 1.1376260999895749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237100 + }, + { + "epoch": 1.1499406503365168, + "grad_norm": 1.1830929480538543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237110 + }, + { + "epoch": 1.149989148529353, + "grad_norm": 1.2072845834154577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237120 + }, + { + "epoch": 1.150037646722189, + "grad_norm": 1.161435179142245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237130 + }, + { + "epoch": 1.150086144915025, + "grad_norm": 1.1852046100102598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237140 + }, + { + "epoch": 1.1501346431078612, + "grad_norm": 1.2091642531686375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237150 + }, + { + "epoch": 1.1501831413006973, + "grad_norm": 1.231935442547183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237160 + }, + { + "epoch": 1.1502316394935335, + "grad_norm": 1.1272639000026174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237170 + }, + { + "epoch": 1.1502801376863694, + "grad_norm": 1.1249244380451273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237180 + }, + { + "epoch": 1.1503286358792055, + "grad_norm": 1.2074376343207405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237190 + }, + { + "epoch": 1.1503771340720417, + "grad_norm": 1.1425153445543401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237200 + }, + { + "epoch": 1.1504256322648778, + "grad_norm": 1.1206480365899552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237210 + }, + { + "epoch": 1.1504741304577137, + "grad_norm": 1.1456714332780393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237220 + }, + { + "epoch": 1.1505226286505499, + "grad_norm": 1.0550164120104455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237230 + }, + { + "epoch": 1.150571126843386, + "grad_norm": 1.1449581194256098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237240 + }, + { + "epoch": 1.1506196250362222, + "grad_norm": 1.542486955941058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237250 + }, + { + "epoch": 1.150668123229058, + "grad_norm": 1.0988028265046523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237260 + }, + { + "epoch": 1.1507166214218942, + "grad_norm": 1.1158959978274652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237270 + }, + { + "epoch": 1.1507651196147304, + "grad_norm": 1.0795456262258085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237280 + }, + { + "epoch": 1.1508136178075665, + "grad_norm": 1.1983479453192558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237290 + }, + { + "epoch": 1.1508621160004024, + "grad_norm": 1.1174259384461038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237300 + }, + { + "epoch": 1.1509106141932386, + "grad_norm": 1.1227501772737014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237310 + }, + { + "epoch": 1.1509591123860747, + "grad_norm": 1.169928722788427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237320 + }, + { + "epoch": 1.1510076105789109, + "grad_norm": 1.069481072590861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237330 + }, + { + "epoch": 1.1510561087717468, + "grad_norm": 1.324187337559124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237340 + }, + { + "epoch": 1.151104606964583, + "grad_norm": 1.1067368177464232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237350 + }, + { + "epoch": 1.151153105157419, + "grad_norm": 1.1405333566472109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237360 + }, + { + "epoch": 1.1512016033502552, + "grad_norm": 1.1612461037202593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237370 + }, + { + "epoch": 1.1512501015430914, + "grad_norm": 1.0282710150022467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237380 + }, + { + "epoch": 1.1512985997359273, + "grad_norm": 1.1219827200648069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237390 + }, + { + "epoch": 1.1513470979287634, + "grad_norm": 1.1435459867925601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237400 + }, + { + "epoch": 1.1513955961215996, + "grad_norm": 1.127625282038025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237410 + }, + { + "epoch": 1.1514440943144355, + "grad_norm": 1.0576736997336411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237420 + }, + { + "epoch": 1.1514925925072717, + "grad_norm": 1.0483735479738243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237430 + }, + { + "epoch": 1.1515410907001078, + "grad_norm": 1.1585949266645912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237440 + }, + { + "epoch": 1.151589588892944, + "grad_norm": 1.0823855234320945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237450 + }, + { + "epoch": 1.15163808708578, + "grad_norm": 1.0869208466601776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237460 + }, + { + "epoch": 1.151686585278616, + "grad_norm": 1.0900842539740552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237470 + }, + { + "epoch": 1.1517350834714521, + "grad_norm": 1.1158783763676183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237480 + }, + { + "epoch": 1.1517835816642883, + "grad_norm": 1.1121895937549198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237490 + }, + { + "epoch": 1.1518320798571242, + "grad_norm": 1.103690649983946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237500 + }, + { + "epoch": 1.1518805780499604, + "grad_norm": 1.0546381901122004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237510 + }, + { + "epoch": 1.1519290762427965, + "grad_norm": 1.0840573594350644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237520 + }, + { + "epoch": 1.1519775744356326, + "grad_norm": 1.068243449253714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237530 + }, + { + "epoch": 1.1520260726284688, + "grad_norm": 1.0846472520142925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237540 + }, + { + "epoch": 1.1520745708213047, + "grad_norm": 1.1059456284101543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237550 + }, + { + "epoch": 1.1521230690141409, + "grad_norm": 1.1029725754951869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237560 + }, + { + "epoch": 1.152171567206977, + "grad_norm": 1.3011646160521195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237570 + }, + { + "epoch": 1.152220065399813, + "grad_norm": 9.938613487747716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237580 + }, + { + "epoch": 1.152268563592649, + "grad_norm": 1.0889951340686821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237590 + }, + { + "epoch": 1.1523170617854852, + "grad_norm": 1.0705952036005328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237600 + }, + { + "epoch": 1.1523655599783214, + "grad_norm": 1.0664827243545005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237610 + }, + { + "epoch": 1.1524140581711575, + "grad_norm": 1.0683399409572303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237620 + }, + { + "epoch": 1.1524625563639934, + "grad_norm": 1.0398351690810159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237630 + }, + { + "epoch": 1.1525110545568296, + "grad_norm": 1.0941176498135974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237640 + }, + { + "epoch": 1.1525595527496657, + "grad_norm": 1.8912749055743916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237650 + }, + { + "epoch": 1.1526080509425018, + "grad_norm": 1.1042405390071508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237660 + }, + { + "epoch": 1.1526565491353378, + "grad_norm": 1.0622365209655982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237670 + }, + { + "epoch": 1.152705047328174, + "grad_norm": 1.1029590041289339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237680 + }, + { + "epoch": 1.15275354552101, + "grad_norm": 1.0665097249784594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237690 + }, + { + "epoch": 1.1528020437138462, + "grad_norm": 1.125520086020515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237700 + }, + { + "epoch": 1.1528505419066821, + "grad_norm": 1.0445861420294023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237710 + }, + { + "epoch": 1.1528990400995183, + "grad_norm": 1.0942626005316924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237720 + }, + { + "epoch": 1.1529475382923544, + "grad_norm": 1.123655195556239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237730 + }, + { + "epoch": 1.1529960364851906, + "grad_norm": 1.0432945174443375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237740 + }, + { + "epoch": 1.1530445346780265, + "grad_norm": 1.0964716778971706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237750 + }, + { + "epoch": 1.1530930328708626, + "grad_norm": 1.0925840143727328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237760 + }, + { + "epoch": 1.1531415310636988, + "grad_norm": 1.0372104242151181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237770 + }, + { + "epoch": 1.153190029256535, + "grad_norm": 9.89928068406698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237780 + }, + { + "epoch": 1.1532385274493708, + "grad_norm": 1.0262203176125695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237790 + }, + { + "epoch": 1.153287025642207, + "grad_norm": 1.0664721372677377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237800 + }, + { + "epoch": 1.1533355238350431, + "grad_norm": 1.0475969247636385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237810 + }, + { + "epoch": 1.1533840220278793, + "grad_norm": 1.0258538196694644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237820 + }, + { + "epoch": 1.1534325202207152, + "grad_norm": 1.0563196184421031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237830 + }, + { + "epoch": 1.1534810184135513, + "grad_norm": 1.0771606184789562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237840 + }, + { + "epoch": 1.1535295166063875, + "grad_norm": 1.001279414936107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237850 + }, + { + "epoch": 1.1535780147992236, + "grad_norm": 1.0377420522900138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237860 + }, + { + "epoch": 1.1536265129920595, + "grad_norm": 1.0305496545015558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237870 + }, + { + "epoch": 1.1536750111848957, + "grad_norm": 9.952236723620445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237880 + }, + { + "epoch": 1.1537235093777318, + "grad_norm": 1.0349199897063954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237890 + }, + { + "epoch": 1.153772007570568, + "grad_norm": 1.0898809676973542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237900 + }, + { + "epoch": 1.1538205057634041, + "grad_norm": 1.008927483781008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237910 + }, + { + "epoch": 1.15386900395624, + "grad_norm": 1.0591336518928074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237920 + }, + { + "epoch": 1.1539175021490762, + "grad_norm": 1.0491257285139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237930 + }, + { + "epoch": 1.1539660003419123, + "grad_norm": 1.0616518864026148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237940 + }, + { + "epoch": 1.1540144985347482, + "grad_norm": 1.0179377341046347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237950 + }, + { + "epoch": 1.1540629967275844, + "grad_norm": 1.0471274691781218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237960 + }, + { + "epoch": 1.1541114949204205, + "grad_norm": 2.7131250135425944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237970 + }, + { + "epoch": 1.1541599931132567, + "grad_norm": 9.568256587044743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237980 + }, + { + "epoch": 1.1542084913060928, + "grad_norm": 9.80100907099768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 237990 + }, + { + "epoch": 1.1542569894989287, + "grad_norm": 1.0142270667756748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238000 + }, + { + "epoch": 1.1543054876917649, + "grad_norm": 1.0035464015345497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238010 + }, + { + "epoch": 1.154353985884601, + "grad_norm": 1.0326294130891256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238020 + }, + { + "epoch": 1.154402484077437, + "grad_norm": 9.670064571309922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238030 + }, + { + "epoch": 1.154450982270273, + "grad_norm": 9.803011380427051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238040 + }, + { + "epoch": 1.1544994804631092, + "grad_norm": 1.0009111406361626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238050 + }, + { + "epoch": 1.1545479786559454, + "grad_norm": 9.912356802033173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238060 + }, + { + "epoch": 1.1545964768487815, + "grad_norm": 1.016818060861624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238070 + }, + { + "epoch": 1.1546449750416174, + "grad_norm": 1.0092448832210721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238080 + }, + { + "epoch": 1.1546934732344536, + "grad_norm": 9.898393216190016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238090 + }, + { + "epoch": 1.1547419714272897, + "grad_norm": 9.827984115418076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238100 + }, + { + "epoch": 1.1547904696201257, + "grad_norm": 1.0136075445643655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238110 + }, + { + "epoch": 1.1548389678129618, + "grad_norm": 1.0197717159599051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238120 + }, + { + "epoch": 1.154887466005798, + "grad_norm": 9.182053872791585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238130 + }, + { + "epoch": 1.154935964198634, + "grad_norm": 9.638178255499952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238140 + }, + { + "epoch": 1.1549844623914702, + "grad_norm": 1.0125397409410652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238150 + }, + { + "epoch": 1.1550329605843062, + "grad_norm": 9.610570117501993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238160 + }, + { + "epoch": 1.1550814587771423, + "grad_norm": 1.0156175989095573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238170 + }, + { + "epoch": 1.1551299569699784, + "grad_norm": 9.698342040564967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238180 + }, + { + "epoch": 1.1551784551628146, + "grad_norm": 1.004664298420721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238190 + }, + { + "epoch": 1.1552269533556505, + "grad_norm": 1.0016365337151001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238200 + }, + { + "epoch": 1.1552754515484867, + "grad_norm": 9.918943533193669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238210 + }, + { + "epoch": 1.1553239497413228, + "grad_norm": 9.400765321743165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238220 + }, + { + "epoch": 1.155372447934159, + "grad_norm": 9.55051504547555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238230 + }, + { + "epoch": 1.1554209461269949, + "grad_norm": 9.631020247979905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238240 + }, + { + "epoch": 1.155469444319831, + "grad_norm": 9.802097622468864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238250 + }, + { + "epoch": 1.1555179425126672, + "grad_norm": 9.60356558721287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238260 + }, + { + "epoch": 1.1555664407055033, + "grad_norm": 9.419203195193404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238270 + }, + { + "epoch": 1.1556149388983392, + "grad_norm": 9.682316459702633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238280 + }, + { + "epoch": 1.1556634370911754, + "grad_norm": 9.735916250974697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238290 + }, + { + "epoch": 1.1557119352840115, + "grad_norm": 9.406081602492122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238300 + }, + { + "epoch": 1.1557604334768476, + "grad_norm": 9.994483463060533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238310 + }, + { + "epoch": 1.1558089316696836, + "grad_norm": 9.621990670893865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238320 + }, + { + "epoch": 1.1558574298625197, + "grad_norm": 9.437052739258434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238330 + }, + { + "epoch": 1.1559059280553559, + "grad_norm": 1.0071789802168496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238340 + }, + { + "epoch": 1.155954426248192, + "grad_norm": 9.315797200315501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238350 + }, + { + "epoch": 1.156002924441028, + "grad_norm": 1.1796820587051116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238360 + }, + { + "epoch": 1.156051422633864, + "grad_norm": 1.0228097835351946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238370 + }, + { + "epoch": 1.1560999208267002, + "grad_norm": 8.876493495790783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238380 + }, + { + "epoch": 1.1561484190195364, + "grad_norm": 9.769959063987699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238390 + }, + { + "epoch": 1.1561969172123723, + "grad_norm": 9.612213602849806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238400 + }, + { + "epoch": 1.1562454154052084, + "grad_norm": 1.1044787129321776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238410 + }, + { + "epoch": 1.1562939135980446, + "grad_norm": 9.169026071731423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238420 + }, + { + "epoch": 1.1563424117908807, + "grad_norm": 8.620623503929892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238430 + }, + { + "epoch": 1.1563909099837169, + "grad_norm": 9.771320463869415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238440 + }, + { + "epoch": 1.1564394081765528, + "grad_norm": 9.646755927406048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238450 + }, + { + "epoch": 1.156487906369389, + "grad_norm": 9.33271664393942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238460 + }, + { + "epoch": 1.156536404562225, + "grad_norm": 9.189942318243993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238470 + }, + { + "epoch": 1.156584902755061, + "grad_norm": 9.078031126819042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238480 + }, + { + "epoch": 1.1566334009478971, + "grad_norm": 9.527163058464794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238490 + }, + { + "epoch": 1.1566818991407333, + "grad_norm": 9.440739745514293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238500 + }, + { + "epoch": 1.1567303973335694, + "grad_norm": 9.505615139460133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238510 + }, + { + "epoch": 1.1567788955264056, + "grad_norm": 9.117523092072588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238520 + }, + { + "epoch": 1.1568273937192415, + "grad_norm": 8.861143641070157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238530 + }, + { + "epoch": 1.1568758919120776, + "grad_norm": 9.112469001593126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238540 + }, + { + "epoch": 1.1569243901049138, + "grad_norm": 9.876656292817643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238550 + }, + { + "epoch": 1.1569728882977497, + "grad_norm": 8.8367663408917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238560 + }, + { + "epoch": 1.1570213864905858, + "grad_norm": 8.594928146976599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238570 + }, + { + "epoch": 1.157069884683422, + "grad_norm": 8.977039556157251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238580 + }, + { + "epoch": 1.1571183828762581, + "grad_norm": 9.256644517563473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238590 + }, + { + "epoch": 1.1571668810690943, + "grad_norm": 8.856683564317791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238600 + }, + { + "epoch": 1.1572153792619302, + "grad_norm": 8.730657441446965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238610 + }, + { + "epoch": 1.1572638774547663, + "grad_norm": 1.2371963009627507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238620 + }, + { + "epoch": 1.1573123756476025, + "grad_norm": 8.61213393932303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238630 + }, + { + "epoch": 1.1573608738404384, + "grad_norm": 9.010353352323364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238640 + }, + { + "epoch": 1.1574093720332745, + "grad_norm": 8.813373852945006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238650 + }, + { + "epoch": 1.1574578702261107, + "grad_norm": 8.817385577231107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238660 + }, + { + "epoch": 1.1575063684189468, + "grad_norm": 8.878067347950491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238670 + }, + { + "epoch": 1.157554866611783, + "grad_norm": 8.174699672736097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238680 + }, + { + "epoch": 1.157603364804619, + "grad_norm": 9.006454604332248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238690 + }, + { + "epoch": 1.157651862997455, + "grad_norm": 9.165047032411167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238700 + }, + { + "epoch": 1.1577003611902912, + "grad_norm": 8.873964674194212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238710 + }, + { + "epoch": 1.1577488593831273, + "grad_norm": 8.681605123683767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238720 + }, + { + "epoch": 1.1577973575759632, + "grad_norm": 8.18589711570894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238730 + }, + { + "epoch": 1.1578458557687994, + "grad_norm": 8.683793595309908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238740 + }, + { + "epoch": 1.1578943539616355, + "grad_norm": 9.37541742018766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238750 + }, + { + "epoch": 1.1579428521544717, + "grad_norm": 9.181849236483686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238760 + }, + { + "epoch": 1.1579913503473076, + "grad_norm": 8.773750437285344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238770 + }, + { + "epoch": 1.1580398485401437, + "grad_norm": 8.676601481738544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238780 + }, + { + "epoch": 1.15808834673298, + "grad_norm": 8.629839953755436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238790 + }, + { + "epoch": 1.158136844925816, + "grad_norm": 8.65315357145846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238800 + }, + { + "epoch": 1.158185343118652, + "grad_norm": 8.725475453275067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238810 + }, + { + "epoch": 1.158233841311488, + "grad_norm": 8.409836027567508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238820 + }, + { + "epoch": 1.1582823395043242, + "grad_norm": 9.500247699634201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238830 + }, + { + "epoch": 1.1583308376971604, + "grad_norm": 8.905612247644967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238840 + }, + { + "epoch": 1.1583793358899963, + "grad_norm": 8.69825242943989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238850 + }, + { + "epoch": 1.1584278340828325, + "grad_norm": 8.186170674662208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238860 + }, + { + "epoch": 1.1584763322756686, + "grad_norm": 8.543036500441303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238870 + }, + { + "epoch": 1.1585248304685047, + "grad_norm": 8.373454818411119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238880 + }, + { + "epoch": 1.1585733286613407, + "grad_norm": 8.521813299466885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238890 + }, + { + "epoch": 1.1586218268541768, + "grad_norm": 8.465644185662313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238900 + }, + { + "epoch": 1.158670325047013, + "grad_norm": 8.200294132620911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238910 + }, + { + "epoch": 1.158718823239849, + "grad_norm": 8.73776713206098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238920 + }, + { + "epoch": 1.158767321432685, + "grad_norm": 8.190201583602175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238930 + }, + { + "epoch": 1.1588158196255212, + "grad_norm": 8.286860975204036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238940 + }, + { + "epoch": 1.1588643178183573, + "grad_norm": 8.071825874367278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238950 + }, + { + "epoch": 1.1589128160111934, + "grad_norm": 8.493002212617284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238960 + }, + { + "epoch": 1.1589613142040296, + "grad_norm": 8.25863395448323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238970 + }, + { + "epoch": 1.1590098123968655, + "grad_norm": 8.370024318082869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238980 + }, + { + "epoch": 1.1590583105897017, + "grad_norm": 1.0308329478903033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 238990 + }, + { + "epoch": 1.1591068087825378, + "grad_norm": 8.204089141372606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239000 + }, + { + "epoch": 1.1591553069753737, + "grad_norm": 8.275687690684208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239010 + }, + { + "epoch": 1.1592038051682099, + "grad_norm": 8.080229463303112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239020 + }, + { + "epoch": 1.159252303361046, + "grad_norm": 7.772825938445749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239030 + }, + { + "epoch": 1.1593008015538822, + "grad_norm": 8.15976903822957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239040 + }, + { + "epoch": 1.1593492997467183, + "grad_norm": 8.226436420954997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239050 + }, + { + "epoch": 1.1593977979395542, + "grad_norm": 8.026706410646511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239060 + }, + { + "epoch": 1.1594462961323904, + "grad_norm": 8.10082596558459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239070 + }, + { + "epoch": 1.1594947943252265, + "grad_norm": 7.79593563038361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239080 + }, + { + "epoch": 1.1595432925180624, + "grad_norm": 8.340329316069983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239090 + }, + { + "epoch": 1.1595917907108986, + "grad_norm": 8.336584045309792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239100 + }, + { + "epoch": 1.1596402889037347, + "grad_norm": 8.232792936269107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239110 + }, + { + "epoch": 1.1596887870965709, + "grad_norm": 8.011495111759359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239120 + }, + { + "epoch": 1.159737285289407, + "grad_norm": 7.355378528473011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239130 + }, + { + "epoch": 1.159785783482243, + "grad_norm": 7.90518583926314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239140 + }, + { + "epoch": 1.159834281675079, + "grad_norm": 8.014592367544537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239150 + }, + { + "epoch": 1.1598827798679152, + "grad_norm": 8.066765389003194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239160 + }, + { + "epoch": 1.1599312780607511, + "grad_norm": 1.0293694430174583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239170 + }, + { + "epoch": 1.1599797762535873, + "grad_norm": 7.448895900097341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239180 + }, + { + "epoch": 1.1600282744464234, + "grad_norm": 7.678639946107069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239190 + }, + { + "epoch": 1.1600767726392596, + "grad_norm": 8.520234473508026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239200 + }, + { + "epoch": 1.1601252708320957, + "grad_norm": 1.24919580457572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239210 + }, + { + "epoch": 1.1601737690249316, + "grad_norm": 8.030009013282324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239220 + }, + { + "epoch": 1.1602222672177678, + "grad_norm": 7.605839869029296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239230 + }, + { + "epoch": 1.160270765410604, + "grad_norm": 7.965777371055083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239240 + }, + { + "epoch": 1.16031926360344, + "grad_norm": 7.666809409556663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239250 + }, + { + "epoch": 1.160367761796276, + "grad_norm": 7.556720760248936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239260 + }, + { + "epoch": 1.1604162599891121, + "grad_norm": 7.529782664050799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239270 + }, + { + "epoch": 1.1604647581819483, + "grad_norm": 7.254108425058803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239280 + }, + { + "epoch": 1.1605132563747844, + "grad_norm": 7.855592087935293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239290 + }, + { + "epoch": 1.1605617545676203, + "grad_norm": 7.575407323656691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239300 + }, + { + "epoch": 1.1606102527604565, + "grad_norm": 8.17710557043938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239310 + }, + { + "epoch": 1.1606587509532926, + "grad_norm": 7.74467139308399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239320 + }, + { + "epoch": 1.1607072491461288, + "grad_norm": 7.904620957788211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239330 + }, + { + "epoch": 1.1607557473389647, + "grad_norm": 7.760964848557705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239340 + }, + { + "epoch": 1.1608042455318008, + "grad_norm": 7.594135809085856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239350 + }, + { + "epoch": 1.160852743724637, + "grad_norm": 8.02250355036449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239360 + }, + { + "epoch": 1.1609012419174731, + "grad_norm": 7.828866444015148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239370 + }, + { + "epoch": 1.160949740110309, + "grad_norm": 7.075955466007144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239380 + }, + { + "epoch": 1.1609982383031452, + "grad_norm": 7.61178853281308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239390 + }, + { + "epoch": 1.1610467364959813, + "grad_norm": 7.383989952813863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239400 + }, + { + "epoch": 1.1610952346888175, + "grad_norm": 7.514591970902984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239410 + }, + { + "epoch": 1.1611437328816534, + "grad_norm": 7.776760213573652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239420 + }, + { + "epoch": 1.1611922310744895, + "grad_norm": 6.977661826113035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239430 + }, + { + "epoch": 1.1612407292673257, + "grad_norm": 7.669457602332841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239440 + }, + { + "epoch": 1.1612892274601618, + "grad_norm": 7.462822537718239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239450 + }, + { + "epoch": 1.1613377256529978, + "grad_norm": 7.354056208441762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239460 + }, + { + "epoch": 1.161386223845834, + "grad_norm": 7.272475954778201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239470 + }, + { + "epoch": 1.16143472203867, + "grad_norm": 6.951593434223469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239480 + }, + { + "epoch": 1.1614832202315062, + "grad_norm": 7.416527836312525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239490 + }, + { + "epoch": 1.1615317184243423, + "grad_norm": 7.625895648288861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239500 + }, + { + "epoch": 1.1615802166171783, + "grad_norm": 7.341120067394513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239510 + }, + { + "epoch": 1.1616287148100144, + "grad_norm": 7.563320281178676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239520 + }, + { + "epoch": 1.1616772130028505, + "grad_norm": 7.290559977946032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239530 + }, + { + "epoch": 1.1617257111956865, + "grad_norm": 7.349076724949555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239540 + }, + { + "epoch": 1.1617742093885226, + "grad_norm": 7.189284900732673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239550 + }, + { + "epoch": 1.1618227075813587, + "grad_norm": 7.214074315697871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239560 + }, + { + "epoch": 1.161871205774195, + "grad_norm": 7.132889123795394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239570 + }, + { + "epoch": 1.161919703967031, + "grad_norm": 7.633236265291998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239580 + }, + { + "epoch": 1.161968202159867, + "grad_norm": 7.069554897043417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239590 + }, + { + "epoch": 1.162016700352703, + "grad_norm": 7.420268843816302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239600 + }, + { + "epoch": 1.1620651985455392, + "grad_norm": 7.04002260931702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239610 + }, + { + "epoch": 1.1621136967383752, + "grad_norm": 7.311970051659955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239620 + }, + { + "epoch": 1.1621621949312113, + "grad_norm": 6.896026860658822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239630 + }, + { + "epoch": 1.1622106931240475, + "grad_norm": 7.308184279963825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239640 + }, + { + "epoch": 1.1622591913168836, + "grad_norm": 7.505629184834106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239650 + }, + { + "epoch": 1.1623076895097197, + "grad_norm": 6.773854011044023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239660 + }, + { + "epoch": 1.1623561877025557, + "grad_norm": 6.907501415298611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239670 + }, + { + "epoch": 1.1624046858953918, + "grad_norm": 7.048141270615815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239680 + }, + { + "epoch": 1.162453184088228, + "grad_norm": 7.068279472832728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239690 + }, + { + "epoch": 1.162501682281064, + "grad_norm": 7.107654198534874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239700 + }, + { + "epoch": 1.1625501804739, + "grad_norm": 7.170375937448625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239710 + }, + { + "epoch": 1.1625986786667362, + "grad_norm": 7.203376384268267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239720 + }, + { + "epoch": 1.1626471768595723, + "grad_norm": 6.948179986920877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239730 + }, + { + "epoch": 1.1626956750524085, + "grad_norm": 6.991101741959937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239740 + }, + { + "epoch": 1.1627441732452444, + "grad_norm": 7.069939300663464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239750 + }, + { + "epoch": 1.1627926714380805, + "grad_norm": 7.311170691082225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239760 + }, + { + "epoch": 1.1628411696309167, + "grad_norm": 7.113064270924951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239770 + }, + { + "epoch": 1.1628896678237528, + "grad_norm": 6.756290815701504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239780 + }, + { + "epoch": 1.1629381660165887, + "grad_norm": 6.970074650780589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239790 + }, + { + "epoch": 1.1629866642094249, + "grad_norm": 6.954098807909759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239800 + }, + { + "epoch": 1.163035162402261, + "grad_norm": 6.920412687350108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239810 + }, + { + "epoch": 1.1630836605950972, + "grad_norm": 6.657223394768153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239820 + }, + { + "epoch": 1.163132158787933, + "grad_norm": 6.40053912093208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239830 + }, + { + "epoch": 1.1631806569807692, + "grad_norm": 6.637240801410371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239840 + }, + { + "epoch": 1.1632291551736054, + "grad_norm": 6.918459405369504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239850 + }, + { + "epoch": 1.1632776533664415, + "grad_norm": 6.726627077568992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239860 + }, + { + "epoch": 1.1633261515592774, + "grad_norm": 6.754834913635932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239870 + }, + { + "epoch": 1.1633746497521136, + "grad_norm": 6.407083930071167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239880 + }, + { + "epoch": 1.1634231479449497, + "grad_norm": 6.951956521561442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239890 + }, + { + "epoch": 1.1634716461377859, + "grad_norm": 6.872365077015274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239900 + }, + { + "epoch": 1.1635201443306218, + "grad_norm": 6.893699833199207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239910 + }, + { + "epoch": 1.163568642523458, + "grad_norm": 6.721221268435329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239920 + }, + { + "epoch": 1.163617140716294, + "grad_norm": 6.672267005569665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239930 + }, + { + "epoch": 1.1636656389091302, + "grad_norm": 6.878679670307974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239940 + }, + { + "epoch": 1.1637141371019664, + "grad_norm": 6.564628307614839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239950 + }, + { + "epoch": 1.1637626352948023, + "grad_norm": 6.607317715179306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239960 + }, + { + "epoch": 1.1638111334876384, + "grad_norm": 6.586353151760704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239970 + }, + { + "epoch": 1.1638596316804746, + "grad_norm": 6.538567731695366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239980 + }, + { + "epoch": 1.1639081298733105, + "grad_norm": 6.587139012026455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 239990 + }, + { + "epoch": 1.1639566280661466, + "grad_norm": 6.499013949223809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240000 + }, + { + "epoch": 1.1640051262589828, + "grad_norm": 6.529774765340335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240010 + }, + { + "epoch": 1.164053624451819, + "grad_norm": 6.464254198590424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240020 + }, + { + "epoch": 1.164102122644655, + "grad_norm": 6.290663634445082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240030 + }, + { + "epoch": 1.164150620837491, + "grad_norm": 6.794260798415053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240040 + }, + { + "epoch": 1.1641991190303271, + "grad_norm": 6.661897344883982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240050 + }, + { + "epoch": 1.1642476172231633, + "grad_norm": 6.62219861169433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240060 + }, + { + "epoch": 1.1642961154159992, + "grad_norm": 6.36881196669492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240070 + }, + { + "epoch": 1.1643446136088353, + "grad_norm": 6.515688255603891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240080 + }, + { + "epoch": 1.1643931118016715, + "grad_norm": 6.585907641465383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240090 + }, + { + "epoch": 1.1644416099945076, + "grad_norm": 6.591834988967094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240100 + }, + { + "epoch": 1.1644901081873438, + "grad_norm": 6.293566912063397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240110 + }, + { + "epoch": 1.1645386063801797, + "grad_norm": 6.242414940516028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240120 + }, + { + "epoch": 1.1645871045730158, + "grad_norm": 6.586726186696978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240130 + }, + { + "epoch": 1.164635602765852, + "grad_norm": 6.599430690812369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240140 + }, + { + "epoch": 1.164684100958688, + "grad_norm": 6.291620024967415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240150 + }, + { + "epoch": 1.164732599151524, + "grad_norm": 6.476982861158831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240160 + }, + { + "epoch": 1.1647810973443602, + "grad_norm": 6.343187664015204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240170 + }, + { + "epoch": 1.1648295955371963, + "grad_norm": 6.321076995163821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240180 + }, + { + "epoch": 1.1648780937300325, + "grad_norm": 6.669225172117876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240190 + }, + { + "epoch": 1.1649265919228684, + "grad_norm": 6.322942169845192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240200 + }, + { + "epoch": 1.1649750901157045, + "grad_norm": 6.367666571804875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240210 + }, + { + "epoch": 1.1650235883085407, + "grad_norm": 6.119168460827495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240220 + }, + { + "epoch": 1.1650720865013768, + "grad_norm": 5.958026250141302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240230 + }, + { + "epoch": 1.1651205846942128, + "grad_norm": 6.049019418696844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240240 + }, + { + "epoch": 1.165169082887049, + "grad_norm": 6.210701286590847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240250 + }, + { + "epoch": 1.165217581079885, + "grad_norm": 6.289268839054785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240260 + }, + { + "epoch": 1.1652660792727212, + "grad_norm": 6.013596731690996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240270 + }, + { + "epoch": 1.165314577465557, + "grad_norm": 6.121240403444972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240280 + }, + { + "epoch": 1.1653630756583933, + "grad_norm": 6.143481101616999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240290 + }, + { + "epoch": 1.1654115738512294, + "grad_norm": 6.1158026198882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240300 + }, + { + "epoch": 1.1654600720440655, + "grad_norm": 6.207822167425547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240310 + }, + { + "epoch": 1.1655085702369015, + "grad_norm": 5.94087090632911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240320 + }, + { + "epoch": 1.1655570684297376, + "grad_norm": 6.047282852250646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240330 + }, + { + "epoch": 1.1656055666225738, + "grad_norm": 6.219747206159809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240340 + }, + { + "epoch": 1.16565406481541, + "grad_norm": 6.027462262636618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240350 + }, + { + "epoch": 1.1657025630082458, + "grad_norm": 6.188934520423572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240360 + }, + { + "epoch": 1.165751061201082, + "grad_norm": 6.188466272760706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240370 + }, + { + "epoch": 1.165799559393918, + "grad_norm": 5.986375839484026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240380 + }, + { + "epoch": 1.1658480575867542, + "grad_norm": 6.088311721441642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240390 + }, + { + "epoch": 1.1658965557795902, + "grad_norm": 5.911163469818348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240400 + }, + { + "epoch": 1.1659450539724263, + "grad_norm": 5.8988639750623406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240410 + }, + { + "epoch": 1.1659935521652625, + "grad_norm": 5.94978466494922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240420 + }, + { + "epoch": 1.1660420503580986, + "grad_norm": 5.6360878630812294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240430 + }, + { + "epoch": 1.1660905485509345, + "grad_norm": 6.142300179590165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240440 + }, + { + "epoch": 1.1661390467437707, + "grad_norm": 5.817777548600134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240450 + }, + { + "epoch": 1.1661875449366068, + "grad_norm": 5.849101114563382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240460 + }, + { + "epoch": 1.166236043129443, + "grad_norm": 6.252516016047593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240470 + }, + { + "epoch": 1.166284541322279, + "grad_norm": 6.132943042302941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240480 + }, + { + "epoch": 1.166333039515115, + "grad_norm": 6.104418304175852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240490 + }, + { + "epoch": 1.1663815377079512, + "grad_norm": 5.7560356481189956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240500 + }, + { + "epoch": 1.1664300359007873, + "grad_norm": 5.7444818679641685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240510 + }, + { + "epoch": 1.1664785340936232, + "grad_norm": 6.087743997795769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240520 + }, + { + "epoch": 1.1665270322864594, + "grad_norm": 5.514926471050785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240530 + }, + { + "epoch": 1.1665755304792955, + "grad_norm": 5.947816106299797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240540 + }, + { + "epoch": 1.1666240286721317, + "grad_norm": 5.7670536790510596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240550 + }, + { + "epoch": 1.1666725268649678, + "grad_norm": 5.847880757414714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240560 + }, + { + "epoch": 1.1667210250578037, + "grad_norm": 6.062131774342561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240570 + }, + { + "epoch": 1.1667695232506399, + "grad_norm": 5.8550206460949994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240580 + }, + { + "epoch": 1.166818021443476, + "grad_norm": 6.190941803652095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240590 + }, + { + "epoch": 1.166866519636312, + "grad_norm": 5.739756403499996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240600 + }, + { + "epoch": 1.166915017829148, + "grad_norm": 5.780043821346226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240610 + }, + { + "epoch": 1.1669635160219842, + "grad_norm": 5.703876126972318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240620 + }, + { + "epoch": 1.1670120142148204, + "grad_norm": 5.595668639557516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240630 + }, + { + "epoch": 1.1670605124076565, + "grad_norm": 5.960194116028106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240640 + }, + { + "epoch": 1.1671090106004924, + "grad_norm": 5.868213648341225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240650 + }, + { + "epoch": 1.1671575087933286, + "grad_norm": 5.709664918640556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240660 + }, + { + "epoch": 1.1672060069861647, + "grad_norm": 5.806809966202309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240670 + }, + { + "epoch": 1.1672545051790006, + "grad_norm": 5.73373775125674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240680 + }, + { + "epoch": 1.1673030033718368, + "grad_norm": 5.762656485330808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240690 + }, + { + "epoch": 1.167351501564673, + "grad_norm": 5.464761798634754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240700 + }, + { + "epoch": 1.167399999757509, + "grad_norm": 5.593556551275469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240710 + }, + { + "epoch": 1.1674484979503452, + "grad_norm": 5.7409188514156995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240720 + }, + { + "epoch": 1.1674969961431811, + "grad_norm": 5.2665463101675414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240730 + }, + { + "epoch": 1.1675454943360173, + "grad_norm": 5.679536130287488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240740 + }, + { + "epoch": 1.1675939925288534, + "grad_norm": 5.738500874485908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240750 + }, + { + "epoch": 1.1676424907216896, + "grad_norm": 5.4768360513435255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240760 + }, + { + "epoch": 1.1676909889145255, + "grad_norm": 5.552145054821267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240770 + }, + { + "epoch": 1.1677394871073616, + "grad_norm": 5.583108730888853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240780 + }, + { + "epoch": 1.1677879853001978, + "grad_norm": 5.5389779873848966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240790 + }, + { + "epoch": 1.167836483493034, + "grad_norm": 5.428046989663926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240800 + }, + { + "epoch": 1.1678849816858698, + "grad_norm": 5.336525887855714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240810 + }, + { + "epoch": 1.167933479878706, + "grad_norm": 5.481670584117637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240820 + }, + { + "epoch": 1.1679819780715421, + "grad_norm": 5.195465746510308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240830 + }, + { + "epoch": 1.1680304762643783, + "grad_norm": 5.6591421326857017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240840 + }, + { + "epoch": 1.1680789744572142, + "grad_norm": 5.600417196660601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240850 + }, + { + "epoch": 1.1681274726500503, + "grad_norm": 5.334311481419718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240860 + }, + { + "epoch": 1.1681759708428865, + "grad_norm": 5.613780373892041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240870 + }, + { + "epoch": 1.1682244690357226, + "grad_norm": 5.555612503371776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240880 + }, + { + "epoch": 1.1682729672285586, + "grad_norm": 5.4371326996260905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240890 + }, + { + "epoch": 1.1683214654213947, + "grad_norm": 5.860821872261113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240900 + }, + { + "epoch": 1.1683699636142308, + "grad_norm": 5.519026302636121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240910 + }, + { + "epoch": 1.168418461807067, + "grad_norm": 5.336457675753081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240920 + }, + { + "epoch": 1.168466959999903, + "grad_norm": 5.967489613567523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240930 + }, + { + "epoch": 1.168515458192739, + "grad_norm": 5.5698052392472164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240940 + }, + { + "epoch": 1.1685639563855752, + "grad_norm": 5.2938599282015275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240950 + }, + { + "epoch": 1.1686124545784113, + "grad_norm": 5.323884622043806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240960 + }, + { + "epoch": 1.1686609527712473, + "grad_norm": 5.334936048484451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240970 + }, + { + "epoch": 1.1687094509640834, + "grad_norm": 5.4307289332200526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240980 + }, + { + "epoch": 1.1687579491569196, + "grad_norm": 5.791207158267753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 240990 + }, + { + "epoch": 1.1688064473497557, + "grad_norm": 5.326772978264671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241000 + }, + { + "epoch": 1.1688549455425918, + "grad_norm": 5.435702732370373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241010 + }, + { + "epoch": 1.1689034437354278, + "grad_norm": 5.4077567313015606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241020 + }, + { + "epoch": 1.168951941928264, + "grad_norm": 8.016516517272976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241030 + }, + { + "epoch": 1.1690004401211, + "grad_norm": 5.901559063659079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241040 + }, + { + "epoch": 1.169048938313936, + "grad_norm": 5.41894813466115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241050 + }, + { + "epoch": 1.1690974365067721, + "grad_norm": 5.2911769188312974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241060 + }, + { + "epoch": 1.1691459346996083, + "grad_norm": 5.276829995182197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241070 + }, + { + "epoch": 1.1691944328924444, + "grad_norm": 5.19685805500103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241080 + }, + { + "epoch": 1.1692429310852805, + "grad_norm": 5.5107484797645157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241090 + }, + { + "epoch": 1.1692914292781165, + "grad_norm": 5.291844473731544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241100 + }, + { + "epoch": 1.1693399274709526, + "grad_norm": 5.258016599896109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241110 + }, + { + "epoch": 1.1693884256637888, + "grad_norm": 5.150840109990895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241120 + }, + { + "epoch": 1.1694369238566247, + "grad_norm": 5.211360587509262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241130 + }, + { + "epoch": 1.1694854220494608, + "grad_norm": 5.344253395378473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241140 + }, + { + "epoch": 1.169533920242297, + "grad_norm": 5.3421732815195355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241150 + }, + { + "epoch": 1.169582418435133, + "grad_norm": 5.201810893140646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241160 + }, + { + "epoch": 1.1696309166279693, + "grad_norm": 5.004925895946144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241170 + }, + { + "epoch": 1.1696794148208052, + "grad_norm": 4.8304844568747285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241180 + }, + { + "epoch": 1.1697279130136413, + "grad_norm": 5.1239602782970906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241190 + }, + { + "epoch": 1.1697764112064775, + "grad_norm": 5.092326915701051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241200 + }, + { + "epoch": 1.1698249093993134, + "grad_norm": 5.1106116671917334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241210 + }, + { + "epoch": 1.1698734075921495, + "grad_norm": 5.0717194000071686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241220 + }, + { + "epoch": 1.1699219057849857, + "grad_norm": 5.0520739591775055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241230 + }, + { + "epoch": 1.1699704039778218, + "grad_norm": 5.100536881741391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241240 + }, + { + "epoch": 1.170018902170658, + "grad_norm": 5.265791358510796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241250 + }, + { + "epoch": 1.1700674003634939, + "grad_norm": 5.041805906103036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241260 + }, + { + "epoch": 1.17011589855633, + "grad_norm": 5.046127782293297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241270 + }, + { + "epoch": 1.1701643967491662, + "grad_norm": 4.76719250741553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241280 + }, + { + "epoch": 1.1702128949420023, + "grad_norm": 5.181048834401736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241290 + }, + { + "epoch": 1.1702613931348382, + "grad_norm": 5.125879098955011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241300 + }, + { + "epoch": 1.1703098913276744, + "grad_norm": 5.102582889549012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241310 + }, + { + "epoch": 1.1703583895205105, + "grad_norm": 5.078452858242599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241320 + }, + { + "epoch": 1.1704068877133467, + "grad_norm": 5.3293423007971796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241330 + }, + { + "epoch": 1.1704553859061826, + "grad_norm": 5.062803865030219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241340 + }, + { + "epoch": 1.1705038840990187, + "grad_norm": 5.135111180720742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241350 + }, + { + "epoch": 1.1705523822918549, + "grad_norm": 5.002771885642687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241360 + }, + { + "epoch": 1.170600880484691, + "grad_norm": 4.830166844271844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241370 + }, + { + "epoch": 1.170649378677527, + "grad_norm": 5.050632623238016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241380 + }, + { + "epoch": 1.170697876870363, + "grad_norm": 5.216530496454652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241390 + }, + { + "epoch": 1.1707463750631992, + "grad_norm": 4.6668763076240793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241400 + }, + { + "epoch": 1.1707948732560354, + "grad_norm": 4.907164807832487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241410 + }, + { + "epoch": 1.1708433714488713, + "grad_norm": 4.923301588632967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241420 + }, + { + "epoch": 1.1708918696417074, + "grad_norm": 5.2357780333522896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241430 + }, + { + "epoch": 1.1709403678345436, + "grad_norm": 5.1489699615103746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241440 + }, + { + "epoch": 1.1709888660273797, + "grad_norm": 6.167940114210069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241450 + }, + { + "epoch": 1.1710373642202156, + "grad_norm": 5.1629971409283826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241460 + }, + { + "epoch": 1.1710858624130518, + "grad_norm": 4.709044176820498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241470 + }, + { + "epoch": 1.171134360605888, + "grad_norm": 4.443510093210534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241480 + }, + { + "epoch": 1.171182858798724, + "grad_norm": 5.29428518802888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241490 + }, + { + "epoch": 1.17123135699156, + "grad_norm": 4.9595399786994676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241500 + }, + { + "epoch": 1.1712798551843961, + "grad_norm": 4.7660996926879307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241510 + }, + { + "epoch": 1.1713283533772323, + "grad_norm": 4.9959439252234006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241520 + }, + { + "epoch": 1.1713768515700684, + "grad_norm": 5.487800081027672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241530 + }, + { + "epoch": 1.1714253497629046, + "grad_norm": 4.909270501229912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241540 + }, + { + "epoch": 1.1714738479557405, + "grad_norm": 4.9864631535001536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241550 + }, + { + "epoch": 1.1715223461485766, + "grad_norm": 4.779030149393293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241560 + }, + { + "epoch": 1.1715708443414128, + "grad_norm": 4.795785457645252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241570 + }, + { + "epoch": 1.1716193425342487, + "grad_norm": 5.429176397342417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241580 + }, + { + "epoch": 1.1716678407270849, + "grad_norm": 4.8124974227903294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241590 + }, + { + "epoch": 1.171716338919921, + "grad_norm": 6.302281008174759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241600 + }, + { + "epoch": 1.1717648371127571, + "grad_norm": 4.5803780324149557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241610 + }, + { + "epoch": 1.1718133353055933, + "grad_norm": 4.751973037286916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241620 + }, + { + "epoch": 1.1718618334984292, + "grad_norm": 4.429810118722344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241630 + }, + { + "epoch": 1.1719103316912654, + "grad_norm": 4.991300883716576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241640 + }, + { + "epoch": 1.1719588298841015, + "grad_norm": 5.928852075953728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241650 + }, + { + "epoch": 1.1720073280769374, + "grad_norm": 4.620597948701288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241660 + }, + { + "epoch": 1.1720558262697736, + "grad_norm": 4.918484464155881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241670 + }, + { + "epoch": 1.1721043244626097, + "grad_norm": 4.2429665114696036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241680 + }, + { + "epoch": 1.1721528226554458, + "grad_norm": 4.682993193227958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241690 + }, + { + "epoch": 1.172201320848282, + "grad_norm": 4.635673889197278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241700 + }, + { + "epoch": 1.172249819041118, + "grad_norm": 4.5393214520572656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241710 + }, + { + "epoch": 1.172298317233954, + "grad_norm": 4.899200334307352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241720 + }, + { + "epoch": 1.1723468154267902, + "grad_norm": 4.513422524610178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241730 + }, + { + "epoch": 1.1723953136196263, + "grad_norm": 4.6456754887458374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241740 + }, + { + "epoch": 1.1724438118124623, + "grad_norm": 4.514689777579406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241750 + }, + { + "epoch": 1.1724923100052984, + "grad_norm": 4.618143734091973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241760 + }, + { + "epoch": 1.1725408081981346, + "grad_norm": 4.662754449213935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241770 + }, + { + "epoch": 1.1725893063909707, + "grad_norm": 4.336834891205399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241780 + }, + { + "epoch": 1.1726378045838066, + "grad_norm": 9.917491183841776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241790 + }, + { + "epoch": 1.1726863027766428, + "grad_norm": 5.379449419251614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241800 + }, + { + "epoch": 1.172734800969479, + "grad_norm": 4.499388950307548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241810 + }, + { + "epoch": 1.172783299162315, + "grad_norm": 4.4213638972223634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241820 + }, + { + "epoch": 1.172831797355151, + "grad_norm": 4.297015365750667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241830 + }, + { + "epoch": 1.1728802955479871, + "grad_norm": 4.533123032501862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241840 + }, + { + "epoch": 1.1729287937408233, + "grad_norm": 4.643283091354533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241850 + }, + { + "epoch": 1.1729772919336594, + "grad_norm": 4.5979863472211946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241860 + }, + { + "epoch": 1.1730257901264953, + "grad_norm": 4.4596511372674286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241870 + }, + { + "epoch": 1.1730742883193315, + "grad_norm": 4.261035613239983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241880 + }, + { + "epoch": 1.1731227865121676, + "grad_norm": 4.511336015866618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241890 + }, + { + "epoch": 1.1731712847050038, + "grad_norm": 4.5190244435389104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241900 + }, + { + "epoch": 1.1732197828978397, + "grad_norm": 4.5934523740243094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241910 + }, + { + "epoch": 1.1732682810906758, + "grad_norm": 4.692247657089865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241920 + }, + { + "epoch": 1.173316779283512, + "grad_norm": 4.100763817405095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241930 + }, + { + "epoch": 1.1733652774763481, + "grad_norm": 4.43845813435928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241940 + }, + { + "epoch": 1.173413775669184, + "grad_norm": 4.3800465476806494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241950 + }, + { + "epoch": 1.1734622738620202, + "grad_norm": 4.471126047178586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241960 + }, + { + "epoch": 1.1735107720548563, + "grad_norm": 4.2679200618067625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241970 + }, + { + "epoch": 1.1735592702476925, + "grad_norm": 4.162424716014357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241980 + }, + { + "epoch": 1.1736077684405286, + "grad_norm": 4.494247818342956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 241990 + }, + { + "epoch": 1.1736562666333645, + "grad_norm": 4.392881081116684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242000 + }, + { + "epoch": 1.1737047648262007, + "grad_norm": 4.382864915442042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242010 + }, + { + "epoch": 1.1737532630190368, + "grad_norm": 4.4756454542493884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242020 + }, + { + "epoch": 1.1738017612118727, + "grad_norm": 4.024560240623032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242030 + }, + { + "epoch": 1.1738502594047089, + "grad_norm": 4.36030660466713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242040 + }, + { + "epoch": 1.173898757597545, + "grad_norm": 4.1339060175005216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242050 + }, + { + "epoch": 1.1739472557903812, + "grad_norm": 4.5475996302002386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242060 + }, + { + "epoch": 1.1739957539832173, + "grad_norm": 4.1879186341020613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242070 + }, + { + "epoch": 1.1740442521760532, + "grad_norm": 4.177017842721398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242080 + }, + { + "epoch": 1.1740927503688894, + "grad_norm": 4.315183588232685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242090 + }, + { + "epoch": 1.1741412485617255, + "grad_norm": 4.3114145142908455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242100 + }, + { + "epoch": 1.1741897467545614, + "grad_norm": 4.2543554457097343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242110 + }, + { + "epoch": 1.1742382449473976, + "grad_norm": 4.375190698624465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242120 + }, + { + "epoch": 1.1742867431402337, + "grad_norm": 4.1975201980903876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242130 + }, + { + "epoch": 1.1743352413330699, + "grad_norm": 4.344509108022976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242140 + }, + { + "epoch": 1.174383739525906, + "grad_norm": 4.1953438056907544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242150 + }, + { + "epoch": 1.174432237718742, + "grad_norm": 1.2950511063536396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242160 + }, + { + "epoch": 1.174480735911578, + "grad_norm": 4.631823458112194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242170 + }, + { + "epoch": 1.1745292341044142, + "grad_norm": 4.577305645625529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242180 + }, + { + "epoch": 1.1745777322972502, + "grad_norm": 4.3716049447084515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242190 + }, + { + "epoch": 1.1746262304900863, + "grad_norm": 4.323754509982791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242200 + }, + { + "epoch": 1.1746747286829224, + "grad_norm": 4.266956565857072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242210 + }, + { + "epoch": 1.1747232268757586, + "grad_norm": 4.2401563149496724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242220 + }, + { + "epoch": 1.1747717250685947, + "grad_norm": 3.7008657471915285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242230 + }, + { + "epoch": 1.1748202232614307, + "grad_norm": 4.371174000539213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242240 + }, + { + "epoch": 1.1748687214542668, + "grad_norm": 4.458746971636174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242250 + }, + { + "epoch": 1.174917219647103, + "grad_norm": 4.1492754121463804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242260 + }, + { + "epoch": 1.174965717839939, + "grad_norm": 4.011364751477231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242270 + }, + { + "epoch": 1.175014216032775, + "grad_norm": 3.984415286595322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242280 + }, + { + "epoch": 1.1750627142256111, + "grad_norm": 4.52717756616039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242290 + }, + { + "epoch": 1.1751112124184473, + "grad_norm": 3.956251504177999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242300 + }, + { + "epoch": 1.1751597106112834, + "grad_norm": 3.992417418885452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242310 + }, + { + "epoch": 1.1752082088041194, + "grad_norm": 4.387944230188623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242320 + }, + { + "epoch": 1.1752567069969555, + "grad_norm": 4.446368606636497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242330 + }, + { + "epoch": 1.1753052051897916, + "grad_norm": 4.248770224535292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242340 + }, + { + "epoch": 1.1753537033826278, + "grad_norm": 4.323176838738618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242350 + }, + { + "epoch": 1.1754022015754637, + "grad_norm": 4.075339532505495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242360 + }, + { + "epoch": 1.1754506997682999, + "grad_norm": 4.6579454959783106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242370 + }, + { + "epoch": 1.175499197961136, + "grad_norm": 3.735577536190249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242380 + }, + { + "epoch": 1.1755476961539721, + "grad_norm": 4.055640445699282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242390 + }, + { + "epoch": 1.175596194346808, + "grad_norm": 4.324637359331973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242400 + }, + { + "epoch": 1.1756446925396442, + "grad_norm": 4.023035415912091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242410 + }, + { + "epoch": 1.1756931907324804, + "grad_norm": 4.2566266955645915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242420 + }, + { + "epoch": 1.1757416889253165, + "grad_norm": 4.770959094457794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242430 + }, + { + "epoch": 1.1757901871181524, + "grad_norm": 4.477196213770185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242440 + }, + { + "epoch": 1.1758386853109886, + "grad_norm": 3.958065875053762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242450 + }, + { + "epoch": 1.1758871835038247, + "grad_norm": 3.995963737679631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242460 + }, + { + "epoch": 1.1759356816966609, + "grad_norm": 3.98016233305043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242470 + }, + { + "epoch": 1.1759841798894968, + "grad_norm": 3.863874908915932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242480 + }, + { + "epoch": 1.176032678082333, + "grad_norm": 4.214060567164779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242490 + }, + { + "epoch": 1.176081176275169, + "grad_norm": 3.858688657487619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242500 + }, + { + "epoch": 1.1761296744680052, + "grad_norm": 4.2094804086900695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242510 + }, + { + "epoch": 1.1761781726608413, + "grad_norm": 4.1396642558311214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242520 + }, + { + "epoch": 1.1762266708536773, + "grad_norm": 4.13989411640614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242530 + }, + { + "epoch": 1.1762751690465134, + "grad_norm": 3.9950350583239924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242540 + }, + { + "epoch": 1.1763236672393496, + "grad_norm": 3.7838450595018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242550 + }, + { + "epoch": 1.1763721654321855, + "grad_norm": 3.9635946080807116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242560 + }, + { + "epoch": 1.1764206636250216, + "grad_norm": 3.8213652686636124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242570 + }, + { + "epoch": 1.1764691618178578, + "grad_norm": 4.2483630835477015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242580 + }, + { + "epoch": 1.176517660010694, + "grad_norm": 3.8691535308998937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242590 + }, + { + "epoch": 1.17656615820353, + "grad_norm": 4.253911356499884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242600 + }, + { + "epoch": 1.176614656396366, + "grad_norm": 4.0567208259290055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242610 + }, + { + "epoch": 1.1766631545892021, + "grad_norm": 3.866982112299411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242620 + }, + { + "epoch": 1.1767116527820383, + "grad_norm": 4.258013674984795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242630 + }, + { + "epoch": 1.1767601509748742, + "grad_norm": 4.005695686259969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242640 + }, + { + "epoch": 1.1768086491677103, + "grad_norm": 3.881891785795233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242650 + }, + { + "epoch": 1.1768571473605465, + "grad_norm": 3.724103692093195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242660 + }, + { + "epoch": 1.1769056455533826, + "grad_norm": 4.0404668055771253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242670 + }, + { + "epoch": 1.1769541437462188, + "grad_norm": 3.97086132863933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242680 + }, + { + "epoch": 1.1770026419390547, + "grad_norm": 3.970765050098635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242690 + }, + { + "epoch": 1.1770511401318908, + "grad_norm": 3.9411979457781854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242700 + }, + { + "epoch": 1.177099638324727, + "grad_norm": 3.574388074412127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242710 + }, + { + "epoch": 1.177148136517563, + "grad_norm": 3.891552680101995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242720 + }, + { + "epoch": 1.177196634710399, + "grad_norm": 3.6595984909126855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242730 + }, + { + "epoch": 1.1772451329032352, + "grad_norm": 3.881864785171274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242740 + }, + { + "epoch": 1.1772936310960713, + "grad_norm": 3.830034955853989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242750 + }, + { + "epoch": 1.1773421292889075, + "grad_norm": 3.784817437235688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242760 + }, + { + "epoch": 1.1773906274817434, + "grad_norm": 3.548579030621113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242770 + }, + { + "epoch": 1.1774391256745795, + "grad_norm": 3.7308275580016925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242780 + }, + { + "epoch": 1.1774876238674157, + "grad_norm": 3.810954751770623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242790 + }, + { + "epoch": 1.1775361220602518, + "grad_norm": 3.7348176107343534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242800 + }, + { + "epoch": 1.1775846202530877, + "grad_norm": 3.65023140602716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242810 + }, + { + "epoch": 1.1776331184459239, + "grad_norm": 4.0604231088536835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242820 + }, + { + "epoch": 1.17768161663876, + "grad_norm": 3.6962212845992326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242830 + }, + { + "epoch": 1.1777301148315962, + "grad_norm": 3.817744698153547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242840 + }, + { + "epoch": 1.177778613024432, + "grad_norm": 3.912748169909719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242850 + }, + { + "epoch": 1.1778271112172682, + "grad_norm": 3.711971174880091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242860 + }, + { + "epoch": 1.1778756094101044, + "grad_norm": 3.5798869646441744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242870 + }, + { + "epoch": 1.1779241076029405, + "grad_norm": 3.511188140237209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242880 + }, + { + "epoch": 1.1779726057957765, + "grad_norm": 3.805996584560489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242890 + }, + { + "epoch": 1.1780211039886126, + "grad_norm": 3.660209557665439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242900 + }, + { + "epoch": 1.1780696021814487, + "grad_norm": 3.608323950743397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242910 + }, + { + "epoch": 1.1781181003742849, + "grad_norm": 3.61028931195051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242920 + }, + { + "epoch": 1.1781665985671208, + "grad_norm": 3.417091676283235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242930 + }, + { + "epoch": 1.178215096759957, + "grad_norm": 3.9530181794589225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242940 + }, + { + "epoch": 1.178263594952793, + "grad_norm": 3.606437815051322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242950 + }, + { + "epoch": 1.1783120931456292, + "grad_norm": 3.602076148467859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242960 + }, + { + "epoch": 1.1783605913384652, + "grad_norm": 3.5214142712902685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242970 + }, + { + "epoch": 1.1784090895313013, + "grad_norm": 3.511084400997788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242980 + }, + { + "epoch": 1.1784575877241374, + "grad_norm": 3.988854757608351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 242990 + }, + { + "epoch": 1.1785060859169736, + "grad_norm": 3.6412409087915876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243000 + }, + { + "epoch": 1.1785545841098095, + "grad_norm": 3.594193742628704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243010 + }, + { + "epoch": 1.1786030823026457, + "grad_norm": 3.665612169356791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243020 + }, + { + "epoch": 1.1786515804954818, + "grad_norm": 3.77839022291937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243030 + }, + { + "epoch": 1.178700078688318, + "grad_norm": 3.64710430744708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243040 + }, + { + "epoch": 1.178748576881154, + "grad_norm": 3.476840504390566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243050 + }, + { + "epoch": 1.17879707507399, + "grad_norm": 3.649607194233795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243060 + }, + { + "epoch": 1.1788455732668262, + "grad_norm": 3.445947527325188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243070 + }, + { + "epoch": 1.1788940714596623, + "grad_norm": 3.667000925133834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243080 + }, + { + "epoch": 1.1789425696524982, + "grad_norm": 3.6472947329002636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243090 + }, + { + "epoch": 1.1789910678453344, + "grad_norm": 3.4961733064164946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243100 + }, + { + "epoch": 1.1790395660381705, + "grad_norm": 1.8872885902965209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243110 + }, + { + "epoch": 1.1790880642310066, + "grad_norm": 3.320974073517391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243120 + }, + { + "epoch": 1.1791365624238428, + "grad_norm": 3.1269490108343234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243130 + }, + { + "epoch": 1.1791850606166787, + "grad_norm": 3.452618457799872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243140 + }, + { + "epoch": 1.1792335588095149, + "grad_norm": 3.419508942670291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243150 + }, + { + "epoch": 1.179282057002351, + "grad_norm": 3.666993464435109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243160 + }, + { + "epoch": 1.179330555195187, + "grad_norm": 3.662676562043998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243170 + }, + { + "epoch": 1.179379053388023, + "grad_norm": 3.520551317137688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243180 + }, + { + "epoch": 1.1794275515808592, + "grad_norm": 3.6260530578147154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243190 + }, + { + "epoch": 1.1794760497736954, + "grad_norm": 3.687758010073594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243200 + }, + { + "epoch": 1.1795245479665315, + "grad_norm": 3.3444422342654434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243210 + }, + { + "epoch": 1.1795730461593674, + "grad_norm": 3.449760654916645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243220 + }, + { + "epoch": 1.1796215443522036, + "grad_norm": 3.412909777011919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243230 + }, + { + "epoch": 1.1796700425450397, + "grad_norm": 3.8110901101617856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243240 + }, + { + "epoch": 1.1797185407378756, + "grad_norm": 3.4602404497263706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243250 + }, + { + "epoch": 1.1797670389307118, + "grad_norm": 3.377164858875403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243260 + }, + { + "epoch": 1.179815537123548, + "grad_norm": 3.178714180762654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243270 + }, + { + "epoch": 1.179864035316384, + "grad_norm": 3.3622118422727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243280 + }, + { + "epoch": 1.1799125335092202, + "grad_norm": 3.4740665455501585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243290 + }, + { + "epoch": 1.1799610317020561, + "grad_norm": 3.550261595819393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243300 + }, + { + "epoch": 1.1800095298948923, + "grad_norm": 3.37181447207513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243310 + }, + { + "epoch": 1.1800580280877284, + "grad_norm": 3.535310000302161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243320 + }, + { + "epoch": 1.1801065262805646, + "grad_norm": 3.305678930587419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243330 + }, + { + "epoch": 1.1801550244734005, + "grad_norm": 3.578525564762458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243340 + }, + { + "epoch": 1.1802035226662366, + "grad_norm": 3.5368636019939004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243350 + }, + { + "epoch": 1.1802520208590728, + "grad_norm": 3.2153568696458024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243360 + }, + { + "epoch": 1.180300519051909, + "grad_norm": 3.4815926142073295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243370 + }, + { + "epoch": 1.1803490172447448, + "grad_norm": 3.393687109110033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243380 + }, + { + "epoch": 1.180397515437581, + "grad_norm": 3.422152872190054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243390 + }, + { + "epoch": 1.1804460136304171, + "grad_norm": 3.3389007114692504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243400 + }, + { + "epoch": 1.1804945118232533, + "grad_norm": 3.78372106979441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243410 + }, + { + "epoch": 1.1805430100160892, + "grad_norm": 3.293180483865399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243420 + }, + { + "epoch": 1.1805915082089253, + "grad_norm": 3.5013187016375014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243430 + }, + { + "epoch": 1.1806400064017615, + "grad_norm": 3.3467191684621866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243440 + }, + { + "epoch": 1.1806885045945976, + "grad_norm": 4.0412544421997154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243450 + }, + { + "epoch": 1.1807370027874335, + "grad_norm": 3.343760113239114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243460 + }, + { + "epoch": 1.1807855009802697, + "grad_norm": 3.4148627037211554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243470 + }, + { + "epoch": 1.1808339991731058, + "grad_norm": 3.263282621901453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243480 + }, + { + "epoch": 1.180882497365942, + "grad_norm": 3.292296568702113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243490 + }, + { + "epoch": 1.180930995558778, + "grad_norm": 3.435570050669412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243500 + }, + { + "epoch": 1.180979493751614, + "grad_norm": 3.131848202997389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243510 + }, + { + "epoch": 1.1810279919444502, + "grad_norm": 3.23593667417299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243520 + }, + { + "epoch": 1.1810764901372863, + "grad_norm": 3.180388574719473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243530 + }, + { + "epoch": 1.1811249883301222, + "grad_norm": 3.124062786241666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243540 + }, + { + "epoch": 1.1811734865229584, + "grad_norm": 3.3669788734869144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243550 + }, + { + "epoch": 1.1812219847157945, + "grad_norm": 3.1154069546346363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243560 + }, + { + "epoch": 1.1812704829086307, + "grad_norm": 3.0969395936608635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243570 + }, + { + "epoch": 1.1813189811014668, + "grad_norm": 3.1224058005818733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243580 + }, + { + "epoch": 1.1813674792943027, + "grad_norm": 3.2699158936111417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243590 + }, + { + "epoch": 1.181415977487139, + "grad_norm": 3.1676826495186106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243600 + }, + { + "epoch": 1.181464475679975, + "grad_norm": 3.586626817764227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243610 + }, + { + "epoch": 1.181512973872811, + "grad_norm": 3.1906012054605526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243620 + }, + { + "epoch": 1.181561472065647, + "grad_norm": 3.1524262311677376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243630 + }, + { + "epoch": 1.1816099702584832, + "grad_norm": 3.1304288938827085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243640 + }, + { + "epoch": 1.1816584684513194, + "grad_norm": 3.171735230012018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243650 + }, + { + "epoch": 1.1817069666441555, + "grad_norm": 3.074378085443641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243660 + }, + { + "epoch": 1.1817554648369915, + "grad_norm": 3.373540735651659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243670 + }, + { + "epoch": 1.1818039630298276, + "grad_norm": 3.3501109442113375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243680 + }, + { + "epoch": 1.1818524612226637, + "grad_norm": 3.2254884985150056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243690 + }, + { + "epoch": 1.1819009594154997, + "grad_norm": 4.0053688366015194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243700 + }, + { + "epoch": 1.1819494576083358, + "grad_norm": 4.301804779061058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243710 + }, + { + "epoch": 1.181997955801172, + "grad_norm": 3.1082535656423715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243720 + }, + { + "epoch": 1.182046453994008, + "grad_norm": 3.359892986054547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243730 + }, + { + "epoch": 1.1820949521868442, + "grad_norm": 2.989959924093455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243740 + }, + { + "epoch": 1.1821434503796802, + "grad_norm": 3.20969455458453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243750 + }, + { + "epoch": 1.1821919485725163, + "grad_norm": 2.9890433239643244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243760 + }, + { + "epoch": 1.1822404467653524, + "grad_norm": 3.1433987857099055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243770 + }, + { + "epoch": 1.1822889449581886, + "grad_norm": 3.5066907599912156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243780 + }, + { + "epoch": 1.1823374431510245, + "grad_norm": 3.582276164593168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243790 + }, + { + "epoch": 1.1823859413438607, + "grad_norm": 3.014634941678196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243800 + }, + { + "epoch": 1.1824344395366968, + "grad_norm": 3.116782565371068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243810 + }, + { + "epoch": 1.182482937729533, + "grad_norm": 3.093591516289962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243820 + }, + { + "epoch": 1.1825314359223689, + "grad_norm": 2.816571331720752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243830 + }, + { + "epoch": 1.182579934115205, + "grad_norm": 3.228700151680641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243840 + }, + { + "epoch": 1.1826284323080412, + "grad_norm": 3.2028076191181754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243850 + }, + { + "epoch": 1.1826769305008773, + "grad_norm": 3.048871022315325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243860 + }, + { + "epoch": 1.1827254286937132, + "grad_norm": 2.9410768931370512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243870 + }, + { + "epoch": 1.1827739268865494, + "grad_norm": 3.039793128323254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243880 + }, + { + "epoch": 1.1828224250793855, + "grad_norm": 3.149249394596154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243890 + }, + { + "epoch": 1.1828709232722217, + "grad_norm": 3.1830815316880035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243900 + }, + { + "epoch": 1.1829194214650576, + "grad_norm": 3.3539883759203803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243910 + }, + { + "epoch": 1.1829679196578937, + "grad_norm": 3.007723847758825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243920 + }, + { + "epoch": 1.1830164178507299, + "grad_norm": 2.8418904562954594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243930 + }, + { + "epoch": 1.183064916043566, + "grad_norm": 3.189053288110699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243940 + }, + { + "epoch": 1.183113414236402, + "grad_norm": 3.4608511612077564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243950 + }, + { + "epoch": 1.183161912429238, + "grad_norm": 3.230129408393623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243960 + }, + { + "epoch": 1.1832104106220742, + "grad_norm": 2.8755099634736325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243970 + }, + { + "epoch": 1.1832589088149104, + "grad_norm": 3.0427944608391044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243980 + }, + { + "epoch": 1.1833074070077463, + "grad_norm": 2.9771758391916592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 243990 + }, + { + "epoch": 1.1833559052005824, + "grad_norm": 3.0718169341525936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244000 + }, + { + "epoch": 1.1834044033934186, + "grad_norm": 3.4857599473525624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244010 + }, + { + "epoch": 1.1834529015862547, + "grad_norm": 2.9336773010868455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244020 + }, + { + "epoch": 1.1835013997790906, + "grad_norm": 3.593237707377739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244030 + }, + { + "epoch": 1.1835498979719268, + "grad_norm": 2.8459986367579404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244040 + }, + { + "epoch": 1.183598396164763, + "grad_norm": 2.9743725704634016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244050 + }, + { + "epoch": 1.183646894357599, + "grad_norm": 3.061656173031224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244060 + }, + { + "epoch": 1.183695392550435, + "grad_norm": 3.226334754913296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244070 + }, + { + "epoch": 1.1837438907432711, + "grad_norm": 3.217807531541439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244080 + }, + { + "epoch": 1.1837923889361073, + "grad_norm": 7.205563434808937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244090 + }, + { + "epoch": 1.1838408871289434, + "grad_norm": 3.0020544272701954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244100 + }, + { + "epoch": 1.1838893853217796, + "grad_norm": 3.314054808356559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244110 + }, + { + "epoch": 1.1839378835146155, + "grad_norm": 3.0947152396265665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244120 + }, + { + "epoch": 1.1839863817074516, + "grad_norm": 2.7089635423749314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244130 + }, + { + "epoch": 1.1840348799002878, + "grad_norm": 2.7868594543178915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244140 + }, + { + "epoch": 1.1840833780931237, + "grad_norm": 2.892473638382853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244150 + }, + { + "epoch": 1.1841318762859598, + "grad_norm": 2.7982210326626955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244160 + }, + { + "epoch": 1.184180374478796, + "grad_norm": 2.7213221898136908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244170 + }, + { + "epoch": 1.1842288726716321, + "grad_norm": 2.8201274204775473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244180 + }, + { + "epoch": 1.1842773708644683, + "grad_norm": 2.9125265754714746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244190 + }, + { + "epoch": 1.1843258690573042, + "grad_norm": 2.8842473298595905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244200 + }, + { + "epoch": 1.1843743672501403, + "grad_norm": 2.8891957271071078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244210 + }, + { + "epoch": 1.1844228654429765, + "grad_norm": 2.929884601599042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244220 + }, + { + "epoch": 1.1844713636358124, + "grad_norm": 2.6806651121091818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244230 + }, + { + "epoch": 1.1845198618286485, + "grad_norm": 2.9186351113708042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244240 + }, + { + "epoch": 1.1845683600214847, + "grad_norm": 3.004893400770925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244250 + }, + { + "epoch": 1.1846168582143208, + "grad_norm": 3.205450482823835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244260 + }, + { + "epoch": 1.184665356407157, + "grad_norm": 3.372204204765694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244270 + }, + { + "epoch": 1.184713854599993, + "grad_norm": 2.8526738304890387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244280 + }, + { + "epoch": 1.184762352792829, + "grad_norm": 2.839972523815959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244290 + }, + { + "epoch": 1.1848108509856652, + "grad_norm": 2.7653886292000607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244300 + }, + { + "epoch": 1.1848593491785013, + "grad_norm": 2.8019877973406437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244310 + }, + { + "epoch": 1.1849078473713373, + "grad_norm": 2.6377907857977334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244320 + }, + { + "epoch": 1.1849563455641734, + "grad_norm": 2.858116765480645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244330 + }, + { + "epoch": 1.1850048437570095, + "grad_norm": 3.102077528183145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244340 + }, + { + "epoch": 1.1850533419498457, + "grad_norm": 2.995994918819633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244350 + }, + { + "epoch": 1.1851018401426816, + "grad_norm": 2.804380194731948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244360 + }, + { + "epoch": 1.1851503383355178, + "grad_norm": 2.6521250973132737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244370 + }, + { + "epoch": 1.185198836528354, + "grad_norm": 2.7282043291165792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244380 + }, + { + "epoch": 1.18524733472119, + "grad_norm": 3.394127290334836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244390 + }, + { + "epoch": 1.185295832914026, + "grad_norm": 2.6423776944284327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244400 + }, + { + "epoch": 1.185344331106862, + "grad_norm": 2.5509493184472376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244410 + }, + { + "epoch": 1.1853928292996982, + "grad_norm": 2.609548133136741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244420 + }, + { + "epoch": 1.1854413274925344, + "grad_norm": 2.344973282220053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244430 + }, + { + "epoch": 1.1854898256853703, + "grad_norm": 2.856662639771912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244440 + }, + { + "epoch": 1.1855383238782065, + "grad_norm": 2.5746940579551847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244450 + }, + { + "epoch": 1.1855868220710426, + "grad_norm": 2.8966095300120287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244460 + }, + { + "epoch": 1.1856353202638787, + "grad_norm": 2.8452173950199722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244470 + }, + { + "epoch": 1.1856838184567147, + "grad_norm": 4.667907305133667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244480 + }, + { + "epoch": 1.1857323166495508, + "grad_norm": 2.6450845069803108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244490 + }, + { + "epoch": 1.185780814842387, + "grad_norm": 2.57955896643125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244500 + }, + { + "epoch": 1.185829313035223, + "grad_norm": 3.210838173117736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244510 + }, + { + "epoch": 1.185877811228059, + "grad_norm": 2.823421318964847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244520 + }, + { + "epoch": 1.1859263094208952, + "grad_norm": 2.6613225401206364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244530 + }, + { + "epoch": 1.1859748076137313, + "grad_norm": 3.030145023785735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244540 + }, + { + "epoch": 1.1860233058065675, + "grad_norm": 2.8432705079239895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244550 + }, + { + "epoch": 1.1860718039994036, + "grad_norm": 2.9590964345516113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244560 + }, + { + "epoch": 1.1861203021922395, + "grad_norm": 2.754612893340891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244570 + }, + { + "epoch": 1.1861688003850757, + "grad_norm": 2.3323092790406008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244580 + }, + { + "epoch": 1.1862172985779118, + "grad_norm": 2.947403743291943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244590 + }, + { + "epoch": 1.1862657967707477, + "grad_norm": 3.7074528336233925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244600 + }, + { + "epoch": 1.1863142949635839, + "grad_norm": 2.6975303768494996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244610 + }, + { + "epoch": 1.18636279315642, + "grad_norm": 3.040399221276857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244620 + }, + { + "epoch": 1.1864112913492562, + "grad_norm": 2.577030322470364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244630 + }, + { + "epoch": 1.1864597895420923, + "grad_norm": 2.567338874825964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244640 + }, + { + "epoch": 1.1865082877349282, + "grad_norm": 2.5337415721082834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244650 + }, + { + "epoch": 1.1865567859277644, + "grad_norm": 2.584194369603665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244660 + }, + { + "epoch": 1.1866052841206005, + "grad_norm": 2.7206292330106407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244670 + }, + { + "epoch": 1.1866537823134364, + "grad_norm": 2.5245350698810398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244680 + }, + { + "epoch": 1.1867022805062726, + "grad_norm": 2.649819919042784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244690 + }, + { + "epoch": 1.1867507786991087, + "grad_norm": 2.4577204271736264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244700 + }, + { + "epoch": 1.1867992768919449, + "grad_norm": 2.635034057618668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244710 + }, + { + "epoch": 1.186847775084781, + "grad_norm": 2.760515549482534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244720 + }, + { + "epoch": 1.186896273277617, + "grad_norm": 2.8634783433290067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244730 + }, + { + "epoch": 1.186944771470453, + "grad_norm": 2.687740519036197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244740 + }, + { + "epoch": 1.1869932696632892, + "grad_norm": 2.6139545639125572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244750 + }, + { + "epoch": 1.1870417678561251, + "grad_norm": 3.031028583677653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244760 + }, + { + "epoch": 1.1870902660489613, + "grad_norm": 2.491474404564542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244770 + }, + { + "epoch": 1.1871387642417974, + "grad_norm": 5.535698477387996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244780 + }, + { + "epoch": 1.1871872624346336, + "grad_norm": 2.4424881672757692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244790 + }, + { + "epoch": 1.1872357606274697, + "grad_norm": 2.4716916513511933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244800 + }, + { + "epoch": 1.1872842588203056, + "grad_norm": 6.339055147464023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244810 + }, + { + "epoch": 1.1873327570131418, + "grad_norm": 2.8561103704305424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244820 + }, + { + "epoch": 1.187381255205978, + "grad_norm": 2.789310471484896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244830 + }, + { + "epoch": 1.187429753398814, + "grad_norm": 2.4992695912828822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244840 + }, + { + "epoch": 1.18747825159165, + "grad_norm": 2.500910945002488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244850 + }, + { + "epoch": 1.1875267497844861, + "grad_norm": 2.5669910641568094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244860 + }, + { + "epoch": 1.1875752479773223, + "grad_norm": 2.4408107535123236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244870 + }, + { + "epoch": 1.1876237461701584, + "grad_norm": 2.414517652482573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244880 + }, + { + "epoch": 1.1876722443629943, + "grad_norm": 2.7343897812670548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244890 + }, + { + "epoch": 1.1877207425558305, + "grad_norm": 3.800097303496841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244900 + }, + { + "epoch": 1.1877692407486666, + "grad_norm": 2.7082375453346685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244910 + }, + { + "epoch": 1.1878177389415028, + "grad_norm": 2.547147737175237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244920 + }, + { + "epoch": 1.1878662371343387, + "grad_norm": 2.3318754927004193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244930 + }, + { + "epoch": 1.1879147353271748, + "grad_norm": 4.6055802727096307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244940 + }, + { + "epoch": 1.187963233520011, + "grad_norm": 2.6580888601301922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244950 + }, + { + "epoch": 1.1880117317128471, + "grad_norm": 2.793963815861389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244960 + }, + { + "epoch": 1.188060229905683, + "grad_norm": 2.6658925733613614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244970 + }, + { + "epoch": 1.1881087280985192, + "grad_norm": 3.107129842305767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244980 + }, + { + "epoch": 1.1881572262913553, + "grad_norm": 2.665911225108175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 244990 + }, + { + "epoch": 1.1882057244841915, + "grad_norm": 3.536548121019223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245000 + }, + { + "epoch": 1.1882542226770274, + "grad_norm": 2.9154596958846923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245010 + }, + { + "epoch": 1.1883027208698635, + "grad_norm": 3.62962353506191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245020 + }, + { + "epoch": 1.1883512190626997, + "grad_norm": 3.2328014043514486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245030 + }, + { + "epoch": 1.1883997172555358, + "grad_norm": 4.5303231388515997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245040 + }, + { + "epoch": 1.1884482154483718, + "grad_norm": 2.830938683473505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245050 + }, + { + "epoch": 1.188496713641208, + "grad_norm": 2.9540046853071544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245060 + }, + { + "epoch": 1.188545211834044, + "grad_norm": 2.773793106314315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245070 + }, + { + "epoch": 1.1885937100268802, + "grad_norm": 2.986074676414319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245080 + }, + { + "epoch": 1.1886422082197163, + "grad_norm": 5.0040181776012105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245090 + }, + { + "epoch": 1.1886907064125523, + "grad_norm": 3.039087914658012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245100 + }, + { + "epoch": 1.1887392046053884, + "grad_norm": 2.6082643600489064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245110 + }, + { + "epoch": 1.1887877027982245, + "grad_norm": 2.8214710567908696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245120 + }, + { + "epoch": 1.1888362009910605, + "grad_norm": 2.868753057327922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245130 + }, + { + "epoch": 1.1888846991838966, + "grad_norm": 3.643067714165227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245140 + }, + { + "epoch": 1.1889331973767328, + "grad_norm": 2.6842920775038692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245150 + }, + { + "epoch": 1.188981695569569, + "grad_norm": 2.594329728822231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245160 + }, + { + "epoch": 1.189030193762405, + "grad_norm": 2.6000014585747522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245170 + }, + { + "epoch": 1.189078691955241, + "grad_norm": 2.6778700146223855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245180 + }, + { + "epoch": 1.189127190148077, + "grad_norm": 2.8293078102592517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245190 + }, + { + "epoch": 1.1891756883409133, + "grad_norm": 2.4333356662964434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245200 + }, + { + "epoch": 1.1892241865337492, + "grad_norm": 2.3258714065832464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245210 + }, + { + "epoch": 1.1892726847265853, + "grad_norm": 2.76088822914744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245220 + }, + { + "epoch": 1.1893211829194215, + "grad_norm": 2.5387913993313305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245230 + }, + { + "epoch": 1.1893696811122576, + "grad_norm": 2.820602063025035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245240 + }, + { + "epoch": 1.1894181793050937, + "grad_norm": 2.2641136965262376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245250 + }, + { + "epoch": 1.1894666774979297, + "grad_norm": 6.857550971517412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245260 + }, + { + "epoch": 1.1895151756907658, + "grad_norm": 2.4350384819626925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245270 + }, + { + "epoch": 1.189563673883602, + "grad_norm": 2.4265286668878616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245280 + }, + { + "epoch": 1.1896121720764379, + "grad_norm": 2.7676481550997778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245290 + }, + { + "epoch": 1.189660670269274, + "grad_norm": 3.570748319248196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245300 + }, + { + "epoch": 1.1897091684621102, + "grad_norm": 2.8226772030848224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245310 + }, + { + "epoch": 1.1897576666549463, + "grad_norm": 2.5064242237249346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245320 + }, + { + "epoch": 1.1898061648477825, + "grad_norm": 2.2322289794374228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245330 + }, + { + "epoch": 1.1898546630406184, + "grad_norm": 2.7250758094510275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245340 + }, + { + "epoch": 1.1899031612334545, + "grad_norm": 2.77010627769414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245350 + }, + { + "epoch": 1.1899516594262907, + "grad_norm": 2.452792990936814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245360 + }, + { + "epoch": 1.1900001576191268, + "grad_norm": 2.378184404960848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245370 + }, + { + "epoch": 1.1900486558119627, + "grad_norm": 2.4475374615917644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245380 + }, + { + "epoch": 1.1900971540047989, + "grad_norm": 3.057877151491084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245390 + }, + { + "epoch": 1.190145652197635, + "grad_norm": 2.7983634964812154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245400 + }, + { + "epoch": 1.1901941503904712, + "grad_norm": 2.999466630626557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245410 + }, + { + "epoch": 1.190242648583307, + "grad_norm": 2.483619532256398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245420 + }, + { + "epoch": 1.1902911467761432, + "grad_norm": 2.3088693623662948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245430 + }, + { + "epoch": 1.1903396449689794, + "grad_norm": 2.7734685659197567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245440 + }, + { + "epoch": 1.1903881431618155, + "grad_norm": 2.4376005214321594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245450 + }, + { + "epoch": 1.1904366413546514, + "grad_norm": 2.2762844054113884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245460 + }, + { + "epoch": 1.1904851395474876, + "grad_norm": 2.2331134275077602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245470 + }, + { + "epoch": 1.1905336377403237, + "grad_norm": 3.311134122441217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245480 + }, + { + "epoch": 1.1905821359331599, + "grad_norm": 2.736935833524967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245490 + }, + { + "epoch": 1.1906306341259958, + "grad_norm": 2.2954171896572007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245500 + }, + { + "epoch": 1.190679132318832, + "grad_norm": 2.323503522916326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245510 + }, + { + "epoch": 1.190727630511668, + "grad_norm": 2.6672282160689065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245520 + }, + { + "epoch": 1.1907761287045042, + "grad_norm": 2.1878973299749305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245530 + }, + { + "epoch": 1.1908246268973401, + "grad_norm": 2.5914088652712053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245540 + }, + { + "epoch": 1.1908731250901763, + "grad_norm": 2.426640222097376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245550 + }, + { + "epoch": 1.1909216232830124, + "grad_norm": 2.256746256534825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245560 + }, + { + "epoch": 1.1909701214758486, + "grad_norm": 2.6853003376459128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245570 + }, + { + "epoch": 1.1910186196686845, + "grad_norm": 2.5116872137687096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245580 + }, + { + "epoch": 1.1910671178615206, + "grad_norm": 3.040902640805143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245590 + }, + { + "epoch": 1.1911156160543568, + "grad_norm": 2.447331404198394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245600 + }, + { + "epoch": 1.191164114247193, + "grad_norm": 2.1519525716939825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245610 + }, + { + "epoch": 1.191212612440029, + "grad_norm": 2.11657162907386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245620 + }, + { + "epoch": 1.191261110632865, + "grad_norm": 2.1462538413175025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245630 + }, + { + "epoch": 1.1913096088257011, + "grad_norm": 2.4482917027057738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245640 + }, + { + "epoch": 1.1913581070185373, + "grad_norm": 2.541893451279975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245650 + }, + { + "epoch": 1.1914066052113732, + "grad_norm": 2.489134587335684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245660 + }, + { + "epoch": 1.1914551034042093, + "grad_norm": 2.6713104617215322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245670 + }, + { + "epoch": 1.1915036015970455, + "grad_norm": 2.2406833721788644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245680 + }, + { + "epoch": 1.1915520997898816, + "grad_norm": 2.6639460415367466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245690 + }, + { + "epoch": 1.1916005979827178, + "grad_norm": 2.524694941996586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245700 + }, + { + "epoch": 1.1916490961755537, + "grad_norm": 2.211936767082534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245710 + }, + { + "epoch": 1.1916975943683898, + "grad_norm": 2.63313921777808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245720 + }, + { + "epoch": 1.191746092561226, + "grad_norm": 2.4867164327702085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245730 + }, + { + "epoch": 1.191794590754062, + "grad_norm": 2.5959847604895003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245740 + }, + { + "epoch": 1.191843088946898, + "grad_norm": 2.32840218217234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245750 + }, + { + "epoch": 1.1918915871397342, + "grad_norm": 2.2419540002260874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245760 + }, + { + "epoch": 1.1919400853325703, + "grad_norm": 2.0886142593212753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245770 + }, + { + "epoch": 1.1919885835254065, + "grad_norm": 1.9113064908538036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245780 + }, + { + "epoch": 1.1920370817182424, + "grad_norm": 2.3836683737954445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245790 + }, + { + "epoch": 1.1920855799110786, + "grad_norm": 2.234120799471384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245800 + }, + { + "epoch": 1.1921340781039147, + "grad_norm": 2.028102308315738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245810 + }, + { + "epoch": 1.1921825762967506, + "grad_norm": 2.1123442550674554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245820 + }, + { + "epoch": 1.1922310744895868, + "grad_norm": 2.2615575190343407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245830 + }, + { + "epoch": 1.192279572682423, + "grad_norm": 2.5688088101105677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245840 + }, + { + "epoch": 1.192328070875259, + "grad_norm": 2.1315155862566826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245850 + }, + { + "epoch": 1.1923765690680952, + "grad_norm": 2.386368436191333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245860 + }, + { + "epoch": 1.1924250672609311, + "grad_norm": 2.1675086614436623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245870 + }, + { + "epoch": 1.1924735654537673, + "grad_norm": 2.197676174375829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245880 + }, + { + "epoch": 1.1925220636466034, + "grad_norm": 2.44489708478568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245890 + }, + { + "epoch": 1.1925705618394395, + "grad_norm": 2.0530036337618185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245900 + }, + { + "epoch": 1.1926190600322755, + "grad_norm": 2.2937442167858535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245910 + }, + { + "epoch": 1.1926675582251116, + "grad_norm": 2.3956637562605465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245920 + }, + { + "epoch": 1.1927160564179478, + "grad_norm": 2.0611981454976558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245930 + }, + { + "epoch": 1.192764554610784, + "grad_norm": 2.2296035240287893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245940 + }, + { + "epoch": 1.1928130528036198, + "grad_norm": 3.0193302080760986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245950 + }, + { + "epoch": 1.192861550996456, + "grad_norm": 2.2456733361764236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245960 + }, + { + "epoch": 1.192910049189292, + "grad_norm": 2.17178079964242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245970 + }, + { + "epoch": 1.1929585473821283, + "grad_norm": 2.6212660486635286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245980 + }, + { + "epoch": 1.1930070455749642, + "grad_norm": 2.2244398323323367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 245990 + }, + { + "epoch": 1.1930555437678003, + "grad_norm": 2.161837997505245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246000 + }, + { + "epoch": 1.1931040419606365, + "grad_norm": 2.289800171695333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246010 + }, + { + "epoch": 1.1931525401534726, + "grad_norm": 2.5224144550861638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246020 + }, + { + "epoch": 1.1932010383463085, + "grad_norm": 2.2935775945143178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246030 + }, + { + "epoch": 1.1932495365391447, + "grad_norm": 2.2038877389718436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246040 + }, + { + "epoch": 1.1932980347319808, + "grad_norm": 2.381261943185109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246050 + }, + { + "epoch": 1.193346532924817, + "grad_norm": 2.2427828483273515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246060 + }, + { + "epoch": 1.1933950311176529, + "grad_norm": 2.1695548468869674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246070 + }, + { + "epoch": 1.193443529310489, + "grad_norm": 2.3048366770694884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246080 + }, + { + "epoch": 1.1934920275033252, + "grad_norm": 2.2114528874794814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246090 + }, + { + "epoch": 1.1935405256961613, + "grad_norm": 2.1322184906580333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246100 + }, + { + "epoch": 1.1935890238889972, + "grad_norm": 7.589257933204863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246110 + }, + { + "epoch": 1.1936375220818334, + "grad_norm": 2.4291749056715162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246120 + }, + { + "epoch": 1.1936860202746695, + "grad_norm": 2.0059305327890797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246130 + }, + { + "epoch": 1.1937345184675057, + "grad_norm": 2.4693420641597186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246140 + }, + { + "epoch": 1.1937830166603418, + "grad_norm": 2.1669745109420546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246150 + }, + { + "epoch": 1.1938315148531777, + "grad_norm": 2.188431835747906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246160 + }, + { + "epoch": 1.1938800130460139, + "grad_norm": 2.1904389413407443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246170 + }, + { + "epoch": 1.19392851123885, + "grad_norm": 1.8553333092086177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246180 + }, + { + "epoch": 1.193977009431686, + "grad_norm": 2.121228170892664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246190 + }, + { + "epoch": 1.194025507624522, + "grad_norm": 2.355523243124935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246200 + }, + { + "epoch": 1.1940740058173582, + "grad_norm": 2.5970058104007876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246210 + }, + { + "epoch": 1.1941225040101944, + "grad_norm": 2.3238431623440192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246220 + }, + { + "epoch": 1.1941710022030305, + "grad_norm": 2.4271363585626204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246230 + }, + { + "epoch": 1.1942195003958664, + "grad_norm": 2.3933949933052645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246240 + }, + { + "epoch": 1.1942679985887026, + "grad_norm": 1.8923772771017866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246250 + }, + { + "epoch": 1.1943164967815387, + "grad_norm": 1.9335226753014467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246260 + }, + { + "epoch": 1.1943649949743746, + "grad_norm": 2.6010036791035418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246270 + }, + { + "epoch": 1.1944134931672108, + "grad_norm": 2.4559726696793405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246280 + }, + { + "epoch": 1.194461991360047, + "grad_norm": 2.6355831295177268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246290 + }, + { + "epoch": 1.194510489552883, + "grad_norm": 2.2385117759426976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246300 + }, + { + "epoch": 1.1945589877457192, + "grad_norm": 2.2011397149412915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246310 + }, + { + "epoch": 1.1946074859385551, + "grad_norm": 2.0883703655272257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246320 + }, + { + "epoch": 1.1946559841313913, + "grad_norm": 2.0811475209825403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246330 + }, + { + "epoch": 1.1947044823242274, + "grad_norm": 2.2662073106971548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246340 + }, + { + "epoch": 1.1947529805170636, + "grad_norm": 2.22893810075675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246350 + }, + { + "epoch": 1.1948014787098995, + "grad_norm": 2.6697701827060882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246360 + }, + { + "epoch": 1.1948499769027356, + "grad_norm": 2.040985513929172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246370 + }, + { + "epoch": 1.1948984750955718, + "grad_norm": 2.0573315495653333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246380 + }, + { + "epoch": 1.194946973288408, + "grad_norm": 1.9801746020675637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246390 + }, + { + "epoch": 1.1949954714812439, + "grad_norm": 4.893247051995786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246400 + }, + { + "epoch": 1.19504396967408, + "grad_norm": 2.1408714800941198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246410 + }, + { + "epoch": 1.1950924678669161, + "grad_norm": 2.39327171414061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246420 + }, + { + "epoch": 1.1951409660597523, + "grad_norm": 1.953748274274858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246430 + }, + { + "epoch": 1.1951894642525882, + "grad_norm": 2.223429262926402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246440 + }, + { + "epoch": 1.1952379624454244, + "grad_norm": 2.055226033803592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246450 + }, + { + "epoch": 1.1952864606382605, + "grad_norm": 3.162035611126157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246460 + }, + { + "epoch": 1.1953349588310966, + "grad_norm": 2.080138195026393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246470 + }, + { + "epoch": 1.1953834570239326, + "grad_norm": 1.830477103226258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246480 + }, + { + "epoch": 1.1954319552167687, + "grad_norm": 2.3040724883571784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246490 + }, + { + "epoch": 1.1954804534096048, + "grad_norm": 1.90663147492387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246500 + }, + { + "epoch": 1.195528951602441, + "grad_norm": 2.4255566444253418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246510 + }, + { + "epoch": 1.195577449795277, + "grad_norm": 1.8511659760633847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246520 + }, + { + "epoch": 1.195625947988113, + "grad_norm": 1.9677719009791872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246530 + }, + { + "epoch": 1.1956744461809492, + "grad_norm": 2.743315796749357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246540 + }, + { + "epoch": 1.1957229443737853, + "grad_norm": 1.9008728813219022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246550 + }, + { + "epoch": 1.1957714425666213, + "grad_norm": 2.163736390059512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246560 + }, + { + "epoch": 1.1958199407594574, + "grad_norm": 2.0267448164190682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246570 + }, + { + "epoch": 1.1958684389522936, + "grad_norm": 1.82945782967181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246580 + }, + { + "epoch": 1.1959169371451297, + "grad_norm": 2.685335331875649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246590 + }, + { + "epoch": 1.1959654353379658, + "grad_norm": 2.369067964025362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246600 + }, + { + "epoch": 1.1960139335308018, + "grad_norm": 2.3541465665744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246610 + }, + { + "epoch": 1.196062431723638, + "grad_norm": 3.228103651053971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246620 + }, + { + "epoch": 1.196110929916474, + "grad_norm": 2.4343965066009332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246630 + }, + { + "epoch": 1.19615942810931, + "grad_norm": 2.2212983452618573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246640 + }, + { + "epoch": 1.1962079263021461, + "grad_norm": 1.9541284146384896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246650 + }, + { + "epoch": 1.1962564244949823, + "grad_norm": 1.9992254962630795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246660 + }, + { + "epoch": 1.1963049226878184, + "grad_norm": 2.2357376394666062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246670 + }, + { + "epoch": 1.1963534208806546, + "grad_norm": 1.8571677529166664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246680 + }, + { + "epoch": 1.1964019190734905, + "grad_norm": 2.0759042484996826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246690 + }, + { + "epoch": 1.1964504172663266, + "grad_norm": 2.0891409491241575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246700 + }, + { + "epoch": 1.1964989154591628, + "grad_norm": 2.3324515652234368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246710 + }, + { + "epoch": 1.1965474136519987, + "grad_norm": 2.381803554385442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246720 + }, + { + "epoch": 1.1965959118448348, + "grad_norm": 1.8384101352353355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246730 + }, + { + "epoch": 1.196644410037671, + "grad_norm": 2.2902050034190324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246740 + }, + { + "epoch": 1.1966929082305071, + "grad_norm": 2.222640560489708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246750 + }, + { + "epoch": 1.1967414064233433, + "grad_norm": 2.5820666493814315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246760 + }, + { + "epoch": 1.1967899046161792, + "grad_norm": 1.8489931363774303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246770 + }, + { + "epoch": 1.1968384028090153, + "grad_norm": 1.9615294277741668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246780 + }, + { + "epoch": 1.1968869010018515, + "grad_norm": 3.090622868739956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246790 + }, + { + "epoch": 1.1969353991946874, + "grad_norm": 2.0265128242158426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246800 + }, + { + "epoch": 1.1969838973875235, + "grad_norm": 2.1385522686045988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246810 + }, + { + "epoch": 1.1970323955803597, + "grad_norm": 1.8251418154591192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246820 + }, + { + "epoch": 1.1970808937731958, + "grad_norm": 2.031742063479669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246830 + }, + { + "epoch": 1.197129391966032, + "grad_norm": 2.5386455604348157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246840 + }, + { + "epoch": 1.1971778901588679, + "grad_norm": 1.763595847137367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246850 + }, + { + "epoch": 1.197226388351704, + "grad_norm": 2.1039030073666254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246860 + }, + { + "epoch": 1.1972748865445402, + "grad_norm": 2.2348046968545532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246870 + }, + { + "epoch": 1.1973233847373763, + "grad_norm": 2.0138358536314627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246880 + }, + { + "epoch": 1.1973718829302122, + "grad_norm": 2.4899765804775598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246890 + }, + { + "epoch": 1.1974203811230484, + "grad_norm": 2.2641769348297203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246900 + }, + { + "epoch": 1.1974688793158845, + "grad_norm": 1.6903065613860235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246910 + }, + { + "epoch": 1.1975173775087207, + "grad_norm": 1.8250874589398336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246920 + }, + { + "epoch": 1.1975658757015566, + "grad_norm": 1.8535885715209588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246930 + }, + { + "epoch": 1.1976143738943927, + "grad_norm": 2.2739838456686812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246940 + }, + { + "epoch": 1.1976628720872289, + "grad_norm": 2.2312047320838246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246950 + }, + { + "epoch": 1.197711370280065, + "grad_norm": 2.2876747607369907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246960 + }, + { + "epoch": 1.197759868472901, + "grad_norm": 1.8466625562041372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246970 + }, + { + "epoch": 1.197808366665737, + "grad_norm": 2.2090544504749232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246980 + }, + { + "epoch": 1.1978568648585732, + "grad_norm": 2.0750931639668124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 246990 + }, + { + "epoch": 1.1979053630514094, + "grad_norm": 2.3767462664636696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247000 + }, + { + "epoch": 1.1979538612442453, + "grad_norm": 1.6737468300220826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247010 + }, + { + "epoch": 1.1980023594370814, + "grad_norm": 1.9091009662020042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247020 + }, + { + "epoch": 1.1980508576299176, + "grad_norm": 2.2815831002276354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247030 + }, + { + "epoch": 1.1980993558227537, + "grad_norm": 2.4861650516072586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247040 + }, + { + "epoch": 1.1981478540155897, + "grad_norm": 1.7337915991788577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247050 + }, + { + "epoch": 1.1981963522084258, + "grad_norm": 1.9337429435495324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247060 + }, + { + "epoch": 1.198244850401262, + "grad_norm": 2.280817668065538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247070 + }, + { + "epoch": 1.198293348594098, + "grad_norm": 2.0868242245342117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247080 + }, + { + "epoch": 1.198341846786934, + "grad_norm": 2.506482132957899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247090 + }, + { + "epoch": 1.1983903449797702, + "grad_norm": 1.7439280242115274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247100 + }, + { + "epoch": 1.1984388431726063, + "grad_norm": 2.293135104025623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247110 + }, + { + "epoch": 1.1984873413654424, + "grad_norm": 1.6946044567589524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247120 + }, + { + "epoch": 1.1985358395582786, + "grad_norm": 1.8921145539252393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247130 + }, + { + "epoch": 1.1985843377511145, + "grad_norm": 1.8532043455365965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247140 + }, + { + "epoch": 1.1986328359439506, + "grad_norm": 2.0078806173273733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247150 + }, + { + "epoch": 1.1986813341367868, + "grad_norm": 1.9240779636220395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247160 + }, + { + "epoch": 1.1987298323296227, + "grad_norm": 1.778647984451709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247170 + }, + { + "epoch": 1.1987783305224589, + "grad_norm": 1.882465738844985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247180 + }, + { + "epoch": 1.198826828715295, + "grad_norm": 2.7760579612845504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247190 + }, + { + "epoch": 1.1988753269081311, + "grad_norm": 2.5933625025231777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247200 + }, + { + "epoch": 1.1989238251009673, + "grad_norm": 2.3062822762653923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247210 + }, + { + "epoch": 1.1989723232938032, + "grad_norm": 1.7242442140741332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247220 + }, + { + "epoch": 1.1990208214866394, + "grad_norm": 1.8916253452516685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247230 + }, + { + "epoch": 1.1990693196794755, + "grad_norm": 2.148245492605838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247240 + }, + { + "epoch": 1.1991178178723114, + "grad_norm": 2.187780623330582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247250 + }, + { + "epoch": 1.1991663160651476, + "grad_norm": 2.634622653374663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247260 + }, + { + "epoch": 1.1992148142579837, + "grad_norm": 1.8680335500675938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247270 + }, + { + "epoch": 1.1992633124508199, + "grad_norm": 1.8357981801386813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247280 + }, + { + "epoch": 1.199311810643656, + "grad_norm": 2.2190492998674927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247290 + }, + { + "epoch": 1.199360308836492, + "grad_norm": 2.2022447865310824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247300 + }, + { + "epoch": 1.199408807029328, + "grad_norm": 2.0672263900678445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247310 + }, + { + "epoch": 1.1994573052221642, + "grad_norm": 1.916062153384246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247320 + }, + { + "epoch": 1.1995058034150001, + "grad_norm": 2.0176312176545252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247330 + }, + { + "epoch": 1.1995543016078363, + "grad_norm": 1.7912318739377042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247340 + }, + { + "epoch": 1.1996027998006724, + "grad_norm": 2.2449311742889222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247350 + }, + { + "epoch": 1.1996512979935086, + "grad_norm": 1.5424785004825026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247360 + }, + { + "epoch": 1.1996997961863447, + "grad_norm": 2.041649693751424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247370 + }, + { + "epoch": 1.1997482943791806, + "grad_norm": 1.7969441046261636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247380 + }, + { + "epoch": 1.1997967925720168, + "grad_norm": 2.0061067473875482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247390 + }, + { + "epoch": 1.199845290764853, + "grad_norm": 1.923095815925535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247400 + }, + { + "epoch": 1.199893788957689, + "grad_norm": 1.851405428965336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247410 + }, + { + "epoch": 1.199942287150525, + "grad_norm": 2.0645630982585317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247420 + }, + { + "epoch": 1.1999907853433611, + "grad_norm": 2.1594335208874327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247430 + }, + { + "epoch": 1.2000392835361973, + "grad_norm": 1.8556203684738648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247440 + }, + { + "epoch": 1.2000877817290334, + "grad_norm": 2.1042588116415573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247450 + }, + { + "epoch": 1.2001362799218693, + "grad_norm": 2.4207942317389097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247460 + }, + { + "epoch": 1.2001847781147055, + "grad_norm": 1.7111439376549242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247470 + }, + { + "epoch": 1.2002332763075416, + "grad_norm": 2.03462349190886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247480 + }, + { + "epoch": 1.2002817745003778, + "grad_norm": 2.143151256461806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247490 + }, + { + "epoch": 1.2003302726932137, + "grad_norm": 1.744767352818144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247500 + }, + { + "epoch": 1.2003787708860498, + "grad_norm": 2.3356243161742896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247510 + }, + { + "epoch": 1.200427269078886, + "grad_norm": 1.7007764085974486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247520 + }, + { + "epoch": 1.2004757672717221, + "grad_norm": 1.9996161171320637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247530 + }, + { + "epoch": 1.200524265464558, + "grad_norm": 1.9587364619155778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247540 + }, + { + "epoch": 1.2005727636573942, + "grad_norm": 2.4895264516544557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247550 + }, + { + "epoch": 1.2006212618502303, + "grad_norm": 2.0553143187385103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247560 + }, + { + "epoch": 1.2006697600430665, + "grad_norm": 1.8034063131722178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247570 + }, + { + "epoch": 1.2007182582359024, + "grad_norm": 1.5713414569518136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247580 + }, + { + "epoch": 1.2007667564287385, + "grad_norm": 1.9641165138750694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247590 + }, + { + "epoch": 1.2008152546215747, + "grad_norm": 1.9046813903855764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247600 + }, + { + "epoch": 1.2008637528144108, + "grad_norm": 2.1006735906325957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247610 + }, + { + "epoch": 1.2009122510072467, + "grad_norm": 1.9194196454463963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247620 + }, + { + "epoch": 1.2009607492000829, + "grad_norm": 1.910531644000457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247630 + }, + { + "epoch": 1.201009247392919, + "grad_norm": 2.3468789578373617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247640 + }, + { + "epoch": 1.2010577455857552, + "grad_norm": 1.873050337053428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247650 + }, + { + "epoch": 1.2011062437785913, + "grad_norm": 1.6869007524178414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247660 + }, + { + "epoch": 1.2011547419714272, + "grad_norm": 1.722950848659366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247670 + }, + { + "epoch": 1.2012032401642634, + "grad_norm": 2.0851066651061956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247680 + }, + { + "epoch": 1.2012517383570995, + "grad_norm": 2.0416159429714753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247690 + }, + { + "epoch": 1.2013002365499355, + "grad_norm": 2.0668441180760055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247700 + }, + { + "epoch": 1.2013487347427716, + "grad_norm": 2.1767998958921453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247710 + }, + { + "epoch": 1.2013972329356077, + "grad_norm": 1.8885955910263874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247720 + }, + { + "epoch": 1.2014457311284439, + "grad_norm": 1.803864257965415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247730 + }, + { + "epoch": 1.20149422932128, + "grad_norm": 2.039037028112034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247740 + }, + { + "epoch": 1.201542727514116, + "grad_norm": 2.282241950979369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247750 + }, + { + "epoch": 1.201591225706952, + "grad_norm": 2.3084702149844816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247760 + }, + { + "epoch": 1.2016397238997882, + "grad_norm": 1.722067466403132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247770 + }, + { + "epoch": 1.2016882220926242, + "grad_norm": 1.941645955128024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247780 + }, + { + "epoch": 1.2017367202854603, + "grad_norm": 1.804220417511715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247790 + }, + { + "epoch": 1.2017852184782964, + "grad_norm": 1.9050043320589793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247800 + }, + { + "epoch": 1.2018337166711326, + "grad_norm": 1.5950597287428536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247810 + }, + { + "epoch": 1.2018822148639687, + "grad_norm": 2.1827442964195143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247820 + }, + { + "epoch": 1.2019307130568047, + "grad_norm": 1.8969386061939986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247830 + }, + { + "epoch": 1.2019792112496408, + "grad_norm": 1.6561280347104912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247840 + }, + { + "epoch": 1.202027709442477, + "grad_norm": 2.3326633069586933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247850 + }, + { + "epoch": 1.2020762076353129, + "grad_norm": 1.8731789452886005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247860 + }, + { + "epoch": 1.202124705828149, + "grad_norm": 1.741592647874768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247870 + }, + { + "epoch": 1.2021732040209852, + "grad_norm": 2.292350842481028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247880 + }, + { + "epoch": 1.2022217022138213, + "grad_norm": 1.6573446615097964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247890 + }, + { + "epoch": 1.2022702004066574, + "grad_norm": 1.9899404790635344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247900 + }, + { + "epoch": 1.2023186985994934, + "grad_norm": 1.818555439569991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247910 + }, + { + "epoch": 1.2023671967923295, + "grad_norm": 1.5219891125184404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247920 + }, + { + "epoch": 1.2024156949851657, + "grad_norm": 1.9665126416157364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247930 + }, + { + "epoch": 1.2024641931780018, + "grad_norm": 2.42938877903498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247940 + }, + { + "epoch": 1.2025126913708377, + "grad_norm": 2.0048734228339526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247950 + }, + { + "epoch": 1.2025611895636739, + "grad_norm": 1.871220156601794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247960 + }, + { + "epoch": 1.20260968775651, + "grad_norm": 2.09928696648376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247970 + }, + { + "epoch": 1.2026581859493461, + "grad_norm": 5.219329068495426e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 247980 + }, + { + "epoch": 1.202706684142182, + "grad_norm": 4.309924406697974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 247990 + }, + { + "epoch": 1.2027551823350182, + "grad_norm": 0.00022024310601409525, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 248000 + }, + { + "epoch": 1.2028036805278544, + "grad_norm": 0.002534642582759261, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 248010 + }, + { + "epoch": 1.2028521787206905, + "grad_norm": 0.0005599482683464885, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248020 + }, + { + "epoch": 1.2029006769135264, + "grad_norm": 5.776131365564652e-05, + "learning_rate": 0.0002, + "loss": 0.005, + "step": 248030 + }, + { + "epoch": 1.2029491751063626, + "grad_norm": 5.2220540965208784e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248040 + }, + { + "epoch": 1.2029976732991987, + "grad_norm": 0.00011626447667367756, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248050 + }, + { + "epoch": 1.2030461714920349, + "grad_norm": 5.910659183427924e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 248060 + }, + { + "epoch": 1.2030946696848708, + "grad_norm": 1.4799037671764381e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248070 + }, + { + "epoch": 1.203143167877707, + "grad_norm": 7.742353773210198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248080 + }, + { + "epoch": 1.203191666070543, + "grad_norm": 3.4617439723660937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248090 + }, + { + "epoch": 1.2032401642633792, + "grad_norm": 7.410003036056878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248100 + }, + { + "epoch": 1.2032886624562151, + "grad_norm": 5.616142061626306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248110 + }, + { + "epoch": 1.2033371606490513, + "grad_norm": 7.5903285505773965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248120 + }, + { + "epoch": 1.2033856588418874, + "grad_norm": 5.059300747234374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248130 + }, + { + "epoch": 1.2034341570347236, + "grad_norm": 7.607305633428041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248140 + }, + { + "epoch": 1.2034826552275595, + "grad_norm": 4.655849352275254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248150 + }, + { + "epoch": 1.2035311534203956, + "grad_norm": 3.494634074741043e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 248160 + }, + { + "epoch": 1.2035796516132318, + "grad_norm": 4.21022305090446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248170 + }, + { + "epoch": 1.203628149806068, + "grad_norm": 4.730173168354668e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 248180 + }, + { + "epoch": 1.203676647998904, + "grad_norm": 0.00010290499631082639, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248190 + }, + { + "epoch": 1.20372514619174, + "grad_norm": 5.7226570788770914e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248200 + }, + { + "epoch": 1.2037736443845761, + "grad_norm": 7.499583261960652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248210 + }, + { + "epoch": 1.2038221425774123, + "grad_norm": 6.492035936389584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248220 + }, + { + "epoch": 1.2038706407702482, + "grad_norm": 5.406579930422595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248230 + }, + { + "epoch": 1.2039191389630843, + "grad_norm": 3.1212441626848886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248240 + }, + { + "epoch": 1.2039676371559205, + "grad_norm": 6.208398644957924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248250 + }, + { + "epoch": 1.2040161353487566, + "grad_norm": 5.341014912119135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248260 + }, + { + "epoch": 1.2040646335415928, + "grad_norm": 6.121563728811452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248270 + }, + { + "epoch": 1.2041131317344287, + "grad_norm": 5.506666639121249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248280 + }, + { + "epoch": 1.2041616299272648, + "grad_norm": 2.7332462195772678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248290 + }, + { + "epoch": 1.204210128120101, + "grad_norm": 4.363405878393678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248300 + }, + { + "epoch": 1.204258626312937, + "grad_norm": 6.020921318850014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248310 + }, + { + "epoch": 1.204307124505773, + "grad_norm": 4.376017386675812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248320 + }, + { + "epoch": 1.2043556226986092, + "grad_norm": 3.2834284411364933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248330 + }, + { + "epoch": 1.2044041208914453, + "grad_norm": 2.263768010379863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248340 + }, + { + "epoch": 1.2044526190842815, + "grad_norm": 4.051059022458503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248350 + }, + { + "epoch": 1.2045011172771174, + "grad_norm": 3.451257498454652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248360 + }, + { + "epoch": 1.2045496154699535, + "grad_norm": 4.040288786200108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248370 + }, + { + "epoch": 1.2045981136627897, + "grad_norm": 2.759514472927549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248380 + }, + { + "epoch": 1.2046466118556258, + "grad_norm": 3.997917701781262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248390 + }, + { + "epoch": 1.2046951100484617, + "grad_norm": 3.455790420048288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248400 + }, + { + "epoch": 1.204743608241298, + "grad_norm": 2.9348834686970804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248410 + }, + { + "epoch": 1.204792106434134, + "grad_norm": 2.711450406422955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248420 + }, + { + "epoch": 1.2048406046269702, + "grad_norm": 2.594391162347165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248430 + }, + { + "epoch": 1.204889102819806, + "grad_norm": 2.6688928755902452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248440 + }, + { + "epoch": 1.2049376010126422, + "grad_norm": 2.3227053134178277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248450 + }, + { + "epoch": 1.2049860992054784, + "grad_norm": 2.4050689262367086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248460 + }, + { + "epoch": 1.2050345973983145, + "grad_norm": 3.1087051866052207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248470 + }, + { + "epoch": 1.2050830955911505, + "grad_norm": 2.237665057691629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248480 + }, + { + "epoch": 1.2051315937839866, + "grad_norm": 1.4180290008880547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248490 + }, + { + "epoch": 1.2051800919768227, + "grad_norm": 2.5171736979245907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248500 + }, + { + "epoch": 1.2052285901696589, + "grad_norm": 2.2200908915692708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248510 + }, + { + "epoch": 1.2052770883624948, + "grad_norm": 2.8831998406531056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248520 + }, + { + "epoch": 1.205325586555331, + "grad_norm": 2.0251186469977256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248530 + }, + { + "epoch": 1.205374084748167, + "grad_norm": 4.728623480332317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248540 + }, + { + "epoch": 1.2054225829410032, + "grad_norm": 2.1489292976184515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248550 + }, + { + "epoch": 1.2054710811338392, + "grad_norm": 2.2399742647394305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248560 + }, + { + "epoch": 1.2055195793266753, + "grad_norm": 2.0465636225708295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248570 + }, + { + "epoch": 1.2055680775195114, + "grad_norm": 2.1014059257140616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248580 + }, + { + "epoch": 1.2056165757123476, + "grad_norm": 1.2969677527507883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248590 + }, + { + "epoch": 1.2056650739051835, + "grad_norm": 6.3693273659737315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248600 + }, + { + "epoch": 1.2057135720980197, + "grad_norm": 1.8885370991483796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248610 + }, + { + "epoch": 1.2057620702908558, + "grad_norm": 1.912442030516104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248620 + }, + { + "epoch": 1.205810568483692, + "grad_norm": 5.288050033414038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248630 + }, + { + "epoch": 1.205859066676528, + "grad_norm": 1.262987666450499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248640 + }, + { + "epoch": 1.205907564869364, + "grad_norm": 1.951716058101738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248650 + }, + { + "epoch": 1.2059560630622002, + "grad_norm": 2.5431379526708042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248660 + }, + { + "epoch": 1.2060045612550363, + "grad_norm": 2.0883153410977684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248670 + }, + { + "epoch": 1.2060530594478722, + "grad_norm": 1.6654685168759897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248680 + }, + { + "epoch": 1.2061015576407084, + "grad_norm": 1.0858854011530639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248690 + }, + { + "epoch": 1.2061500558335445, + "grad_norm": 1.7016446918205475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248700 + }, + { + "epoch": 1.2061985540263807, + "grad_norm": 1.6640860849292949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248710 + }, + { + "epoch": 1.2062470522192168, + "grad_norm": 3.094944304393721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248720 + }, + { + "epoch": 1.2062955504120527, + "grad_norm": 1.4363138234330108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248730 + }, + { + "epoch": 1.2063440486048889, + "grad_norm": 5.2943519222026225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248740 + }, + { + "epoch": 1.206392546797725, + "grad_norm": 1.4916341797288624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248750 + }, + { + "epoch": 1.206441044990561, + "grad_norm": 1.5503092072322033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248760 + }, + { + "epoch": 1.206489543183397, + "grad_norm": 1.3826106624037493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248770 + }, + { + "epoch": 1.2065380413762332, + "grad_norm": 1.804803673621791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248780 + }, + { + "epoch": 1.2065865395690694, + "grad_norm": 9.391924891133385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248790 + }, + { + "epoch": 1.2066350377619055, + "grad_norm": 1.3799762200505938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248800 + }, + { + "epoch": 1.2066835359547414, + "grad_norm": 1.4202867532731034e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 248810 + }, + { + "epoch": 1.2067320341475776, + "grad_norm": 0.00025079911574721336, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 248820 + }, + { + "epoch": 1.2067805323404137, + "grad_norm": 0.039907991886138916, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 248830 + }, + { + "epoch": 1.2068290305332496, + "grad_norm": 7.277626082213828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248840 + }, + { + "epoch": 1.2068775287260858, + "grad_norm": 1.4855370864097495e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 248850 + }, + { + "epoch": 1.206926026918922, + "grad_norm": 2.1330788513296284e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 248860 + }, + { + "epoch": 1.206974525111758, + "grad_norm": 0.0037567538674920797, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 248870 + }, + { + "epoch": 1.2070230233045942, + "grad_norm": 2.254636274301447e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248880 + }, + { + "epoch": 1.2070715214974301, + "grad_norm": 8.918269486457575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248890 + }, + { + "epoch": 1.2071200196902663, + "grad_norm": 1.5008206901256926e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248900 + }, + { + "epoch": 1.2071685178831024, + "grad_norm": 1.045851968228817e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248910 + }, + { + "epoch": 1.2072170160759386, + "grad_norm": 9.467897143622395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248920 + }, + { + "epoch": 1.2072655142687745, + "grad_norm": 6.723502337990794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248930 + }, + { + "epoch": 1.2073140124616106, + "grad_norm": 4.254926352587063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248940 + }, + { + "epoch": 1.2073625106544468, + "grad_norm": 6.217851478140801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248950 + }, + { + "epoch": 1.207411008847283, + "grad_norm": 5.931815394433215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248960 + }, + { + "epoch": 1.2074595070401188, + "grad_norm": 4.340133727964712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248970 + }, + { + "epoch": 1.207508005232955, + "grad_norm": 4.6523236960638314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248980 + }, + { + "epoch": 1.2075565034257911, + "grad_norm": 3.2011914754548343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 248990 + }, + { + "epoch": 1.2076050016186273, + "grad_norm": 4.308112238504691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249000 + }, + { + "epoch": 1.2076534998114632, + "grad_norm": 3.978991571784718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249010 + }, + { + "epoch": 1.2077019980042993, + "grad_norm": 3.888602350343717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249020 + }, + { + "epoch": 1.2077504961971355, + "grad_norm": 2.366257831454277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249030 + }, + { + "epoch": 1.2077989943899716, + "grad_norm": 1.855783921200782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249040 + }, + { + "epoch": 1.2078474925828075, + "grad_norm": 3.155542799504474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249050 + }, + { + "epoch": 1.2078959907756437, + "grad_norm": 2.6833404263015836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249060 + }, + { + "epoch": 1.2079444889684798, + "grad_norm": 2.8922611363668693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249070 + }, + { + "epoch": 1.207992987161316, + "grad_norm": 2.185021912737284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249080 + }, + { + "epoch": 1.208041485354152, + "grad_norm": 1.410856043548847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249090 + }, + { + "epoch": 1.208089983546988, + "grad_norm": 2.826420313795097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249100 + }, + { + "epoch": 1.2081384817398242, + "grad_norm": 2.303883775311988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249110 + }, + { + "epoch": 1.2081869799326603, + "grad_norm": 2.517402208468411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249120 + }, + { + "epoch": 1.2082354781254963, + "grad_norm": 2.2982890186540317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249130 + }, + { + "epoch": 1.2082839763183324, + "grad_norm": 1.553365677864349e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249140 + }, + { + "epoch": 1.2083324745111685, + "grad_norm": 1.9519097804732155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249150 + }, + { + "epoch": 1.2083809727040047, + "grad_norm": 2.2291969798970968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249160 + }, + { + "epoch": 1.2084294708968408, + "grad_norm": 1.942653170772246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249170 + }, + { + "epoch": 1.2084779690896768, + "grad_norm": 1.8959333374368725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249180 + }, + { + "epoch": 1.208526467282513, + "grad_norm": 1.1326644653308904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249190 + }, + { + "epoch": 1.208574965475349, + "grad_norm": 1.9022792230316554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249200 + }, + { + "epoch": 1.208623463668185, + "grad_norm": 1.8110281416738871e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249210 + }, + { + "epoch": 1.208671961861021, + "grad_norm": 1.6937573263930972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249220 + }, + { + "epoch": 1.2087204600538572, + "grad_norm": 1.8569916164778988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249230 + }, + { + "epoch": 1.2087689582466934, + "grad_norm": 1.082184553524712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249240 + }, + { + "epoch": 1.2088174564395295, + "grad_norm": 1.8225973690277897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249250 + }, + { + "epoch": 1.2088659546323655, + "grad_norm": 1.9158967461407883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249260 + }, + { + "epoch": 1.2089144528252016, + "grad_norm": 1.5112069604583667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249270 + }, + { + "epoch": 1.2089629510180377, + "grad_norm": 1.719749207040877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249280 + }, + { + "epoch": 1.2090114492108737, + "grad_norm": 8.794244195087231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249290 + }, + { + "epoch": 1.2090599474037098, + "grad_norm": 1.5621016018485534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249300 + }, + { + "epoch": 1.209108445596546, + "grad_norm": 1.622486252017552e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249310 + }, + { + "epoch": 1.209156943789382, + "grad_norm": 1.2538223472802201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249320 + }, + { + "epoch": 1.2092054419822182, + "grad_norm": 1.3312459259395837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249330 + }, + { + "epoch": 1.2092539401750542, + "grad_norm": 8.351332212441775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249340 + }, + { + "epoch": 1.2093024383678903, + "grad_norm": 1.9738920400413917e-06, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 249350 + }, + { + "epoch": 1.2093509365607265, + "grad_norm": 5.941335984971374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249360 + }, + { + "epoch": 1.2093994347535624, + "grad_norm": 8.529475962859578e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249370 + }, + { + "epoch": 1.2094479329463985, + "grad_norm": 8.091960808087606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249380 + }, + { + "epoch": 1.2094964311392347, + "grad_norm": 1.687629264779389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249390 + }, + { + "epoch": 1.2095449293320708, + "grad_norm": 9.106115612667054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249400 + }, + { + "epoch": 1.209593427524907, + "grad_norm": 8.862020877131727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249410 + }, + { + "epoch": 1.2096419257177429, + "grad_norm": 1.0686148016247898e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249420 + }, + { + "epoch": 1.209690423910579, + "grad_norm": 7.594006092404015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249430 + }, + { + "epoch": 1.2097389221034152, + "grad_norm": 1.7169320472021354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249440 + }, + { + "epoch": 1.2097874202962513, + "grad_norm": 7.15017949914909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249450 + }, + { + "epoch": 1.2098359184890872, + "grad_norm": 6.818924703111406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249460 + }, + { + "epoch": 1.2098844166819234, + "grad_norm": 5.6728104027570225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249470 + }, + { + "epoch": 1.2099329148747595, + "grad_norm": 5.3976250455889385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249480 + }, + { + "epoch": 1.2099814130675957, + "grad_norm": 1.366457695439749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249490 + }, + { + "epoch": 1.2100299112604316, + "grad_norm": 4.995941708330065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249500 + }, + { + "epoch": 1.2100784094532677, + "grad_norm": 4.783014901477145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249510 + }, + { + "epoch": 1.2101269076461039, + "grad_norm": 4.792146683030296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249520 + }, + { + "epoch": 1.21017540583894, + "grad_norm": 3.860641754727112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249530 + }, + { + "epoch": 1.210223904031776, + "grad_norm": 1.0906076113315066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249540 + }, + { + "epoch": 1.210272402224612, + "grad_norm": 3.6028700378665235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249550 + }, + { + "epoch": 1.2103209004174482, + "grad_norm": 3.7836841784155695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249560 + }, + { + "epoch": 1.2103693986102844, + "grad_norm": 3.3275073292315938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249570 + }, + { + "epoch": 1.2104178968031203, + "grad_norm": 2.8910644687130116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249580 + }, + { + "epoch": 1.2104663949959564, + "grad_norm": 9.820763580137282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249590 + }, + { + "epoch": 1.2105148931887926, + "grad_norm": 3.356116849317914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249600 + }, + { + "epoch": 1.2105633913816287, + "grad_norm": 3.1187603326543467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249610 + }, + { + "epoch": 1.2106118895744646, + "grad_norm": 3.1675476748205256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249620 + }, + { + "epoch": 1.2106603877673008, + "grad_norm": 2.05789842766535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249630 + }, + { + "epoch": 1.210708885960137, + "grad_norm": 9.475774618294963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249640 + }, + { + "epoch": 1.210757384152973, + "grad_norm": 2.5183189791277982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249650 + }, + { + "epoch": 1.210805882345809, + "grad_norm": 2.4523876618331997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249660 + }, + { + "epoch": 1.2108543805386451, + "grad_norm": 2.3495319965149974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249670 + }, + { + "epoch": 1.2109028787314813, + "grad_norm": 1.7629655530981836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249680 + }, + { + "epoch": 1.2109513769243174, + "grad_norm": 7.833587005734444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249690 + }, + { + "epoch": 1.2109998751171536, + "grad_norm": 2.026982656389009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249700 + }, + { + "epoch": 1.2110483733099895, + "grad_norm": 2.0916681933158543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249710 + }, + { + "epoch": 1.2110968715028256, + "grad_norm": 2.0640195543819573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249720 + }, + { + "epoch": 1.2111453696956618, + "grad_norm": 1.7973055719266995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249730 + }, + { + "epoch": 1.2111938678884977, + "grad_norm": 8.157024922184064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249740 + }, + { + "epoch": 1.2112423660813338, + "grad_norm": 1.8565511936685652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249750 + }, + { + "epoch": 1.21129086427417, + "grad_norm": 1.7351279666399932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249760 + }, + { + "epoch": 1.2113393624670061, + "grad_norm": 2.087446091536549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249770 + }, + { + "epoch": 1.2113878606598423, + "grad_norm": 2.4907161787268706e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249780 + }, + { + "epoch": 1.2114363588526782, + "grad_norm": 6.427423500099394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249790 + }, + { + "epoch": 1.2114848570455143, + "grad_norm": 1.6889820244614384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249800 + }, + { + "epoch": 1.2115333552383505, + "grad_norm": 1.652854734857101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249810 + }, + { + "epoch": 1.2115818534311864, + "grad_norm": 1.5195861351458007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249820 + }, + { + "epoch": 1.2116303516240226, + "grad_norm": 1.0020980880653951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249830 + }, + { + "epoch": 1.2116788498168587, + "grad_norm": 6.601095492442255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249840 + }, + { + "epoch": 1.2117273480096948, + "grad_norm": 1.4754000403627288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249850 + }, + { + "epoch": 1.211775846202531, + "grad_norm": 1.3781187817585305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249860 + }, + { + "epoch": 1.211824344395367, + "grad_norm": 1.3029941783315735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249870 + }, + { + "epoch": 1.211872842588203, + "grad_norm": 1.4772670056117931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249880 + }, + { + "epoch": 1.2119213407810392, + "grad_norm": 6.1739262946503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249890 + }, + { + "epoch": 1.2119698389738751, + "grad_norm": 1.849400973696902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249900 + }, + { + "epoch": 1.2120183371667113, + "grad_norm": 1.3567176893047872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249910 + }, + { + "epoch": 1.2120668353595474, + "grad_norm": 1.3583305644715438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249920 + }, + { + "epoch": 1.2121153335523835, + "grad_norm": 1.2501044466262101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249930 + }, + { + "epoch": 1.2121638317452197, + "grad_norm": 5.863504952685616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249940 + }, + { + "epoch": 1.2122123299380556, + "grad_norm": 1.1082086075475672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249950 + }, + { + "epoch": 1.2122608281308918, + "grad_norm": 1.2481810927056358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249960 + }, + { + "epoch": 1.212309326323728, + "grad_norm": 1.0920097111011273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249970 + }, + { + "epoch": 1.212357824516564, + "grad_norm": 9.994474794439157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249980 + }, + { + "epoch": 1.2124063227094, + "grad_norm": 5.396994993134285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 249990 + }, + { + "epoch": 1.212454820902236, + "grad_norm": 1.1663996701827273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250000 + }, + { + "epoch": 1.2125033190950723, + "grad_norm": 1.1065786793551524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250010 + }, + { + "epoch": 1.2125518172879084, + "grad_norm": 1.1439489071563003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250020 + }, + { + "epoch": 1.2126003154807443, + "grad_norm": 1.0066619324788917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250030 + }, + { + "epoch": 1.2126488136735805, + "grad_norm": 5.332630621524004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250040 + }, + { + "epoch": 1.2126973118664166, + "grad_norm": 1.0604347835396766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250050 + }, + { + "epoch": 1.2127458100592527, + "grad_norm": 1.0614639904815704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250060 + }, + { + "epoch": 1.2127943082520887, + "grad_norm": 1.0183634913119022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250070 + }, + { + "epoch": 1.2128428064449248, + "grad_norm": 9.187893965645344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250080 + }, + { + "epoch": 1.212891304637761, + "grad_norm": 4.741436328004056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250090 + }, + { + "epoch": 1.212939802830597, + "grad_norm": 1.19304957024724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250100 + }, + { + "epoch": 1.212988301023433, + "grad_norm": 9.293798370890727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250110 + }, + { + "epoch": 1.2130367992162692, + "grad_norm": 8.72595080636529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250120 + }, + { + "epoch": 1.2130852974091053, + "grad_norm": 7.708914040449599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250130 + }, + { + "epoch": 1.2131337956019415, + "grad_norm": 4.848452022088168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250140 + }, + { + "epoch": 1.2131822937947774, + "grad_norm": 8.603697096987162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250150 + }, + { + "epoch": 1.2132307919876135, + "grad_norm": 1.9931972019548994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250160 + }, + { + "epoch": 1.2132792901804497, + "grad_norm": 8.3470547451725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250170 + }, + { + "epoch": 1.2133277883732858, + "grad_norm": 9.336377502222604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250180 + }, + { + "epoch": 1.2133762865661217, + "grad_norm": 4.799348971573636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250190 + }, + { + "epoch": 1.2134247847589579, + "grad_norm": 9.223671781910525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250200 + }, + { + "epoch": 1.213473282951794, + "grad_norm": 8.803131663626118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250210 + }, + { + "epoch": 1.2135217811446302, + "grad_norm": 8.864714118317352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250220 + }, + { + "epoch": 1.2135702793374663, + "grad_norm": 6.565439889527624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250230 + }, + { + "epoch": 1.2136187775303022, + "grad_norm": 4.0189260630540957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250240 + }, + { + "epoch": 1.2136672757231384, + "grad_norm": 8.448134281024977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250250 + }, + { + "epoch": 1.2137157739159745, + "grad_norm": 8.736706718082132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250260 + }, + { + "epoch": 1.2137642721088104, + "grad_norm": 8.0290396908822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250270 + }, + { + "epoch": 1.2138127703016466, + "grad_norm": 8.361569712178607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250280 + }, + { + "epoch": 1.2138612684944827, + "grad_norm": 4.438708174347994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250290 + }, + { + "epoch": 1.2139097666873189, + "grad_norm": 7.422635235343478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250300 + }, + { + "epoch": 1.213958264880155, + "grad_norm": 7.333316034419113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250310 + }, + { + "epoch": 1.214006763072991, + "grad_norm": 6.822363616265648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250320 + }, + { + "epoch": 1.214055261265827, + "grad_norm": 6.642412131441233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250330 + }, + { + "epoch": 1.2141037594586632, + "grad_norm": 3.8565104887311463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250340 + }, + { + "epoch": 1.2141522576514991, + "grad_norm": 6.676181101283873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250350 + }, + { + "epoch": 1.2142007558443353, + "grad_norm": 6.960776772757526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250360 + }, + { + "epoch": 1.2142492540371714, + "grad_norm": 7.00620375937433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250370 + }, + { + "epoch": 1.2142977522300076, + "grad_norm": 6.688953817501897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250380 + }, + { + "epoch": 1.2143462504228437, + "grad_norm": 3.887690240844677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250390 + }, + { + "epoch": 1.2143947486156796, + "grad_norm": 1.0690806675484055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250400 + }, + { + "epoch": 1.2144432468085158, + "grad_norm": 1.1824477041955106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250410 + }, + { + "epoch": 1.214491745001352, + "grad_norm": 9.252242421098344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250420 + }, + { + "epoch": 1.2145402431941879, + "grad_norm": 6.753916750312783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250430 + }, + { + "epoch": 1.214588741387024, + "grad_norm": 3.5533469144866103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250440 + }, + { + "epoch": 1.2146372395798601, + "grad_norm": 6.729937354066351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250450 + }, + { + "epoch": 1.2146857377726963, + "grad_norm": 6.713072480124538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250460 + }, + { + "epoch": 1.2147342359655324, + "grad_norm": 6.432540544665244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250470 + }, + { + "epoch": 1.2147827341583683, + "grad_norm": 4.6310697143781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250480 + }, + { + "epoch": 1.2148312323512045, + "grad_norm": 3.43219255682925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250490 + }, + { + "epoch": 1.2148797305440406, + "grad_norm": 6.02124089255085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250500 + }, + { + "epoch": 1.2149282287368768, + "grad_norm": 7.072141556818679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250510 + }, + { + "epoch": 1.2149767269297127, + "grad_norm": 6.198775963639491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250520 + }, + { + "epoch": 1.2150252251225488, + "grad_norm": 5.004923195883748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250530 + }, + { + "epoch": 1.215073723315385, + "grad_norm": 3.2252319215331227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250540 + }, + { + "epoch": 1.2151222215082211, + "grad_norm": 5.493063213179994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250550 + }, + { + "epoch": 1.215170719701057, + "grad_norm": 5.815230110783887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250560 + }, + { + "epoch": 1.2152192178938932, + "grad_norm": 6.41433246073575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250570 + }, + { + "epoch": 1.2152677160867293, + "grad_norm": 5.473385158438759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250580 + }, + { + "epoch": 1.2153162142795655, + "grad_norm": 3.6404779280019284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250590 + }, + { + "epoch": 1.2153647124724014, + "grad_norm": 5.736312118642672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250600 + }, + { + "epoch": 1.2154132106652376, + "grad_norm": 5.39589450454514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250610 + }, + { + "epoch": 1.2154617088580737, + "grad_norm": 7.960770176396181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250620 + }, + { + "epoch": 1.2155102070509098, + "grad_norm": 5.200092232371389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250630 + }, + { + "epoch": 1.2155587052437458, + "grad_norm": 3.3905223517649574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250640 + }, + { + "epoch": 1.215607203436582, + "grad_norm": 5.58452995846892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250650 + }, + { + "epoch": 1.215655701629418, + "grad_norm": 1.2996073337490088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250660 + }, + { + "epoch": 1.2157041998222542, + "grad_norm": 5.418101522991492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250670 + }, + { + "epoch": 1.2157526980150901, + "grad_norm": 6.070449103390274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250680 + }, + { + "epoch": 1.2158011962079263, + "grad_norm": 2.935063321274356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250690 + }, + { + "epoch": 1.2158496944007624, + "grad_norm": 5.16598390731815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250700 + }, + { + "epoch": 1.2158981925935985, + "grad_norm": 5.446426598609833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250710 + }, + { + "epoch": 1.2159466907864345, + "grad_norm": 5.468705808198138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250720 + }, + { + "epoch": 1.2159951889792706, + "grad_norm": 4.835059712604561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250730 + }, + { + "epoch": 1.2160436871721068, + "grad_norm": 2.872861557534634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250740 + }, + { + "epoch": 1.216092185364943, + "grad_norm": 5.711133894692466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250750 + }, + { + "epoch": 1.216140683557779, + "grad_norm": 5.176184458832722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250760 + }, + { + "epoch": 1.216189181750615, + "grad_norm": 5.013694703848159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250770 + }, + { + "epoch": 1.216237679943451, + "grad_norm": 4.25063575448803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250780 + }, + { + "epoch": 1.2162861781362873, + "grad_norm": 2.923945885413559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250790 + }, + { + "epoch": 1.2163346763291232, + "grad_norm": 4.789170020558231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250800 + }, + { + "epoch": 1.2163831745219593, + "grad_norm": 5.185727900425263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250810 + }, + { + "epoch": 1.2164316727147955, + "grad_norm": 4.459680553736689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250820 + }, + { + "epoch": 1.2164801709076316, + "grad_norm": 4.4606997562368633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250830 + }, + { + "epoch": 1.2165286691004678, + "grad_norm": 2.6571856892587675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250840 + }, + { + "epoch": 1.2165771672933037, + "grad_norm": 4.7624004650970164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250850 + }, + { + "epoch": 1.2166256654861398, + "grad_norm": 4.3568584828790335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250860 + }, + { + "epoch": 1.216674163678976, + "grad_norm": 5.702752901015629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250870 + }, + { + "epoch": 1.2167226618718119, + "grad_norm": 4.880103574578243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250880 + }, + { + "epoch": 1.216771160064648, + "grad_norm": 2.626897526170069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250890 + }, + { + "epoch": 1.2168196582574842, + "grad_norm": 4.940268354403088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250900 + }, + { + "epoch": 1.2168681564503203, + "grad_norm": 4.303434764096892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250910 + }, + { + "epoch": 1.2169166546431565, + "grad_norm": 4.162716606970207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250920 + }, + { + "epoch": 1.2169651528359924, + "grad_norm": 3.8903056065464625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250930 + }, + { + "epoch": 1.2170136510288285, + "grad_norm": 2.3990367026271997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250940 + }, + { + "epoch": 1.2170621492216647, + "grad_norm": 4.171744762970775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250950 + }, + { + "epoch": 1.2171106474145008, + "grad_norm": 4.63447975107556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250960 + }, + { + "epoch": 1.2171591456073367, + "grad_norm": 5.574577244260581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250970 + }, + { + "epoch": 1.2172076438001729, + "grad_norm": 4.5351521293923724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250980 + }, + { + "epoch": 1.217256141993009, + "grad_norm": 3.0112033755358425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 250990 + }, + { + "epoch": 1.2173046401858452, + "grad_norm": 7.451433816640929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251000 + }, + { + "epoch": 1.217353138378681, + "grad_norm": 4.017398680389306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251010 + }, + { + "epoch": 1.2174016365715172, + "grad_norm": 3.685499621042254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251020 + }, + { + "epoch": 1.2174501347643534, + "grad_norm": 4.345060631294473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251030 + }, + { + "epoch": 1.2174986329571895, + "grad_norm": 2.2842905877951125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251040 + }, + { + "epoch": 1.2175471311500254, + "grad_norm": 3.9623881775696645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251050 + }, + { + "epoch": 1.2175956293428616, + "grad_norm": 3.528901686422614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251060 + }, + { + "epoch": 1.2176441275356977, + "grad_norm": 3.4919500535579573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251070 + }, + { + "epoch": 1.2176926257285339, + "grad_norm": 1.0899722838075832e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251080 + }, + { + "epoch": 1.2177411239213698, + "grad_norm": 2.4799012976473023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251090 + }, + { + "epoch": 1.217789622114206, + "grad_norm": 3.813129580976238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251100 + }, + { + "epoch": 1.217838120307042, + "grad_norm": 4.081686313384125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251110 + }, + { + "epoch": 1.2178866184998782, + "grad_norm": 3.86410079045163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251120 + }, + { + "epoch": 1.2179351166927141, + "grad_norm": 2.8837825993832666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251130 + }, + { + "epoch": 1.2179836148855503, + "grad_norm": 2.3035860863274138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251140 + }, + { + "epoch": 1.2180321130783864, + "grad_norm": 3.531838785875152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251150 + }, + { + "epoch": 1.2180806112712226, + "grad_norm": 3.8020772308300366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251160 + }, + { + "epoch": 1.2181291094640585, + "grad_norm": 3.6757273846887983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251170 + }, + { + "epoch": 1.2181776076568946, + "grad_norm": 3.798575107794022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251180 + }, + { + "epoch": 1.2182261058497308, + "grad_norm": 2.7007726544070465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251190 + }, + { + "epoch": 1.218274604042567, + "grad_norm": 3.5660156072481186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251200 + }, + { + "epoch": 1.218323102235403, + "grad_norm": 3.284055765107041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251210 + }, + { + "epoch": 1.218371600428239, + "grad_norm": 3.6962754279556975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251220 + }, + { + "epoch": 1.2184200986210751, + "grad_norm": 3.476365861843078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251230 + }, + { + "epoch": 1.2184685968139113, + "grad_norm": 2.278285933243751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251240 + }, + { + "epoch": 1.2185170950067472, + "grad_norm": 3.89773703091123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251250 + }, + { + "epoch": 1.2185655931995834, + "grad_norm": 3.569742261788633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251260 + }, + { + "epoch": 1.2186140913924195, + "grad_norm": 1.2388558161546825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251270 + }, + { + "epoch": 1.2186625895852556, + "grad_norm": 3.2936543448158773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251280 + }, + { + "epoch": 1.2187110877780918, + "grad_norm": 2.2141308875234245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251290 + }, + { + "epoch": 1.2187595859709277, + "grad_norm": 3.3724469972185034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251300 + }, + { + "epoch": 1.2188080841637638, + "grad_norm": 3.4363387158009573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251310 + }, + { + "epoch": 1.2188565823566, + "grad_norm": 4.2518203713370895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251320 + }, + { + "epoch": 1.218905080549436, + "grad_norm": 3.2505656122339133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251330 + }, + { + "epoch": 1.218953578742272, + "grad_norm": 2.2022662449217023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251340 + }, + { + "epoch": 1.2190020769351082, + "grad_norm": 3.0913588489056565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251350 + }, + { + "epoch": 1.2190505751279443, + "grad_norm": 3.236858390209818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251360 + }, + { + "epoch": 1.2190990733207805, + "grad_norm": 3.0923732197152276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251370 + }, + { + "epoch": 1.2191475715136164, + "grad_norm": 3.3117103725999186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251380 + }, + { + "epoch": 1.2191960697064526, + "grad_norm": 2.0929238075950707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251390 + }, + { + "epoch": 1.2192445678992887, + "grad_norm": 3.175831864155043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251400 + }, + { + "epoch": 1.2192930660921246, + "grad_norm": 3.1936681921251875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251410 + }, + { + "epoch": 1.2193415642849608, + "grad_norm": 3.3939457466658496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251420 + }, + { + "epoch": 1.219390062477797, + "grad_norm": 2.9127519951543945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251430 + }, + { + "epoch": 1.219438560670633, + "grad_norm": 2.0839316050569323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251440 + }, + { + "epoch": 1.2194870588634692, + "grad_norm": 3.023719159500615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251450 + }, + { + "epoch": 1.2195355570563051, + "grad_norm": 3.263114933815814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251460 + }, + { + "epoch": 1.2195840552491413, + "grad_norm": 5.27197130395507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251470 + }, + { + "epoch": 1.2196325534419774, + "grad_norm": 3.136402710879338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251480 + }, + { + "epoch": 1.2196810516348136, + "grad_norm": 2.185178686886502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251490 + }, + { + "epoch": 1.2197295498276495, + "grad_norm": 2.984654656756902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251500 + }, + { + "epoch": 1.2197780480204856, + "grad_norm": 3.03423092873345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251510 + }, + { + "epoch": 1.2198265462133218, + "grad_norm": 2.973288530938589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251520 + }, + { + "epoch": 1.219875044406158, + "grad_norm": 2.775076950456423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251530 + }, + { + "epoch": 1.2199235425989938, + "grad_norm": 2.79202197361883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251540 + }, + { + "epoch": 1.21997204079183, + "grad_norm": 3.10127290958917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251550 + }, + { + "epoch": 1.2200205389846661, + "grad_norm": 3.1464315952689503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251560 + }, + { + "epoch": 1.2200690371775023, + "grad_norm": 8.971789611678105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251570 + }, + { + "epoch": 1.2201175353703382, + "grad_norm": 3.0534320671904425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251580 + }, + { + "epoch": 1.2201660335631743, + "grad_norm": 2.0462196914650121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251590 + }, + { + "epoch": 1.2202145317560105, + "grad_norm": 2.790107203054504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251600 + }, + { + "epoch": 1.2202630299488466, + "grad_norm": 2.6316635626244533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251610 + }, + { + "epoch": 1.2203115281416825, + "grad_norm": 2.8758321946042997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251620 + }, + { + "epoch": 1.2203600263345187, + "grad_norm": 2.7050782591686584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251630 + }, + { + "epoch": 1.2204085245273548, + "grad_norm": 1.9557006680770428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251640 + }, + { + "epoch": 1.220457022720191, + "grad_norm": 2.740690092650766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251650 + }, + { + "epoch": 1.2205055209130269, + "grad_norm": 2.709843727188854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251660 + }, + { + "epoch": 1.220554019105863, + "grad_norm": 2.789110737921874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251670 + }, + { + "epoch": 1.2206025172986992, + "grad_norm": 2.998796446718188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251680 + }, + { + "epoch": 1.2206510154915353, + "grad_norm": 2.065011983631848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251690 + }, + { + "epoch": 1.2206995136843712, + "grad_norm": 1.0026296877185814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251700 + }, + { + "epoch": 1.2207480118772074, + "grad_norm": 2.830844891832385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251710 + }, + { + "epoch": 1.2207965100700435, + "grad_norm": 2.875995050999336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251720 + }, + { + "epoch": 1.2208450082628797, + "grad_norm": 2.5241305934287084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251730 + }, + { + "epoch": 1.2208935064557158, + "grad_norm": 1.9579245247314248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251740 + }, + { + "epoch": 1.2209420046485517, + "grad_norm": 2.753587580173189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251750 + }, + { + "epoch": 1.2209905028413879, + "grad_norm": 2.634351972119475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251760 + }, + { + "epoch": 1.221039001034224, + "grad_norm": 2.5566825456735387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251770 + }, + { + "epoch": 1.22108749922706, + "grad_norm": 2.547580777445546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251780 + }, + { + "epoch": 1.221135997419896, + "grad_norm": 1.938364988518515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251790 + }, + { + "epoch": 1.2211844956127322, + "grad_norm": 2.5750139798219607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251800 + }, + { + "epoch": 1.2212329938055684, + "grad_norm": 2.4973624590529653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251810 + }, + { + "epoch": 1.2212814919984045, + "grad_norm": 2.609664591091132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251820 + }, + { + "epoch": 1.2213299901912404, + "grad_norm": 2.5271648951274983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251830 + }, + { + "epoch": 1.2213784883840766, + "grad_norm": 1.9396314598907338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251840 + }, + { + "epoch": 1.2214269865769127, + "grad_norm": 2.7868193797075946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251850 + }, + { + "epoch": 1.2214754847697487, + "grad_norm": 2.826195384386665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251860 + }, + { + "epoch": 1.2215239829625848, + "grad_norm": 2.8175603006275196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251870 + }, + { + "epoch": 1.221572481155421, + "grad_norm": 2.3639400126285182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251880 + }, + { + "epoch": 1.221620979348257, + "grad_norm": 1.9644906501525838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251890 + }, + { + "epoch": 1.2216694775410932, + "grad_norm": 2.6952358211929095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251900 + }, + { + "epoch": 1.2217179757339292, + "grad_norm": 2.537707075589424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251910 + }, + { + "epoch": 1.2217664739267653, + "grad_norm": 2.4888987582016853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251920 + }, + { + "epoch": 1.2218149721196014, + "grad_norm": 2.5526665581310226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251930 + }, + { + "epoch": 1.2218634703124374, + "grad_norm": 1.8965944548199332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251940 + }, + { + "epoch": 1.2219119685052735, + "grad_norm": 2.6138050657209533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251950 + }, + { + "epoch": 1.2219604666981096, + "grad_norm": 2.541312085213576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251960 + }, + { + "epoch": 1.2220089648909458, + "grad_norm": 2.612592311379558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251970 + }, + { + "epoch": 1.222057463083782, + "grad_norm": 7.435756401719118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251980 + }, + { + "epoch": 1.2221059612766179, + "grad_norm": 1.9081758750871813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 251990 + }, + { + "epoch": 1.222154459469454, + "grad_norm": 2.5084148091991665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252000 + }, + { + "epoch": 1.2222029576622901, + "grad_norm": 2.684884350401262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252010 + }, + { + "epoch": 1.2222514558551263, + "grad_norm": 2.3019201478291507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252020 + }, + { + "epoch": 1.2222999540479622, + "grad_norm": 2.5764563815755537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252030 + }, + { + "epoch": 1.2223484522407984, + "grad_norm": 1.822315027766308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252040 + }, + { + "epoch": 1.2223969504336345, + "grad_norm": 3.11649131390368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252050 + }, + { + "epoch": 1.2224454486264706, + "grad_norm": 2.497195907835703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252060 + }, + { + "epoch": 1.2224939468193066, + "grad_norm": 2.548447071148985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252070 + }, + { + "epoch": 1.2225424450121427, + "grad_norm": 2.3400629345360358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252080 + }, + { + "epoch": 1.2225909432049789, + "grad_norm": 1.860749421211949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252090 + }, + { + "epoch": 1.222639441397815, + "grad_norm": 2.3126750647861627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252100 + }, + { + "epoch": 1.222687939590651, + "grad_norm": 2.4431702172478253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252110 + }, + { + "epoch": 1.222736437783487, + "grad_norm": 2.360799982170647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252120 + }, + { + "epoch": 1.2227849359763232, + "grad_norm": 2.3386746761389077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252130 + }, + { + "epoch": 1.2228334341691594, + "grad_norm": 2.125176621348146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252140 + }, + { + "epoch": 1.2228819323619953, + "grad_norm": 2.4072673454611504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252150 + }, + { + "epoch": 1.2229304305548314, + "grad_norm": 2.4560529254813446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252160 + }, + { + "epoch": 1.2229789287476676, + "grad_norm": 2.303973332118403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252170 + }, + { + "epoch": 1.2230274269405037, + "grad_norm": 2.2184151760029636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252180 + }, + { + "epoch": 1.2230759251333396, + "grad_norm": 1.7688455500319833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252190 + }, + { + "epoch": 1.2231244233261758, + "grad_norm": 2.3121980063933734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252200 + }, + { + "epoch": 1.223172921519012, + "grad_norm": 2.2491641971100762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252210 + }, + { + "epoch": 1.223221419711848, + "grad_norm": 2.411854040929029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252220 + }, + { + "epoch": 1.223269917904684, + "grad_norm": 2.1922512871697108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252230 + }, + { + "epoch": 1.2233184160975201, + "grad_norm": 1.7868181600988464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252240 + }, + { + "epoch": 1.2233669142903563, + "grad_norm": 2.2988290027115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252250 + }, + { + "epoch": 1.2234154124831924, + "grad_norm": 2.268929364390715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252260 + }, + { + "epoch": 1.2234639106760286, + "grad_norm": 2.2978622382652247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252270 + }, + { + "epoch": 1.2235124088688645, + "grad_norm": 2.0903884490053315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252280 + }, + { + "epoch": 1.2235609070617006, + "grad_norm": 1.691683877425021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252290 + }, + { + "epoch": 1.2236094052545368, + "grad_norm": 2.254679287716499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252300 + }, + { + "epoch": 1.2236579034473727, + "grad_norm": 2.3889987232905696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252310 + }, + { + "epoch": 1.2237064016402088, + "grad_norm": 2.2537649613241229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252320 + }, + { + "epoch": 1.223754899833045, + "grad_norm": 2.3091270406894182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252330 + }, + { + "epoch": 1.2238033980258811, + "grad_norm": 1.7405260166469816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252340 + }, + { + "epoch": 1.2238518962187173, + "grad_norm": 2.0845284609549708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252350 + }, + { + "epoch": 1.2239003944115532, + "grad_norm": 2.360108055654564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252360 + }, + { + "epoch": 1.2239488926043893, + "grad_norm": 2.737823194820521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252370 + }, + { + "epoch": 1.2239973907972255, + "grad_norm": 2.06289271886817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252380 + }, + { + "epoch": 1.2240458889900614, + "grad_norm": 1.697186320370747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252390 + }, + { + "epoch": 1.2240943871828975, + "grad_norm": 2.474630775850528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252400 + }, + { + "epoch": 1.2241428853757337, + "grad_norm": 2.202665996264841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252410 + }, + { + "epoch": 1.2241913835685698, + "grad_norm": 2.3031068963064172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252420 + }, + { + "epoch": 1.224239881761406, + "grad_norm": 2.0996816374463378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252430 + }, + { + "epoch": 1.224288379954242, + "grad_norm": 1.9412372864735516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252440 + }, + { + "epoch": 1.224336878147078, + "grad_norm": 2.120694233553877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252450 + }, + { + "epoch": 1.2243853763399142, + "grad_norm": 1.6184251308004605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252460 + }, + { + "epoch": 1.22443387453275, + "grad_norm": 2.038678559301843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252470 + }, + { + "epoch": 1.2244823727255862, + "grad_norm": 2.2017495382442576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252480 + }, + { + "epoch": 1.2245308709184224, + "grad_norm": 1.6418961479303107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252490 + }, + { + "epoch": 1.2245793691112585, + "grad_norm": 1.248114767804509e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252500 + }, + { + "epoch": 1.2246278673040947, + "grad_norm": 1.9141307348036207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252510 + }, + { + "epoch": 1.2246763654969306, + "grad_norm": 2.1978937070343818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252520 + }, + { + "epoch": 1.2247248636897667, + "grad_norm": 1.524878001646357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252530 + }, + { + "epoch": 1.2247733618826029, + "grad_norm": 1.5114966345208813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252540 + }, + { + "epoch": 1.224821860075439, + "grad_norm": 1.84458841090418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252550 + }, + { + "epoch": 1.224870358268275, + "grad_norm": 1.5206100556497404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252560 + }, + { + "epoch": 1.224918856461111, + "grad_norm": 1.820638004801367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252570 + }, + { + "epoch": 1.2249673546539472, + "grad_norm": 1.2193198983823095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252580 + }, + { + "epoch": 1.2250158528467834, + "grad_norm": 1.0449292631165008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252590 + }, + { + "epoch": 1.2250643510396193, + "grad_norm": 1.598089767185229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252600 + }, + { + "epoch": 1.2251128492324554, + "grad_norm": 1.5393119667805877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252610 + }, + { + "epoch": 1.2251613474252916, + "grad_norm": 1.6695486237949808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252620 + }, + { + "epoch": 1.2252098456181277, + "grad_norm": 1.5078919091138232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252630 + }, + { + "epoch": 1.2252583438109637, + "grad_norm": 1.0188631449636887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252640 + }, + { + "epoch": 1.2253068420037998, + "grad_norm": 1.5637252204214747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252650 + }, + { + "epoch": 1.225355340196636, + "grad_norm": 1.5724036472875014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252660 + }, + { + "epoch": 1.225403838389472, + "grad_norm": 1.5198924074866227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252670 + }, + { + "epoch": 1.225452336582308, + "grad_norm": 1.43911265126917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252680 + }, + { + "epoch": 1.2255008347751442, + "grad_norm": 1.0681331019668505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252690 + }, + { + "epoch": 1.2255493329679803, + "grad_norm": 1.6774804123542708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252700 + }, + { + "epoch": 1.2255978311608164, + "grad_norm": 1.5956926802118687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252710 + }, + { + "epoch": 1.2256463293536524, + "grad_norm": 1.5722793023087434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252720 + }, + { + "epoch": 1.2256948275464885, + "grad_norm": 1.4975483964008163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252730 + }, + { + "epoch": 1.2257433257393247, + "grad_norm": 1.0404974659650179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252740 + }, + { + "epoch": 1.2257918239321608, + "grad_norm": 1.6325547846918198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252750 + }, + { + "epoch": 1.2258403221249967, + "grad_norm": 1.434057423921331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252760 + }, + { + "epoch": 1.2258888203178329, + "grad_norm": 1.4590457908525423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252770 + }, + { + "epoch": 1.225937318510669, + "grad_norm": 1.1641315467159075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252780 + }, + { + "epoch": 1.2259858167035051, + "grad_norm": 1.0275845596652289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252790 + }, + { + "epoch": 1.2260343148963413, + "grad_norm": 1.504105853200599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252800 + }, + { + "epoch": 1.2260828130891772, + "grad_norm": 1.4064968922866683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252810 + }, + { + "epoch": 1.2261313112820134, + "grad_norm": 1.5606464387474261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252820 + }, + { + "epoch": 1.2261798094748495, + "grad_norm": 1.4140148607566516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252830 + }, + { + "epoch": 1.2262283076676854, + "grad_norm": 9.928641730994059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252840 + }, + { + "epoch": 1.2262768058605216, + "grad_norm": 1.33577884753322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252850 + }, + { + "epoch": 1.2263253040533577, + "grad_norm": 1.4308380968941492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252860 + }, + { + "epoch": 1.2263738022461939, + "grad_norm": 1.2997192300190363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252870 + }, + { + "epoch": 1.22642230043903, + "grad_norm": 1.2805008964278386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252880 + }, + { + "epoch": 1.226470798631866, + "grad_norm": 9.650106846947892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252890 + }, + { + "epoch": 1.226519296824702, + "grad_norm": 1.3889612660022976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252900 + }, + { + "epoch": 1.2265677950175382, + "grad_norm": 1.6042343986555352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252910 + }, + { + "epoch": 1.2266162932103741, + "grad_norm": 1.3962130651634652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252920 + }, + { + "epoch": 1.2266647914032103, + "grad_norm": 9.605367523590758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252930 + }, + { + "epoch": 1.2267132895960464, + "grad_norm": 9.259682798301583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252940 + }, + { + "epoch": 1.2267617877888826, + "grad_norm": 1.3760428885234433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252950 + }, + { + "epoch": 1.2268102859817187, + "grad_norm": 1.4126426606253517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252960 + }, + { + "epoch": 1.2268587841745546, + "grad_norm": 1.357456937967072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252970 + }, + { + "epoch": 1.2269072823673908, + "grad_norm": 1.2843653962590906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252980 + }, + { + "epoch": 1.226955780560227, + "grad_norm": 8.953792018928652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 252990 + }, + { + "epoch": 1.227004278753063, + "grad_norm": 1.2797370629868965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253000 + }, + { + "epoch": 1.227052776945899, + "grad_norm": 1.3135826293364516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253010 + }, + { + "epoch": 1.2271012751387351, + "grad_norm": 1.2204492350065266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253020 + }, + { + "epoch": 1.2271497733315713, + "grad_norm": 1.4629102906837943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253030 + }, + { + "epoch": 1.2271982715244074, + "grad_norm": 9.424057623164117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253040 + }, + { + "epoch": 1.2272467697172433, + "grad_norm": 1.1929247989428404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253050 + }, + { + "epoch": 1.2272952679100795, + "grad_norm": 1.3230072681835736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253060 + }, + { + "epoch": 1.2273437661029156, + "grad_norm": 1.274058689659796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253070 + }, + { + "epoch": 1.2273922642957518, + "grad_norm": 1.1023689694411587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253080 + }, + { + "epoch": 1.2274407624885877, + "grad_norm": 9.640531573040789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253090 + }, + { + "epoch": 1.2274892606814238, + "grad_norm": 1.309720403241954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253100 + }, + { + "epoch": 1.22753775887426, + "grad_norm": 1.5087753979514673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253110 + }, + { + "epoch": 1.2275862570670961, + "grad_norm": 1.2323910425493523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253120 + }, + { + "epoch": 1.227634755259932, + "grad_norm": 1.275373051612405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253130 + }, + { + "epoch": 1.2276832534527682, + "grad_norm": 9.089071539847282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253140 + }, + { + "epoch": 1.2277317516456043, + "grad_norm": 1.1310940095654587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253150 + }, + { + "epoch": 1.2277802498384405, + "grad_norm": 1.32871278424318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253160 + }, + { + "epoch": 1.2278287480312764, + "grad_norm": 1.2286081130241655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253170 + }, + { + "epoch": 1.2278772462241125, + "grad_norm": 1.0946376960419002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253180 + }, + { + "epoch": 1.2279257444169487, + "grad_norm": 9.386910448938579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253190 + }, + { + "epoch": 1.2279742426097848, + "grad_norm": 1.2326044895871746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253200 + }, + { + "epoch": 1.2280227408026207, + "grad_norm": 1.1657424892064228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253210 + }, + { + "epoch": 1.228071238995457, + "grad_norm": 1.2519518577391864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253220 + }, + { + "epoch": 1.228119737188293, + "grad_norm": 1.2323774001288257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253230 + }, + { + "epoch": 1.2281682353811292, + "grad_norm": 8.65162874674752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253240 + }, + { + "epoch": 1.2282167335739653, + "grad_norm": 1.1737267868738854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253250 + }, + { + "epoch": 1.2282652317668012, + "grad_norm": 1.2071902233401488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253260 + }, + { + "epoch": 1.2283137299596374, + "grad_norm": 1.2172145602562523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253270 + }, + { + "epoch": 1.2283622281524735, + "grad_norm": 1.021491158326171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253280 + }, + { + "epoch": 1.2284107263453095, + "grad_norm": 8.702310339003816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253290 + }, + { + "epoch": 1.2284592245381456, + "grad_norm": 1.2161746099081938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253300 + }, + { + "epoch": 1.2285077227309817, + "grad_norm": 1.1989918391464016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253310 + }, + { + "epoch": 1.2285562209238179, + "grad_norm": 1.2149770611813437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253320 + }, + { + "epoch": 1.228604719116654, + "grad_norm": 9.273148293686972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253330 + }, + { + "epoch": 1.22865321730949, + "grad_norm": 8.805699280856061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253340 + }, + { + "epoch": 1.228701715502326, + "grad_norm": 1.0527994476206004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253350 + }, + { + "epoch": 1.2287502136951622, + "grad_norm": 1.0768820857265382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253360 + }, + { + "epoch": 1.2287987118879982, + "grad_norm": 1.2730684773032408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253370 + }, + { + "epoch": 1.2288472100808343, + "grad_norm": 1.3635711582082877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253380 + }, + { + "epoch": 1.2288957082736705, + "grad_norm": 8.496288472770175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253390 + }, + { + "epoch": 1.2289442064665066, + "grad_norm": 1.1083662343480682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253400 + }, + { + "epoch": 1.2289927046593427, + "grad_norm": 1.0798895289099164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253410 + }, + { + "epoch": 1.2290412028521787, + "grad_norm": 1.2276039740299893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253420 + }, + { + "epoch": 1.2290897010450148, + "grad_norm": 1.0151017448833954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253430 + }, + { + "epoch": 1.229138199237851, + "grad_norm": 8.365443449065424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253440 + }, + { + "epoch": 1.2291866974306869, + "grad_norm": 1.075481605994355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253450 + }, + { + "epoch": 1.229235195623523, + "grad_norm": 9.955126500926781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253460 + }, + { + "epoch": 1.2292836938163592, + "grad_norm": 1.114209453589865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253470 + }, + { + "epoch": 1.2293321920091953, + "grad_norm": 9.335082040706766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253480 + }, + { + "epoch": 1.2293806902020314, + "grad_norm": 8.140786889043738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253490 + }, + { + "epoch": 1.2294291883948674, + "grad_norm": 1.0588821197643483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253500 + }, + { + "epoch": 1.2294776865877035, + "grad_norm": 9.95977202933318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253510 + }, + { + "epoch": 1.2295261847805397, + "grad_norm": 1.0217272716772641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253520 + }, + { + "epoch": 1.2295746829733758, + "grad_norm": 1.0186186472083136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253530 + }, + { + "epoch": 1.2296231811662117, + "grad_norm": 8.134137630122495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253540 + }, + { + "epoch": 1.2296716793590479, + "grad_norm": 1.0040518816367694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253550 + }, + { + "epoch": 1.229720177551884, + "grad_norm": 9.995143557262054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253560 + }, + { + "epoch": 1.2297686757447202, + "grad_norm": 9.815371271315598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253570 + }, + { + "epoch": 1.229817173937556, + "grad_norm": 9.545777146513501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253580 + }, + { + "epoch": 1.2298656721303922, + "grad_norm": 8.273037366279823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253590 + }, + { + "epoch": 1.2299141703232284, + "grad_norm": 9.768822906153218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253600 + }, + { + "epoch": 1.2299626685160645, + "grad_norm": 9.890560903613732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253610 + }, + { + "epoch": 1.2300111667089004, + "grad_norm": 9.722246829824144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253620 + }, + { + "epoch": 1.2300596649017366, + "grad_norm": 8.199679513154479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253630 + }, + { + "epoch": 1.2301081630945727, + "grad_norm": 8.477440616161402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253640 + }, + { + "epoch": 1.2301566612874089, + "grad_norm": 8.998891587452817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253650 + }, + { + "epoch": 1.2302051594802448, + "grad_norm": 1.0778153125556855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253660 + }, + { + "epoch": 1.230253657673081, + "grad_norm": 9.553405533324622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253670 + }, + { + "epoch": 1.230302155865917, + "grad_norm": 9.171547787900636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253680 + }, + { + "epoch": 1.2303506540587532, + "grad_norm": 8.069630297313779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253690 + }, + { + "epoch": 1.2303991522515891, + "grad_norm": 9.64761355248811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253700 + }, + { + "epoch": 1.2304476504444253, + "grad_norm": 8.917099592054001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253710 + }, + { + "epoch": 1.2304961486372614, + "grad_norm": 9.293900404827582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253720 + }, + { + "epoch": 1.2305446468300976, + "grad_norm": 8.319936029010933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253730 + }, + { + "epoch": 1.2305931450229335, + "grad_norm": 8.131992501603236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253740 + }, + { + "epoch": 1.2306416432157696, + "grad_norm": 9.686187496527054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253750 + }, + { + "epoch": 1.2306901414086058, + "grad_norm": 8.149049079975157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253760 + }, + { + "epoch": 1.230738639601442, + "grad_norm": 8.488316893817682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253770 + }, + { + "epoch": 1.230787137794278, + "grad_norm": 8.062525580498914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253780 + }, + { + "epoch": 1.230835635987114, + "grad_norm": 7.968236559463548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253790 + }, + { + "epoch": 1.2308841341799501, + "grad_norm": 8.828727970922046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253800 + }, + { + "epoch": 1.2309326323727863, + "grad_norm": 8.95941667522493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253810 + }, + { + "epoch": 1.2309811305656222, + "grad_norm": 8.589447020312946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253820 + }, + { + "epoch": 1.2310296287584583, + "grad_norm": 8.238850313091461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253830 + }, + { + "epoch": 1.2310781269512945, + "grad_norm": 8.108815308105477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253840 + }, + { + "epoch": 1.2311266251441306, + "grad_norm": 8.234956538899496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253850 + }, + { + "epoch": 1.2311751233369668, + "grad_norm": 9.195065331368824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253860 + }, + { + "epoch": 1.2312236215298027, + "grad_norm": 8.883214519528337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253870 + }, + { + "epoch": 1.2312721197226388, + "grad_norm": 8.267966222774703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253880 + }, + { + "epoch": 1.231320617915475, + "grad_norm": 7.605974161606355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253890 + }, + { + "epoch": 1.231369116108311, + "grad_norm": 8.535633355677419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253900 + }, + { + "epoch": 1.231417614301147, + "grad_norm": 9.232934417013894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253910 + }, + { + "epoch": 1.2314661124939832, + "grad_norm": 1.3446916113935004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253920 + }, + { + "epoch": 1.2315146106868193, + "grad_norm": 8.221332592484032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253930 + }, + { + "epoch": 1.2315631088796555, + "grad_norm": 7.458581308128487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253940 + }, + { + "epoch": 1.2316116070724914, + "grad_norm": 8.504204629389278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253950 + }, + { + "epoch": 1.2316601052653275, + "grad_norm": 8.538605555941103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253960 + }, + { + "epoch": 1.2317086034581637, + "grad_norm": 8.529795536560414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253970 + }, + { + "epoch": 1.2317571016509996, + "grad_norm": 7.649623512406833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253980 + }, + { + "epoch": 1.2318055998438358, + "grad_norm": 8.018853492330891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 253990 + }, + { + "epoch": 1.231854098036672, + "grad_norm": 8.958723185514827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254000 + }, + { + "epoch": 1.231902596229508, + "grad_norm": 9.035975523374873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254010 + }, + { + "epoch": 1.2319510944223442, + "grad_norm": 8.452905575495606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254020 + }, + { + "epoch": 1.23199959261518, + "grad_norm": 8.077508795167887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254030 + }, + { + "epoch": 1.2320480908080162, + "grad_norm": 7.622271169793748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254040 + }, + { + "epoch": 1.2320965890008524, + "grad_norm": 8.030529841107636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254050 + }, + { + "epoch": 1.2321450871936885, + "grad_norm": 8.026870546018472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254060 + }, + { + "epoch": 1.2321935853865245, + "grad_norm": 7.685389391554054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254070 + }, + { + "epoch": 1.2322420835793606, + "grad_norm": 7.333090934480424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254080 + }, + { + "epoch": 1.2322905817721967, + "grad_norm": 7.703162197003621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254090 + }, + { + "epoch": 1.232339079965033, + "grad_norm": 8.471440082757908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254100 + }, + { + "epoch": 1.2323875781578688, + "grad_norm": 8.201715928635167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254110 + }, + { + "epoch": 1.232436076350705, + "grad_norm": 7.8362013766764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254120 + }, + { + "epoch": 1.232484574543541, + "grad_norm": 7.814316660414988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254130 + }, + { + "epoch": 1.2325330727363772, + "grad_norm": 7.064286222657756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254140 + }, + { + "epoch": 1.2325815709292132, + "grad_norm": 8.050854916064054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254150 + }, + { + "epoch": 1.2326300691220493, + "grad_norm": 7.807457791386696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254160 + }, + { + "epoch": 1.2326785673148855, + "grad_norm": 7.513188648999858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254170 + }, + { + "epoch": 1.2327270655077216, + "grad_norm": 7.606944052440667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254180 + }, + { + "epoch": 1.2327755637005575, + "grad_norm": 7.398244861178682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254190 + }, + { + "epoch": 1.2328240618933937, + "grad_norm": 7.958003322983132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254200 + }, + { + "epoch": 1.2328725600862298, + "grad_norm": 7.759054199141246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254210 + }, + { + "epoch": 1.232921058279066, + "grad_norm": 7.791991407657406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254220 + }, + { + "epoch": 1.2329695564719019, + "grad_norm": 7.665799017786412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254230 + }, + { + "epoch": 1.233018054664738, + "grad_norm": 7.006063640346838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254240 + }, + { + "epoch": 1.2330665528575742, + "grad_norm": 7.953742198196778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254250 + }, + { + "epoch": 1.2331150510504103, + "grad_norm": 7.858673711780284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254260 + }, + { + "epoch": 1.2331635492432462, + "grad_norm": 7.405861168763295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254270 + }, + { + "epoch": 1.2332120474360824, + "grad_norm": 7.403891544299768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254280 + }, + { + "epoch": 1.2332605456289185, + "grad_norm": 7.222627118608216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254290 + }, + { + "epoch": 1.2333090438217547, + "grad_norm": 7.685454050943008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254300 + }, + { + "epoch": 1.2333575420145908, + "grad_norm": 7.472776530903502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254310 + }, + { + "epoch": 1.2334060402074267, + "grad_norm": 7.540812418938003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254320 + }, + { + "epoch": 1.2334545384002629, + "grad_norm": 7.31293283706691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254330 + }, + { + "epoch": 1.233503036593099, + "grad_norm": 7.193290940676889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254340 + }, + { + "epoch": 1.233551534785935, + "grad_norm": 7.717521555150597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254350 + }, + { + "epoch": 1.233600032978771, + "grad_norm": 7.522928058278922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254360 + }, + { + "epoch": 1.2336485311716072, + "grad_norm": 7.950225011654766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254370 + }, + { + "epoch": 1.2336970293644434, + "grad_norm": 7.475833996295478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254380 + }, + { + "epoch": 1.2337455275572795, + "grad_norm": 7.078509867142202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254390 + }, + { + "epoch": 1.2337940257501154, + "grad_norm": 7.743781083036083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254400 + }, + { + "epoch": 1.2338425239429516, + "grad_norm": 7.485748199087539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254410 + }, + { + "epoch": 1.2338910221357877, + "grad_norm": 7.140442193076524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254420 + }, + { + "epoch": 1.2339395203286236, + "grad_norm": 7.295087556258295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254430 + }, + { + "epoch": 1.2339880185214598, + "grad_norm": 6.866218171808214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254440 + }, + { + "epoch": 1.234036516714296, + "grad_norm": 7.369664700718204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254450 + }, + { + "epoch": 1.234085014907132, + "grad_norm": 7.485723330091787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254460 + }, + { + "epoch": 1.2341335130999682, + "grad_norm": 7.354354636390781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254470 + }, + { + "epoch": 1.2341820112928041, + "grad_norm": 7.420690195658608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254480 + }, + { + "epoch": 1.2342305094856403, + "grad_norm": 6.884178560540022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254490 + }, + { + "epoch": 1.2342790076784764, + "grad_norm": 7.965801529508099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254500 + }, + { + "epoch": 1.2343275058713123, + "grad_norm": 7.583738437233478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254510 + }, + { + "epoch": 1.2343760040641485, + "grad_norm": 7.397751744520065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254520 + }, + { + "epoch": 1.2344245022569846, + "grad_norm": 6.69899762328896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254530 + }, + { + "epoch": 1.2344730004498208, + "grad_norm": 7.010214631009148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254540 + }, + { + "epoch": 1.234521498642657, + "grad_norm": 7.420606351615788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254550 + }, + { + "epoch": 1.2345699968354928, + "grad_norm": 7.019565373411751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254560 + }, + { + "epoch": 1.234618495028329, + "grad_norm": 7.242031330179088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254570 + }, + { + "epoch": 1.2346669932211651, + "grad_norm": 6.70874769070906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254580 + }, + { + "epoch": 1.2347154914140013, + "grad_norm": 6.967100318888697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254590 + }, + { + "epoch": 1.2347639896068372, + "grad_norm": 7.273291657838854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254600 + }, + { + "epoch": 1.2348124877996733, + "grad_norm": 7.030661919316117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254610 + }, + { + "epoch": 1.2348609859925095, + "grad_norm": 7.040100769017954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254620 + }, + { + "epoch": 1.2349094841853456, + "grad_norm": 7.225659004461704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254630 + }, + { + "epoch": 1.2349579823781816, + "grad_norm": 6.717412759371655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254640 + }, + { + "epoch": 1.2350064805710177, + "grad_norm": 7.328933548933492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254650 + }, + { + "epoch": 1.2350549787638538, + "grad_norm": 7.29086195860873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254660 + }, + { + "epoch": 1.23510347695669, + "grad_norm": 9.690817392993267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254670 + }, + { + "epoch": 1.235151975149526, + "grad_norm": 6.85519836451931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254680 + }, + { + "epoch": 1.235200473342362, + "grad_norm": 6.900386750885446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254690 + }, + { + "epoch": 1.2352489715351982, + "grad_norm": 7.024171111424948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254700 + }, + { + "epoch": 1.2352974697280343, + "grad_norm": 7.29710620817059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254710 + }, + { + "epoch": 1.2353459679208703, + "grad_norm": 7.309136407229744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254720 + }, + { + "epoch": 1.2353944661137064, + "grad_norm": 6.817418807258946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254730 + }, + { + "epoch": 1.2354429643065425, + "grad_norm": 6.693597498497184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254740 + }, + { + "epoch": 1.2354914624993787, + "grad_norm": 7.296257820144092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254750 + }, + { + "epoch": 1.2355399606922146, + "grad_norm": 6.893747439562503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254760 + }, + { + "epoch": 1.2355884588850508, + "grad_norm": 7.036360472056913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254770 + }, + { + "epoch": 1.235636957077887, + "grad_norm": 7.107151844820692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254780 + }, + { + "epoch": 1.235685455270723, + "grad_norm": 6.706311950210875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254790 + }, + { + "epoch": 1.235733953463559, + "grad_norm": 7.002678614753677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254800 + }, + { + "epoch": 1.235782451656395, + "grad_norm": 7.15864416633849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254810 + }, + { + "epoch": 1.2358309498492313, + "grad_norm": 1.4090521460730088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254820 + }, + { + "epoch": 1.2358794480420674, + "grad_norm": 6.71852120603944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254830 + }, + { + "epoch": 1.2359279462349035, + "grad_norm": 6.538378727327654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254840 + }, + { + "epoch": 1.2359764444277395, + "grad_norm": 6.813805697447606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254850 + }, + { + "epoch": 1.2360249426205756, + "grad_norm": 7.002149970958271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254860 + }, + { + "epoch": 1.2360734408134118, + "grad_norm": 7.288167580554727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254870 + }, + { + "epoch": 1.2361219390062477, + "grad_norm": 7.258701373302756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254880 + }, + { + "epoch": 1.2361704371990838, + "grad_norm": 6.281328523982666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254890 + }, + { + "epoch": 1.23621893539192, + "grad_norm": 6.777266747803878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254900 + }, + { + "epoch": 1.236267433584756, + "grad_norm": 6.839362498567425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254910 + }, + { + "epoch": 1.2363159317775922, + "grad_norm": 7.014297409568826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254920 + }, + { + "epoch": 1.2363644299704282, + "grad_norm": 6.806189389862993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254930 + }, + { + "epoch": 1.2364129281632643, + "grad_norm": 6.498997606740886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254940 + }, + { + "epoch": 1.2364614263561005, + "grad_norm": 7.174093497042122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254950 + }, + { + "epoch": 1.2365099245489364, + "grad_norm": 6.903567850713443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254960 + }, + { + "epoch": 1.2365584227417725, + "grad_norm": 6.862624246650739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254970 + }, + { + "epoch": 1.2366069209346087, + "grad_norm": 7.239263766223303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254980 + }, + { + "epoch": 1.2366554191274448, + "grad_norm": 6.756714299172017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 254990 + }, + { + "epoch": 1.236703917320281, + "grad_norm": 9.363194664047114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255000 + }, + { + "epoch": 1.2367524155131169, + "grad_norm": 6.695778154153231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255010 + }, + { + "epoch": 1.236800913705953, + "grad_norm": 6.820309295108018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255020 + }, + { + "epoch": 1.2368494118987892, + "grad_norm": 6.694641996318751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255030 + }, + { + "epoch": 1.2368979100916253, + "grad_norm": 6.451033129906136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255040 + }, + { + "epoch": 1.2369464082844612, + "grad_norm": 6.895034232456965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255050 + }, + { + "epoch": 1.2369949064772974, + "grad_norm": 7.019809089570117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255060 + }, + { + "epoch": 1.2370434046701335, + "grad_norm": 6.563266197190387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255070 + }, + { + "epoch": 1.2370919028629697, + "grad_norm": 7.158524084616147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255080 + }, + { + "epoch": 1.2371404010558056, + "grad_norm": 6.500027183165002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255090 + }, + { + "epoch": 1.2371888992486417, + "grad_norm": 6.455515233483311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255100 + }, + { + "epoch": 1.2372373974414779, + "grad_norm": 6.945477792896781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255110 + }, + { + "epoch": 1.237285895634314, + "grad_norm": 6.606030211742109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255120 + }, + { + "epoch": 1.23733439382715, + "grad_norm": 6.701005617060218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255130 + }, + { + "epoch": 1.237382892019986, + "grad_norm": 6.999551516173597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255140 + }, + { + "epoch": 1.2374313902128222, + "grad_norm": 6.523015372295049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255150 + }, + { + "epoch": 1.2374798884056584, + "grad_norm": 6.605819180549588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255160 + }, + { + "epoch": 1.2375283865984943, + "grad_norm": 6.741518632225052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255170 + }, + { + "epoch": 1.2375768847913304, + "grad_norm": 6.727362489300504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255180 + }, + { + "epoch": 1.2376253829841666, + "grad_norm": 6.519643136471132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255190 + }, + { + "epoch": 1.2376738811770027, + "grad_norm": 6.86902410507173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255200 + }, + { + "epoch": 1.2377223793698386, + "grad_norm": 6.663656648697724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255210 + }, + { + "epoch": 1.2377708775626748, + "grad_norm": 6.561050014397551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255220 + }, + { + "epoch": 1.237819375755511, + "grad_norm": 6.43805648792295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255230 + }, + { + "epoch": 1.237867873948347, + "grad_norm": 6.263967122777103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255240 + }, + { + "epoch": 1.237916372141183, + "grad_norm": 6.695417908986201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255250 + }, + { + "epoch": 1.2379648703340191, + "grad_norm": 6.509170447088763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255260 + }, + { + "epoch": 1.2380133685268553, + "grad_norm": 6.743412228615853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255270 + }, + { + "epoch": 1.2380618667196914, + "grad_norm": 6.248606609915441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255280 + }, + { + "epoch": 1.2381103649125274, + "grad_norm": 7.264898727044056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255290 + }, + { + "epoch": 1.2381588631053635, + "grad_norm": 6.553708686851678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255300 + }, + { + "epoch": 1.2382073612981996, + "grad_norm": 6.575076838544192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255310 + }, + { + "epoch": 1.2382558594910358, + "grad_norm": 7.275295388353697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255320 + }, + { + "epoch": 1.2383043576838717, + "grad_norm": 6.423502441066375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255330 + }, + { + "epoch": 1.2383528558767078, + "grad_norm": 5.871004304935923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255340 + }, + { + "epoch": 1.238401354069544, + "grad_norm": 6.502893512561059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255350 + }, + { + "epoch": 1.2384498522623801, + "grad_norm": 6.537381125326647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255360 + }, + { + "epoch": 1.2384983504552163, + "grad_norm": 1.243538321205051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255370 + }, + { + "epoch": 1.2385468486480522, + "grad_norm": 6.279481112869689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255380 + }, + { + "epoch": 1.2385953468408883, + "grad_norm": 6.291904952604455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255390 + }, + { + "epoch": 1.2386438450337245, + "grad_norm": 6.703415778019917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255400 + }, + { + "epoch": 1.2386923432265604, + "grad_norm": 6.35607051435727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255410 + }, + { + "epoch": 1.2387408414193966, + "grad_norm": 6.216171755113464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255420 + }, + { + "epoch": 1.2387893396122327, + "grad_norm": 6.225148041494322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255430 + }, + { + "epoch": 1.2388378378050688, + "grad_norm": 5.929639002033582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255440 + }, + { + "epoch": 1.238886335997905, + "grad_norm": 6.388154361047782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255450 + }, + { + "epoch": 1.238934834190741, + "grad_norm": 6.365013405229547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255460 + }, + { + "epoch": 1.238983332383577, + "grad_norm": 6.336390612204923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255470 + }, + { + "epoch": 1.2390318305764132, + "grad_norm": 6.107416794520759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255480 + }, + { + "epoch": 1.2390803287692491, + "grad_norm": 5.826612081705207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255490 + }, + { + "epoch": 1.2391288269620853, + "grad_norm": 6.3789073578846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255500 + }, + { + "epoch": 1.2391773251549214, + "grad_norm": 6.562481047467372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255510 + }, + { + "epoch": 1.2392258233477575, + "grad_norm": 6.385059236890811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255520 + }, + { + "epoch": 1.2392743215405937, + "grad_norm": 5.483540022055422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255530 + }, + { + "epoch": 1.2393228197334296, + "grad_norm": 6.426544274518164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255540 + }, + { + "epoch": 1.2393713179262658, + "grad_norm": 6.487799453225307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255550 + }, + { + "epoch": 1.239419816119102, + "grad_norm": 6.377831596182659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255560 + }, + { + "epoch": 1.239468314311938, + "grad_norm": 6.259903528871291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255570 + }, + { + "epoch": 1.239516812504774, + "grad_norm": 5.8548103254452144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255580 + }, + { + "epoch": 1.2395653106976101, + "grad_norm": 6.121460671693058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255590 + }, + { + "epoch": 1.2396138088904463, + "grad_norm": 6.121050688534524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255600 + }, + { + "epoch": 1.2396623070832824, + "grad_norm": 6.439084643261594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255610 + }, + { + "epoch": 1.2397108052761183, + "grad_norm": 6.051268286455525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255620 + }, + { + "epoch": 1.2397593034689545, + "grad_norm": 5.8762356758279566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255630 + }, + { + "epoch": 1.2398078016617906, + "grad_norm": 5.78001291273722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255640 + }, + { + "epoch": 1.2398562998546268, + "grad_norm": 6.52395542033446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255650 + }, + { + "epoch": 1.2399047980474627, + "grad_norm": 6.23148963541098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255660 + }, + { + "epoch": 1.2399532962402988, + "grad_norm": 6.385183581869569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255670 + }, + { + "epoch": 1.240001794433135, + "grad_norm": 7.943855706571412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255680 + }, + { + "epoch": 1.240050292625971, + "grad_norm": 6.391127271854202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255690 + }, + { + "epoch": 1.240098790818807, + "grad_norm": 5.891878274155715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255700 + }, + { + "epoch": 1.2401472890116432, + "grad_norm": 5.877819475585966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255710 + }, + { + "epoch": 1.2401957872044793, + "grad_norm": 6.085382153742103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255720 + }, + { + "epoch": 1.2402442853973155, + "grad_norm": 5.9212410974396334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255730 + }, + { + "epoch": 1.2402927835901514, + "grad_norm": 5.7824735222311574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255740 + }, + { + "epoch": 1.2403412817829875, + "grad_norm": 6.266383678621423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255750 + }, + { + "epoch": 1.2403897799758237, + "grad_norm": 6.333515756296038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255760 + }, + { + "epoch": 1.2404382781686598, + "grad_norm": 6.259791973661777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255770 + }, + { + "epoch": 1.2404867763614957, + "grad_norm": 6.070602864838293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255780 + }, + { + "epoch": 1.2405352745543319, + "grad_norm": 6.012538023014713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255790 + }, + { + "epoch": 1.240583772747168, + "grad_norm": 6.033748434219888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255800 + }, + { + "epoch": 1.2406322709400042, + "grad_norm": 5.819147474994679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255810 + }, + { + "epoch": 1.2406807691328403, + "grad_norm": 5.884811216105845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255820 + }, + { + "epoch": 1.2407292673256762, + "grad_norm": 6.23892049134156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255830 + }, + { + "epoch": 1.2407777655185124, + "grad_norm": 5.673138048223336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255840 + }, + { + "epoch": 1.2408262637113485, + "grad_norm": 5.860978902205716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255850 + }, + { + "epoch": 1.2408747619041844, + "grad_norm": 5.88863642292381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255860 + }, + { + "epoch": 1.2409232600970206, + "grad_norm": 5.8896496568650036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255870 + }, + { + "epoch": 1.2409717582898567, + "grad_norm": 6.120512097140818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255880 + }, + { + "epoch": 1.2410202564826929, + "grad_norm": 5.399002844796996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255890 + }, + { + "epoch": 1.241068754675529, + "grad_norm": 5.687142845545168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255900 + }, + { + "epoch": 1.241117252868365, + "grad_norm": 5.63134676667687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255910 + }, + { + "epoch": 1.241165751061201, + "grad_norm": 6.021744525241957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255920 + }, + { + "epoch": 1.2412142492540372, + "grad_norm": 5.694520766041933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255930 + }, + { + "epoch": 1.2412627474468731, + "grad_norm": 5.482328901962319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255940 + }, + { + "epoch": 1.2413112456397093, + "grad_norm": 5.7529348396201385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255950 + }, + { + "epoch": 1.2413597438325454, + "grad_norm": 6.18555802134324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255960 + }, + { + "epoch": 1.2414082420253816, + "grad_norm": 5.8222273224828314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255970 + }, + { + "epoch": 1.2414567402182177, + "grad_norm": 5.591791563119841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255980 + }, + { + "epoch": 1.2415052384110536, + "grad_norm": 6.108881933641896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 255990 + }, + { + "epoch": 1.2415537366038898, + "grad_norm": 6.249018014159446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256000 + }, + { + "epoch": 1.241602234796726, + "grad_norm": 5.925121726590987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256010 + }, + { + "epoch": 1.2416507329895619, + "grad_norm": 5.902946753622018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256020 + }, + { + "epoch": 1.241699231182398, + "grad_norm": 5.453581763958937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256030 + }, + { + "epoch": 1.2417477293752341, + "grad_norm": 5.343620657072279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256040 + }, + { + "epoch": 1.2417962275680703, + "grad_norm": 5.892436050203287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256050 + }, + { + "epoch": 1.2418447257609064, + "grad_norm": 5.9306891841970355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256060 + }, + { + "epoch": 1.2418932239537424, + "grad_norm": 5.847259387792292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256070 + }, + { + "epoch": 1.2419417221465785, + "grad_norm": 5.761724608532859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256080 + }, + { + "epoch": 1.2419902203394146, + "grad_norm": 5.377696865593862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256090 + }, + { + "epoch": 1.2420387185322508, + "grad_norm": 5.9006588060128706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256100 + }, + { + "epoch": 1.2420872167250867, + "grad_norm": 5.745327058548355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256110 + }, + { + "epoch": 1.2421357149179229, + "grad_norm": 6.079690706428664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256120 + }, + { + "epoch": 1.242184213110759, + "grad_norm": 5.6746007004448984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256130 + }, + { + "epoch": 1.2422327113035951, + "grad_norm": 5.367101962860943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256140 + }, + { + "epoch": 1.242281209496431, + "grad_norm": 5.740882258464808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256150 + }, + { + "epoch": 1.2423297076892672, + "grad_norm": 5.827153870541224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256160 + }, + { + "epoch": 1.2423782058821033, + "grad_norm": 5.569522798509752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256170 + }, + { + "epoch": 1.2424267040749395, + "grad_norm": 5.883526910110959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256180 + }, + { + "epoch": 1.2424752022677754, + "grad_norm": 5.3460826876516876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256190 + }, + { + "epoch": 1.2425237004606116, + "grad_norm": 6.131453744728788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256200 + }, + { + "epoch": 1.2425721986534477, + "grad_norm": 5.859588014800465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256210 + }, + { + "epoch": 1.2426206968462838, + "grad_norm": 6.102317229306209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256220 + }, + { + "epoch": 1.2426691950391198, + "grad_norm": 5.332788433065616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256230 + }, + { + "epoch": 1.242717693231956, + "grad_norm": 5.895889287899081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256240 + }, + { + "epoch": 1.242766191424792, + "grad_norm": 5.5580848368208535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256250 + }, + { + "epoch": 1.2428146896176282, + "grad_norm": 5.6350536681293306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256260 + }, + { + "epoch": 1.2428631878104641, + "grad_norm": 5.4569831320350204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256270 + }, + { + "epoch": 1.2429116860033003, + "grad_norm": 5.294119986842816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256280 + }, + { + "epoch": 1.2429601841961364, + "grad_norm": 5.740316666447143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256290 + }, + { + "epoch": 1.2430086823889726, + "grad_norm": 5.287567006462268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256300 + }, + { + "epoch": 1.2430571805818085, + "grad_norm": 5.515877177231232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256310 + }, + { + "epoch": 1.2431056787746446, + "grad_norm": 1.9589616329085402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256320 + }, + { + "epoch": 1.2431541769674808, + "grad_norm": 5.092279309337755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256330 + }, + { + "epoch": 1.243202675160317, + "grad_norm": 5.493701493719527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256340 + }, + { + "epoch": 1.243251173353153, + "grad_norm": 5.3908866703977765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256350 + }, + { + "epoch": 1.243299671545989, + "grad_norm": 5.76350416281457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256360 + }, + { + "epoch": 1.2433481697388251, + "grad_norm": 5.521785695350445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256370 + }, + { + "epoch": 1.2433966679316613, + "grad_norm": 5.437607342173578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256380 + }, + { + "epoch": 1.2434451661244972, + "grad_norm": 5.082892329255628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256390 + }, + { + "epoch": 1.2434936643173333, + "grad_norm": 5.97681122371796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256400 + }, + { + "epoch": 1.2435421625101695, + "grad_norm": 5.4530165272126396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256410 + }, + { + "epoch": 1.2435906607030056, + "grad_norm": 5.415213166770627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256420 + }, + { + "epoch": 1.2436391588958418, + "grad_norm": 5.217020415670959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256430 + }, + { + "epoch": 1.2436876570886777, + "grad_norm": 5.176951134444607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256440 + }, + { + "epoch": 1.2437361552815138, + "grad_norm": 5.2450133125603315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256450 + }, + { + "epoch": 1.24378465347435, + "grad_norm": 5.269624381298854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256460 + }, + { + "epoch": 1.2438331516671859, + "grad_norm": 5.655346058119903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256470 + }, + { + "epoch": 1.243881649860022, + "grad_norm": 5.3239020303408324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256480 + }, + { + "epoch": 1.2439301480528582, + "grad_norm": 5.024438110012852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256490 + }, + { + "epoch": 1.2439786462456943, + "grad_norm": 5.2640309888829506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256500 + }, + { + "epoch": 1.2440271444385305, + "grad_norm": 5.268276837000485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256510 + }, + { + "epoch": 1.2440756426313664, + "grad_norm": 5.7577800305352866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256520 + }, + { + "epoch": 1.2441241408242025, + "grad_norm": 5.651341794532527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256530 + }, + { + "epoch": 1.2441726390170387, + "grad_norm": 5.339441599971906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256540 + }, + { + "epoch": 1.2442211372098746, + "grad_norm": 5.169576766661521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256550 + }, + { + "epoch": 1.2442696354027107, + "grad_norm": 5.145483328306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256560 + }, + { + "epoch": 1.2443181335955469, + "grad_norm": 5.048393347806268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256570 + }, + { + "epoch": 1.244366631788383, + "grad_norm": 5.79295722502593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256580 + }, + { + "epoch": 1.2444151299812192, + "grad_norm": 4.94261520600503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256590 + }, + { + "epoch": 1.244463628174055, + "grad_norm": 5.350646148372107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256600 + }, + { + "epoch": 1.2445121263668912, + "grad_norm": 5.242408107619667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256610 + }, + { + "epoch": 1.2445606245597274, + "grad_norm": 5.202108965818297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256620 + }, + { + "epoch": 1.2446091227525635, + "grad_norm": 5.325574647940812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256630 + }, + { + "epoch": 1.2446576209453994, + "grad_norm": 4.8486231918332123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256640 + }, + { + "epoch": 1.2447061191382356, + "grad_norm": 5.182976536843853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256650 + }, + { + "epoch": 1.2447546173310717, + "grad_norm": 5.818080950348303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256660 + }, + { + "epoch": 1.2448031155239079, + "grad_norm": 5.2398334560166404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256670 + }, + { + "epoch": 1.2448516137167438, + "grad_norm": 5.1922508959023617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256680 + }, + { + "epoch": 1.24490011190958, + "grad_norm": 4.908276096671216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256690 + }, + { + "epoch": 1.244948610102416, + "grad_norm": 5.429119909194924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256700 + }, + { + "epoch": 1.2449971082952522, + "grad_norm": 5.051740004091698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256710 + }, + { + "epoch": 1.2450456064880882, + "grad_norm": 4.9531159618254605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256720 + }, + { + "epoch": 1.2450941046809243, + "grad_norm": 5.1376268572767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256730 + }, + { + "epoch": 1.2451426028737604, + "grad_norm": 4.8981529943148416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256740 + }, + { + "epoch": 1.2451911010665966, + "grad_norm": 5.277695080962985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256750 + }, + { + "epoch": 1.2452395992594325, + "grad_norm": 5.065480479515827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256760 + }, + { + "epoch": 1.2452880974522686, + "grad_norm": 5.481469500523417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256770 + }, + { + "epoch": 1.2453365956451048, + "grad_norm": 4.880335779944289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256780 + }, + { + "epoch": 1.245385093837941, + "grad_norm": 5.001389880021634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256790 + }, + { + "epoch": 1.2454335920307769, + "grad_norm": 5.278950609977073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256800 + }, + { + "epoch": 1.245482090223613, + "grad_norm": 5.203646225027114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256810 + }, + { + "epoch": 1.2455305884164491, + "grad_norm": 4.983832013749634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256820 + }, + { + "epoch": 1.2455790866092853, + "grad_norm": 5.1820919111378316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256830 + }, + { + "epoch": 1.2456275848021212, + "grad_norm": 4.757986005188286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256840 + }, + { + "epoch": 1.2456760829949574, + "grad_norm": 5.0348095470553744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256850 + }, + { + "epoch": 1.2457245811877935, + "grad_norm": 5.316058349080777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256860 + }, + { + "epoch": 1.2457730793806296, + "grad_norm": 5.026091898230334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256870 + }, + { + "epoch": 1.2458215775734658, + "grad_norm": 5.18856602127471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256880 + }, + { + "epoch": 1.2458700757663017, + "grad_norm": 5.051474616379892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256890 + }, + { + "epoch": 1.2459185739591379, + "grad_norm": 5.5894599171324444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256900 + }, + { + "epoch": 1.245967072151974, + "grad_norm": 6.13008594996245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256910 + }, + { + "epoch": 1.24601557034481, + "grad_norm": 5.904485078644939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256920 + }, + { + "epoch": 1.246064068537646, + "grad_norm": 6.147798359279477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256930 + }, + { + "epoch": 1.2461125667304822, + "grad_norm": 5.3242686703924846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256940 + }, + { + "epoch": 1.2461610649233184, + "grad_norm": 5.862316854177152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256950 + }, + { + "epoch": 1.2462095631161545, + "grad_norm": 5.773788203100594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256960 + }, + { + "epoch": 1.2462580613089904, + "grad_norm": 0.0004112987662665546, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 256970 + }, + { + "epoch": 1.2463065595018266, + "grad_norm": 3.8289792428258806e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256980 + }, + { + "epoch": 1.2463550576946627, + "grad_norm": 1.8376005755271763e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 256990 + }, + { + "epoch": 1.2464035558874986, + "grad_norm": 0.0036406766157597303, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 257000 + }, + { + "epoch": 1.2464520540803348, + "grad_norm": 0.012293219566345215, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 257010 + }, + { + "epoch": 1.246500552273171, + "grad_norm": 0.00010246434976579621, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 257020 + }, + { + "epoch": 1.246549050466007, + "grad_norm": 0.07520869374275208, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 257030 + }, + { + "epoch": 1.2465975486588432, + "grad_norm": 0.00010898813343374059, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257040 + }, + { + "epoch": 1.2466460468516791, + "grad_norm": 0.01484481617808342, + "learning_rate": 0.0002, + "loss": 0.0029, + "step": 257050 + }, + { + "epoch": 1.2466945450445153, + "grad_norm": 0.00016316029359586537, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 257060 + }, + { + "epoch": 1.2467430432373514, + "grad_norm": 7.055019523249939e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 257070 + }, + { + "epoch": 1.2467915414301873, + "grad_norm": 0.00024017441319301724, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 257080 + }, + { + "epoch": 1.2468400396230235, + "grad_norm": 0.030745387077331543, + "learning_rate": 0.0002, + "loss": 0.002, + "step": 257090 + }, + { + "epoch": 1.2468885378158596, + "grad_norm": 0.00040232090395875275, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 257100 + }, + { + "epoch": 1.2469370360086958, + "grad_norm": 4.6826880861772224e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257110 + }, + { + "epoch": 1.246985534201532, + "grad_norm": 2.5159008146147244e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257120 + }, + { + "epoch": 1.2470340323943678, + "grad_norm": 1.8502034436096437e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257130 + }, + { + "epoch": 1.247082530587204, + "grad_norm": 1.838752177718561e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257140 + }, + { + "epoch": 1.2471310287800401, + "grad_norm": 1.501860151620349e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257150 + }, + { + "epoch": 1.2471795269728763, + "grad_norm": 1.605268153070938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257160 + }, + { + "epoch": 1.2472280251657122, + "grad_norm": 1.5422609067172743e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257170 + }, + { + "epoch": 1.2472765233585483, + "grad_norm": 1.1785484275605995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257180 + }, + { + "epoch": 1.2473250215513845, + "grad_norm": 1.2107492693758104e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257190 + }, + { + "epoch": 1.2473735197442206, + "grad_norm": 1.4461676073551644e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257200 + }, + { + "epoch": 1.2474220179370565, + "grad_norm": 1.2875164429715369e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257210 + }, + { + "epoch": 1.2474705161298927, + "grad_norm": 1.2529425475804601e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257220 + }, + { + "epoch": 1.2475190143227288, + "grad_norm": 1.192666695715161e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257230 + }, + { + "epoch": 1.247567512515565, + "grad_norm": 1.1452031685621478e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257240 + }, + { + "epoch": 1.247616010708401, + "grad_norm": 1.0722890692704823e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257250 + }, + { + "epoch": 1.247664508901237, + "grad_norm": 1.0866168850043323e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257260 + }, + { + "epoch": 1.2477130070940732, + "grad_norm": 1.0835830835276283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257270 + }, + { + "epoch": 1.2477615052869093, + "grad_norm": 9.572777344146743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257280 + }, + { + "epoch": 1.2478100034797452, + "grad_norm": 9.513044460618403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257290 + }, + { + "epoch": 1.2478585016725814, + "grad_norm": 1.1331339010212105e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257300 + }, + { + "epoch": 1.2479069998654175, + "grad_norm": 9.594439688953571e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257310 + }, + { + "epoch": 1.2479554980582537, + "grad_norm": 9.489238436799496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257320 + }, + { + "epoch": 1.2480039962510896, + "grad_norm": 9.51446418184787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257330 + }, + { + "epoch": 1.2480524944439257, + "grad_norm": 6.414399649656843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257340 + }, + { + "epoch": 1.2481009926367619, + "grad_norm": 8.208477083826438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257350 + }, + { + "epoch": 1.248149490829598, + "grad_norm": 8.567616532673128e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257360 + }, + { + "epoch": 1.248197989022434, + "grad_norm": 8.27798339742003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257370 + }, + { + "epoch": 1.24824648721527, + "grad_norm": 7.567592547275126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257380 + }, + { + "epoch": 1.2482949854081062, + "grad_norm": 6.394631782313809e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257390 + }, + { + "epoch": 1.2483434836009424, + "grad_norm": 8.52183347888058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257400 + }, + { + "epoch": 1.2483919817937785, + "grad_norm": 8.726029591343831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257410 + }, + { + "epoch": 1.2484404799866144, + "grad_norm": 7.5066654972033575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257420 + }, + { + "epoch": 1.2484889781794506, + "grad_norm": 6.296351330092875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257430 + }, + { + "epoch": 1.2485374763722867, + "grad_norm": 6.284577466431074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257440 + }, + { + "epoch": 1.2485859745651227, + "grad_norm": 7.019141776254401e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257450 + }, + { + "epoch": 1.2486344727579588, + "grad_norm": 6.909365311003057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257460 + }, + { + "epoch": 1.248682970950795, + "grad_norm": 7.690133315918501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257470 + }, + { + "epoch": 1.248731469143631, + "grad_norm": 5.883182893740013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257480 + }, + { + "epoch": 1.2487799673364672, + "grad_norm": 5.031579803471686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257490 + }, + { + "epoch": 1.2488284655293032, + "grad_norm": 6.548523288074648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257500 + }, + { + "epoch": 1.2488769637221393, + "grad_norm": 6.399740868801018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257510 + }, + { + "epoch": 1.2489254619149754, + "grad_norm": 6.307091098278761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257520 + }, + { + "epoch": 1.2489739601078114, + "grad_norm": 6.26710288997856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257530 + }, + { + "epoch": 1.2490224583006475, + "grad_norm": 5.203906766837463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257540 + }, + { + "epoch": 1.2490709564934837, + "grad_norm": 6.218316229933407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257550 + }, + { + "epoch": 1.2491194546863198, + "grad_norm": 5.8519326557870954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257560 + }, + { + "epoch": 1.249167952879156, + "grad_norm": 5.678343768522609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257570 + }, + { + "epoch": 1.2492164510719919, + "grad_norm": 5.6660533118702006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257580 + }, + { + "epoch": 1.249264949264828, + "grad_norm": 4.299861757317558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257590 + }, + { + "epoch": 1.2493134474576642, + "grad_norm": 5.335086825652979e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257600 + }, + { + "epoch": 1.2493619456505003, + "grad_norm": 5.227216206549201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257610 + }, + { + "epoch": 1.2494104438433362, + "grad_norm": 5.066915491624968e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257620 + }, + { + "epoch": 1.2494589420361724, + "grad_norm": 5.0735493459797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257630 + }, + { + "epoch": 1.2495074402290085, + "grad_norm": 3.675492735055741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257640 + }, + { + "epoch": 1.2495559384218446, + "grad_norm": 4.87596116727218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257650 + }, + { + "epoch": 1.2496044366146806, + "grad_norm": 4.719463504443411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257660 + }, + { + "epoch": 1.2496529348075167, + "grad_norm": 4.808308403880801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257670 + }, + { + "epoch": 1.2497014330003529, + "grad_norm": 4.6723912419111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257680 + }, + { + "epoch": 1.249749931193189, + "grad_norm": 3.683899649331579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257690 + }, + { + "epoch": 1.249798429386025, + "grad_norm": 4.5814254008291755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257700 + }, + { + "epoch": 1.249846927578861, + "grad_norm": 4.123319740756415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257710 + }, + { + "epoch": 1.2498954257716972, + "grad_norm": 0.22149784862995148, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 257720 + }, + { + "epoch": 1.2499439239645334, + "grad_norm": 0.00016924977535381913, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257730 + }, + { + "epoch": 1.2499924221573693, + "grad_norm": 3.082248440477997e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 257740 + }, + { + "epoch": 1.2500409203502054, + "grad_norm": 0.0004051739233545959, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257750 + }, + { + "epoch": 1.2500894185430416, + "grad_norm": 1.7465938071836717e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257760 + }, + { + "epoch": 1.2501379167358777, + "grad_norm": 1.749818147800397e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 257770 + }, + { + "epoch": 1.2501864149287139, + "grad_norm": 5.02551811223384e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257780 + }, + { + "epoch": 1.2502349131215498, + "grad_norm": 5.43764945177827e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257790 + }, + { + "epoch": 1.250283411314386, + "grad_norm": 6.118417513789609e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257800 + }, + { + "epoch": 1.250331909507222, + "grad_norm": 4.484154123929329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257810 + }, + { + "epoch": 1.250380407700058, + "grad_norm": 3.794677468249574e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257820 + }, + { + "epoch": 1.2504289058928941, + "grad_norm": 3.305191421532072e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257830 + }, + { + "epoch": 1.2504774040857303, + "grad_norm": 2.3276488718693145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257840 + }, + { + "epoch": 1.2505259022785664, + "grad_norm": 2.480861803633161e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257850 + }, + { + "epoch": 1.2505744004714026, + "grad_norm": 2.1904834284214303e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257860 + }, + { + "epoch": 1.2506228986642385, + "grad_norm": 1.9518209228408523e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257870 + }, + { + "epoch": 1.2506713968570746, + "grad_norm": 1.724142930470407e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257880 + }, + { + "epoch": 1.2507198950499108, + "grad_norm": 0.053927723318338394, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 257890 + }, + { + "epoch": 1.2507683932427467, + "grad_norm": 0.00029220752185210586, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257900 + }, + { + "epoch": 1.2508168914355828, + "grad_norm": 0.0001784680353011936, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257910 + }, + { + "epoch": 1.250865389628419, + "grad_norm": 7.487928814953193e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257920 + }, + { + "epoch": 1.2509138878212551, + "grad_norm": 4.0335067751584575e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257930 + }, + { + "epoch": 1.2509623860140913, + "grad_norm": 0.00014248344814404845, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257940 + }, + { + "epoch": 1.2510108842069272, + "grad_norm": 2.7975200282526202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257950 + }, + { + "epoch": 1.2510593823997633, + "grad_norm": 2.0682973627117462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257960 + }, + { + "epoch": 1.2511078805925995, + "grad_norm": 1.9540284483809955e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257970 + }, + { + "epoch": 1.2511563787854354, + "grad_norm": 1.770260678313207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257980 + }, + { + "epoch": 1.2512048769782715, + "grad_norm": 1.5019574675534386e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 257990 + }, + { + "epoch": 1.2512533751711077, + "grad_norm": 1.4750716218259186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258000 + }, + { + "epoch": 1.2513018733639438, + "grad_norm": 1.3392958862823434e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258010 + }, + { + "epoch": 1.25135037155678, + "grad_norm": 1.278547370020533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258020 + }, + { + "epoch": 1.251398869749616, + "grad_norm": 1.19038331831689e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258030 + }, + { + "epoch": 1.251447367942452, + "grad_norm": 9.960505849448964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258040 + }, + { + "epoch": 1.2514958661352882, + "grad_norm": 1.019744377117604e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258050 + }, + { + "epoch": 1.251544364328124, + "grad_norm": 1.0089956049341708e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258060 + }, + { + "epoch": 1.2515928625209602, + "grad_norm": 1.0331784324080218e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258070 + }, + { + "epoch": 1.2516413607137964, + "grad_norm": 8.957775207818486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258080 + }, + { + "epoch": 1.2516898589066325, + "grad_norm": 7.75464650359936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258090 + }, + { + "epoch": 1.2517383570994687, + "grad_norm": 8.880545465217438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258100 + }, + { + "epoch": 1.2517868552923046, + "grad_norm": 7.701430149609223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258110 + }, + { + "epoch": 1.2518353534851407, + "grad_norm": 8.19423257780727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258120 + }, + { + "epoch": 1.2518838516779769, + "grad_norm": 7.238747912197141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258130 + }, + { + "epoch": 1.2519323498708128, + "grad_norm": 6.341652806440834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258140 + }, + { + "epoch": 1.251980848063649, + "grad_norm": 7.003695827734191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258150 + }, + { + "epoch": 1.252029346256485, + "grad_norm": 6.860498615424149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258160 + }, + { + "epoch": 1.2520778444493212, + "grad_norm": 7.13883218850242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258170 + }, + { + "epoch": 1.2521263426421574, + "grad_norm": 6.748545729351463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258180 + }, + { + "epoch": 1.2521748408349933, + "grad_norm": 5.2953614613215905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258190 + }, + { + "epoch": 1.2522233390278295, + "grad_norm": 5.718072770832805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258200 + }, + { + "epoch": 1.2522718372206656, + "grad_norm": 6.0144880080770236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258210 + }, + { + "epoch": 1.2523203354135015, + "grad_norm": 6.062823558750097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258220 + }, + { + "epoch": 1.2523688336063377, + "grad_norm": 5.379226877266774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258230 + }, + { + "epoch": 1.2524173317991738, + "grad_norm": 4.685668500314932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258240 + }, + { + "epoch": 1.25246582999201, + "grad_norm": 5.6823660088412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258250 + }, + { + "epoch": 1.252514328184846, + "grad_norm": 5.977524324407568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258260 + }, + { + "epoch": 1.252562826377682, + "grad_norm": 5.049404080637032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258270 + }, + { + "epoch": 1.2526113245705182, + "grad_norm": 5.046861588198226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258280 + }, + { + "epoch": 1.2526598227633543, + "grad_norm": 4.143858859606553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258290 + }, + { + "epoch": 1.2527083209561904, + "grad_norm": 4.68453345092712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258300 + }, + { + "epoch": 1.2527568191490266, + "grad_norm": 4.772990450874204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258310 + }, + { + "epoch": 1.2528053173418625, + "grad_norm": 4.482863459998043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258320 + }, + { + "epoch": 1.2528538155346987, + "grad_norm": 4.456008809938794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258330 + }, + { + "epoch": 1.2529023137275348, + "grad_norm": 4.171940418018494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258340 + }, + { + "epoch": 1.2529508119203707, + "grad_norm": 4.203406660963083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258350 + }, + { + "epoch": 1.2529993101132069, + "grad_norm": 4.407441338116769e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258360 + }, + { + "epoch": 1.253047808306043, + "grad_norm": 1.5879390048212372e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258370 + }, + { + "epoch": 1.2530963064988792, + "grad_norm": 4.232179890095722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258380 + }, + { + "epoch": 1.2531448046917153, + "grad_norm": 3.824161922239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258390 + }, + { + "epoch": 1.2531933028845512, + "grad_norm": 3.851128894893918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258400 + }, + { + "epoch": 1.2532418010773874, + "grad_norm": 6.759454663551878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258410 + }, + { + "epoch": 1.2532902992702235, + "grad_norm": 3.536817757776589e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258420 + }, + { + "epoch": 1.2533387974630594, + "grad_norm": 3.3163657917612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258430 + }, + { + "epoch": 1.2533872956558956, + "grad_norm": 7.888801519584376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258440 + }, + { + "epoch": 1.2534357938487317, + "grad_norm": 3.4140421121264808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258450 + }, + { + "epoch": 1.2534842920415679, + "grad_norm": 3.319352344988147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258460 + }, + { + "epoch": 1.253532790234404, + "grad_norm": 3.3943620110221673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258470 + }, + { + "epoch": 1.25358128842724, + "grad_norm": 3.2841953725437634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258480 + }, + { + "epoch": 1.253629786620076, + "grad_norm": 2.8411695893737487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258490 + }, + { + "epoch": 1.2536782848129122, + "grad_norm": 2.948047722384217e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258500 + }, + { + "epoch": 1.2537267830057481, + "grad_norm": 3.199075308657484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258510 + }, + { + "epoch": 1.2537752811985843, + "grad_norm": 2.986557319673011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258520 + }, + { + "epoch": 1.2538237793914204, + "grad_norm": 2.9043246740911854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258530 + }, + { + "epoch": 1.2538722775842566, + "grad_norm": 2.4002349618967855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258540 + }, + { + "epoch": 1.2539207757770927, + "grad_norm": 5.425543349701911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258550 + }, + { + "epoch": 1.2539692739699286, + "grad_norm": 2.6428483579366002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258560 + }, + { + "epoch": 1.2540177721627648, + "grad_norm": 2.905160044974764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258570 + }, + { + "epoch": 1.254066270355601, + "grad_norm": 2.7694641175912693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258580 + }, + { + "epoch": 1.2541147685484368, + "grad_norm": 2.169673734897515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258590 + }, + { + "epoch": 1.254163266741273, + "grad_norm": 2.7202920591662405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258600 + }, + { + "epoch": 1.2542117649341091, + "grad_norm": 2.767498244793387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258610 + }, + { + "epoch": 1.2542602631269453, + "grad_norm": 2.2760375486541307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258620 + }, + { + "epoch": 1.2543087613197814, + "grad_norm": 2.350666591155459e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258630 + }, + { + "epoch": 1.2543572595126173, + "grad_norm": 2.2221247490961105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258640 + }, + { + "epoch": 1.2544057577054535, + "grad_norm": 2.4628864139231155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258650 + }, + { + "epoch": 1.2544542558982896, + "grad_norm": 2.222063358203741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258660 + }, + { + "epoch": 1.2545027540911255, + "grad_norm": 2.5834831376414513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258670 + }, + { + "epoch": 1.2545512522839617, + "grad_norm": 2.315166966582183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258680 + }, + { + "epoch": 1.2545997504767978, + "grad_norm": 1.8101027308148332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258690 + }, + { + "epoch": 1.254648248669634, + "grad_norm": 2.3710765617579455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258700 + }, + { + "epoch": 1.2546967468624701, + "grad_norm": 2.0692707494163187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258710 + }, + { + "epoch": 1.254745245055306, + "grad_norm": 2.0452564513107063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258720 + }, + { + "epoch": 1.2547937432481422, + "grad_norm": 2.0446136659302283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258730 + }, + { + "epoch": 1.2548422414409783, + "grad_norm": 2.037708554780693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258740 + }, + { + "epoch": 1.2548907396338145, + "grad_norm": 2.1656819626514334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258750 + }, + { + "epoch": 1.2549392378266506, + "grad_norm": 2.1947710138192633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258760 + }, + { + "epoch": 1.2549877360194865, + "grad_norm": 2.3200395844469313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258770 + }, + { + "epoch": 1.2550362342123227, + "grad_norm": 1.8068317331199069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258780 + }, + { + "epoch": 1.2550847324051588, + "grad_norm": 1.598498784005642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258790 + }, + { + "epoch": 1.2551332305979948, + "grad_norm": 1.924151092680404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258800 + }, + { + "epoch": 1.255181728790831, + "grad_norm": 1.826268203330983e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258810 + }, + { + "epoch": 1.255230226983667, + "grad_norm": 1.7636458551351097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258820 + }, + { + "epoch": 1.2552787251765032, + "grad_norm": 1.5623197668901412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258830 + }, + { + "epoch": 1.2553272233693393, + "grad_norm": 1.4457021961788996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258840 + }, + { + "epoch": 1.2553757215621753, + "grad_norm": 1.721456669656618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258850 + }, + { + "epoch": 1.2554242197550114, + "grad_norm": 1.804167823138414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258860 + }, + { + "epoch": 1.2554727179478475, + "grad_norm": 1.7630964066484012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258870 + }, + { + "epoch": 1.2555212161406835, + "grad_norm": 1.70830594470317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258880 + }, + { + "epoch": 1.2555697143335196, + "grad_norm": 1.2394631312417914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258890 + }, + { + "epoch": 1.2556182125263557, + "grad_norm": 1.5389205145766027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258900 + }, + { + "epoch": 1.255666710719192, + "grad_norm": 1.6321753264492145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258910 + }, + { + "epoch": 1.255715208912028, + "grad_norm": 1.7375969036947936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258920 + }, + { + "epoch": 1.255763707104864, + "grad_norm": 1.5797832020325586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258930 + }, + { + "epoch": 1.2558122052977, + "grad_norm": 1.1320606745357509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258940 + }, + { + "epoch": 1.2558607034905362, + "grad_norm": 1.5622484852428897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258950 + }, + { + "epoch": 1.2559092016833722, + "grad_norm": 1.5150634453675593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258960 + }, + { + "epoch": 1.2559576998762083, + "grad_norm": 1.4963088688091375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258970 + }, + { + "epoch": 1.2560061980690445, + "grad_norm": 2.6106554287252948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258980 + }, + { + "epoch": 1.2560546962618806, + "grad_norm": 1.1918534710275708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 258990 + }, + { + "epoch": 1.2561031944547167, + "grad_norm": 1.4678023489977932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259000 + }, + { + "epoch": 1.2561516926475527, + "grad_norm": 1.5151939578572637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259010 + }, + { + "epoch": 1.2562001908403888, + "grad_norm": 1.3347720368983573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259020 + }, + { + "epoch": 1.256248689033225, + "grad_norm": 1.388708710692299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259030 + }, + { + "epoch": 1.2562971872260609, + "grad_norm": 1.0694105867514736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259040 + }, + { + "epoch": 1.256345685418897, + "grad_norm": 1.454154698876664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259050 + }, + { + "epoch": 1.2563941836117332, + "grad_norm": 1.4287085150499479e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259060 + }, + { + "epoch": 1.2564426818045693, + "grad_norm": 1.2921924508191296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259070 + }, + { + "epoch": 1.2564911799974054, + "grad_norm": 1.3219263337305165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259080 + }, + { + "epoch": 1.2565396781902414, + "grad_norm": 1.0325937864763546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259090 + }, + { + "epoch": 1.2565881763830775, + "grad_norm": 1.4069976259634132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259100 + }, + { + "epoch": 1.2566366745759137, + "grad_norm": 1.3597241377283353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259110 + }, + { + "epoch": 1.2566851727687496, + "grad_norm": 1.2478542430471862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259120 + }, + { + "epoch": 1.2567336709615857, + "grad_norm": 1.372513111164153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259130 + }, + { + "epoch": 1.2567821691544219, + "grad_norm": 1.0702397048589773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259140 + }, + { + "epoch": 1.256830667347258, + "grad_norm": 1.2774490869560395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259150 + }, + { + "epoch": 1.2568791655400942, + "grad_norm": 1.415005613125686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259160 + }, + { + "epoch": 1.25692766373293, + "grad_norm": 1.215798420162173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259170 + }, + { + "epoch": 1.2569761619257662, + "grad_norm": 1.253173877557856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259180 + }, + { + "epoch": 1.2570246601186024, + "grad_norm": 9.210273788085033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259190 + }, + { + "epoch": 1.2570731583114383, + "grad_norm": 1.3734291997025139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259200 + }, + { + "epoch": 1.2571216565042744, + "grad_norm": 1.1965403245994821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259210 + }, + { + "epoch": 1.2571701546971106, + "grad_norm": 1.250825675924716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259220 + }, + { + "epoch": 1.2572186528899467, + "grad_norm": 1.18340028620878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259230 + }, + { + "epoch": 1.2572671510827829, + "grad_norm": 9.071184763342899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259240 + }, + { + "epoch": 1.2573156492756188, + "grad_norm": 1.18904574719636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259250 + }, + { + "epoch": 1.257364147468455, + "grad_norm": 1.1355086826370098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259260 + }, + { + "epoch": 1.257412645661291, + "grad_norm": 1.1767999694711762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259270 + }, + { + "epoch": 1.2574611438541272, + "grad_norm": 1.0662180329745752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259280 + }, + { + "epoch": 1.2575096420469634, + "grad_norm": 8.64654509769025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259290 + }, + { + "epoch": 1.2575581402397993, + "grad_norm": 1.0810433650476625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259300 + }, + { + "epoch": 1.2576066384326354, + "grad_norm": 1.8961579826282104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259310 + }, + { + "epoch": 1.2576551366254716, + "grad_norm": 1.1333805787216988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259320 + }, + { + "epoch": 1.2577036348183075, + "grad_norm": 1.1497671721372171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259330 + }, + { + "epoch": 1.2577521330111436, + "grad_norm": 7.543283686572977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259340 + }, + { + "epoch": 1.2578006312039798, + "grad_norm": 1.1574843483685981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259350 + }, + { + "epoch": 1.257849129396816, + "grad_norm": 1.2897144188173115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259360 + }, + { + "epoch": 1.257897627589652, + "grad_norm": 1.0758687949419254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259370 + }, + { + "epoch": 1.257946125782488, + "grad_norm": 8.264911457445123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259380 + }, + { + "epoch": 1.2579946239753241, + "grad_norm": 8.043693355830328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259390 + }, + { + "epoch": 1.2580431221681603, + "grad_norm": 9.367942084281822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259400 + }, + { + "epoch": 1.2580916203609962, + "grad_norm": 1.0926478353212588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259410 + }, + { + "epoch": 1.2581401185538323, + "grad_norm": 1.1715594609995605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259420 + }, + { + "epoch": 1.2581886167466685, + "grad_norm": 1.0099698783960775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259430 + }, + { + "epoch": 1.2582371149395046, + "grad_norm": 1.437733658349316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259440 + }, + { + "epoch": 1.2582856131323408, + "grad_norm": 1.039496055454947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259450 + }, + { + "epoch": 1.2583341113251767, + "grad_norm": 9.565577556713833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259460 + }, + { + "epoch": 1.2583826095180128, + "grad_norm": 9.96643393591512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259470 + }, + { + "epoch": 1.258431107710849, + "grad_norm": 8.58505927681108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259480 + }, + { + "epoch": 1.258479605903685, + "grad_norm": 8.056941283030028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259490 + }, + { + "epoch": 1.258528104096521, + "grad_norm": 9.047995490618632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259500 + }, + { + "epoch": 1.2585766022893572, + "grad_norm": 8.877387926986557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259510 + }, + { + "epoch": 1.2586251004821933, + "grad_norm": 8.452048518847732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259520 + }, + { + "epoch": 1.2586735986750295, + "grad_norm": 7.612504191456537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259530 + }, + { + "epoch": 1.2587220968678654, + "grad_norm": 7.530016432610864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259540 + }, + { + "epoch": 1.2587705950607015, + "grad_norm": 8.677446885485551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259550 + }, + { + "epoch": 1.2588190932535377, + "grad_norm": 9.139683356806927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259560 + }, + { + "epoch": 1.2588675914463736, + "grad_norm": 9.15143516522221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259570 + }, + { + "epoch": 1.2589160896392098, + "grad_norm": 8.817774528324662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259580 + }, + { + "epoch": 1.258964587832046, + "grad_norm": 5.91004663874628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259590 + }, + { + "epoch": 1.259013086024882, + "grad_norm": 8.475153094877896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259600 + }, + { + "epoch": 1.2590615842177182, + "grad_norm": 8.0467185625821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259610 + }, + { + "epoch": 1.259110082410554, + "grad_norm": 8.447310051451495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259620 + }, + { + "epoch": 1.2591585806033903, + "grad_norm": 9.613339670977439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259630 + }, + { + "epoch": 1.2592070787962264, + "grad_norm": 6.408869808183226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259640 + }, + { + "epoch": 1.2592555769890623, + "grad_norm": 8.34212130484957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259650 + }, + { + "epoch": 1.2593040751818985, + "grad_norm": 7.928165928205999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259660 + }, + { + "epoch": 1.2593525733747346, + "grad_norm": 7.854242767280084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259670 + }, + { + "epoch": 1.2594010715675708, + "grad_norm": 6.42979387066589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259680 + }, + { + "epoch": 1.259449569760407, + "grad_norm": 7.536697239629575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259690 + }, + { + "epoch": 1.2594980679532428, + "grad_norm": 7.934569907774858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259700 + }, + { + "epoch": 1.259546566146079, + "grad_norm": 6.899562663420511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259710 + }, + { + "epoch": 1.259595064338915, + "grad_norm": 7.230966616589285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259720 + }, + { + "epoch": 1.259643562531751, + "grad_norm": 9.371539704261522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259730 + }, + { + "epoch": 1.2596920607245872, + "grad_norm": 5.990405043121427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259740 + }, + { + "epoch": 1.2597405589174233, + "grad_norm": 7.505724397560698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259750 + }, + { + "epoch": 1.2597890571102595, + "grad_norm": 7.094422471709549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259760 + }, + { + "epoch": 1.2598375553030956, + "grad_norm": 7.829427772776398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259770 + }, + { + "epoch": 1.2598860534959315, + "grad_norm": 6.453302034969965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259780 + }, + { + "epoch": 1.2599345516887677, + "grad_norm": 7.296305852833029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259790 + }, + { + "epoch": 1.2599830498816038, + "grad_norm": 7.184743253674242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259800 + }, + { + "epoch": 1.26003154807444, + "grad_norm": 7.62172419399576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259810 + }, + { + "epoch": 1.260080046267276, + "grad_norm": 7.609547765241587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259820 + }, + { + "epoch": 1.260128544460112, + "grad_norm": 6.815600386289589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259830 + }, + { + "epoch": 1.2601770426529482, + "grad_norm": 5.596577921096468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259840 + }, + { + "epoch": 1.2602255408457843, + "grad_norm": 6.956501010790817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259850 + }, + { + "epoch": 1.2602740390386202, + "grad_norm": 7.351508202191326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259860 + }, + { + "epoch": 1.2603225372314564, + "grad_norm": 7.443359208991751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259870 + }, + { + "epoch": 1.2603710354242925, + "grad_norm": 6.626150934607722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259880 + }, + { + "epoch": 1.2604195336171287, + "grad_norm": 4.805864364243462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259890 + }, + { + "epoch": 1.2604680318099648, + "grad_norm": 7.920638154246262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259900 + }, + { + "epoch": 1.2605165300028007, + "grad_norm": 2.271822950206115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259910 + }, + { + "epoch": 1.2605650281956369, + "grad_norm": 8.905301456252346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259920 + }, + { + "epoch": 1.260613526388473, + "grad_norm": 6.754630703653675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259930 + }, + { + "epoch": 1.260662024581309, + "grad_norm": 5.123067694512429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259940 + }, + { + "epoch": 1.260710522774145, + "grad_norm": 6.961565190977126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259950 + }, + { + "epoch": 1.2607590209669812, + "grad_norm": 6.529770644192467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259960 + }, + { + "epoch": 1.2608075191598174, + "grad_norm": 7.139373110476299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259970 + }, + { + "epoch": 1.2608560173526535, + "grad_norm": 6.360803581628716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259980 + }, + { + "epoch": 1.2609045155454894, + "grad_norm": 4.833523234992754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 259990 + }, + { + "epoch": 1.2609530137383256, + "grad_norm": 5.796538289359887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260000 + }, + { + "epoch": 1.2610015119311617, + "grad_norm": 6.114656230238324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260010 + }, + { + "epoch": 1.2610500101239976, + "grad_norm": 5.982571451568219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260020 + }, + { + "epoch": 1.2610985083168338, + "grad_norm": 6.390962994373695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260030 + }, + { + "epoch": 1.26114700650967, + "grad_norm": 4.865086111749406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260040 + }, + { + "epoch": 1.261195504702506, + "grad_norm": 5.866913852514699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260050 + }, + { + "epoch": 1.2612440028953422, + "grad_norm": 5.614581368718063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260060 + }, + { + "epoch": 1.2612925010881781, + "grad_norm": 5.491719434758124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260070 + }, + { + "epoch": 1.2613409992810143, + "grad_norm": 5.987290023767855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260080 + }, + { + "epoch": 1.2613894974738504, + "grad_norm": 4.2622912133083446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260090 + }, + { + "epoch": 1.2614379956666864, + "grad_norm": 5.596590995082806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260100 + }, + { + "epoch": 1.2614864938595225, + "grad_norm": 5.20917012636346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260110 + }, + { + "epoch": 1.2615349920523586, + "grad_norm": 5.380454126679979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260120 + }, + { + "epoch": 1.2615834902451948, + "grad_norm": 5.330208523446345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260130 + }, + { + "epoch": 1.261631988438031, + "grad_norm": 5.827609470543393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260140 + }, + { + "epoch": 1.2616804866308668, + "grad_norm": 1.1978804650425445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260150 + }, + { + "epoch": 1.261728984823703, + "grad_norm": 6.06163382599334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260160 + }, + { + "epoch": 1.2617774830165391, + "grad_norm": 5.485916858560813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260170 + }, + { + "epoch": 1.261825981209375, + "grad_norm": 5.56372299342911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260180 + }, + { + "epoch": 1.2618744794022112, + "grad_norm": 4.211419764033053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260190 + }, + { + "epoch": 1.2619229775950473, + "grad_norm": 5.939106131336302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260200 + }, + { + "epoch": 1.2619714757878835, + "grad_norm": 7.520548592765408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260210 + }, + { + "epoch": 1.2620199739807196, + "grad_norm": 5.4093140988698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260220 + }, + { + "epoch": 1.2620684721735556, + "grad_norm": 5.284091457724571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260230 + }, + { + "epoch": 1.2621169703663917, + "grad_norm": 4.5976042883921764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260240 + }, + { + "epoch": 1.2621654685592278, + "grad_norm": 5.533892135645146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260250 + }, + { + "epoch": 1.2622139667520638, + "grad_norm": 5.057409566688875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260260 + }, + { + "epoch": 1.2622624649449, + "grad_norm": 5.060970238446316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260270 + }, + { + "epoch": 1.262310963137736, + "grad_norm": 5.52220285499061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260280 + }, + { + "epoch": 1.2623594613305722, + "grad_norm": 3.649562074770074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260290 + }, + { + "epoch": 1.2624079595234083, + "grad_norm": 5.628942858493247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260300 + }, + { + "epoch": 1.2624564577162443, + "grad_norm": 4.990035336049914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260310 + }, + { + "epoch": 1.2625049559090804, + "grad_norm": 5.062166223979148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260320 + }, + { + "epoch": 1.2625534541019166, + "grad_norm": 4.647253319944866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260330 + }, + { + "epoch": 1.2626019522947527, + "grad_norm": 3.983589920153463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260340 + }, + { + "epoch": 1.2626504504875888, + "grad_norm": 4.5968806716700783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260350 + }, + { + "epoch": 1.2626989486804248, + "grad_norm": 4.6261325792329444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260360 + }, + { + "epoch": 1.262747446873261, + "grad_norm": 4.5920555180600786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260370 + }, + { + "epoch": 1.262795945066097, + "grad_norm": 4.3230522805970395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260380 + }, + { + "epoch": 1.262844443258933, + "grad_norm": 4.0350155927626474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260390 + }, + { + "epoch": 1.2628929414517691, + "grad_norm": 4.701539637608221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260400 + }, + { + "epoch": 1.2629414396446053, + "grad_norm": 4.625489964382723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260410 + }, + { + "epoch": 1.2629899378374414, + "grad_norm": 4.692513186910219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260420 + }, + { + "epoch": 1.2630384360302775, + "grad_norm": 4.7460011387556733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260430 + }, + { + "epoch": 1.2630869342231135, + "grad_norm": 3.4927981573673605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260440 + }, + { + "epoch": 1.2631354324159496, + "grad_norm": 4.2774945541168563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260450 + }, + { + "epoch": 1.2631839306087858, + "grad_norm": 4.585019723890582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260460 + }, + { + "epoch": 1.2632324288016217, + "grad_norm": 4.843369652007823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260470 + }, + { + "epoch": 1.2632809269944578, + "grad_norm": 4.183487760656135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260480 + }, + { + "epoch": 1.263329425187294, + "grad_norm": 3.8921501754884957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260490 + }, + { + "epoch": 1.26337792338013, + "grad_norm": 4.450743062989204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260500 + }, + { + "epoch": 1.2634264215729663, + "grad_norm": 4.1659765770418744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260510 + }, + { + "epoch": 1.2634749197658022, + "grad_norm": 4.778570428243256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260520 + }, + { + "epoch": 1.2635234179586383, + "grad_norm": 3.778448558477976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260530 + }, + { + "epoch": 1.2635719161514745, + "grad_norm": 3.5573862078308593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260540 + }, + { + "epoch": 1.2636204143443104, + "grad_norm": 4.3613061961877975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260550 + }, + { + "epoch": 1.2636689125371465, + "grad_norm": 4.4497002704702027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260560 + }, + { + "epoch": 1.2637174107299827, + "grad_norm": 4.197265752736712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260570 + }, + { + "epoch": 1.2637659089228188, + "grad_norm": 3.5748195159612806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260580 + }, + { + "epoch": 1.263814407115655, + "grad_norm": 3.487310209493444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260590 + }, + { + "epoch": 1.2638629053084909, + "grad_norm": 4.18600137663816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260600 + }, + { + "epoch": 1.263911403501327, + "grad_norm": 4.5597690245813283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260610 + }, + { + "epoch": 1.2639599016941632, + "grad_norm": 4.68027764100043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260620 + }, + { + "epoch": 1.264008399886999, + "grad_norm": 4.671994702221127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260630 + }, + { + "epoch": 1.2640568980798352, + "grad_norm": 3.2318277476406365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260640 + }, + { + "epoch": 1.2641053962726714, + "grad_norm": 3.633842311501212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260650 + }, + { + "epoch": 1.2641538944655075, + "grad_norm": 3.9310518218371726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260660 + }, + { + "epoch": 1.2642023926583437, + "grad_norm": 4.810557925338799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260670 + }, + { + "epoch": 1.2642508908511796, + "grad_norm": 3.936998496101296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260680 + }, + { + "epoch": 1.2642993890440157, + "grad_norm": 2.9260783662721224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260690 + }, + { + "epoch": 1.2643478872368519, + "grad_norm": 4.2336992578384525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260700 + }, + { + "epoch": 1.2643963854296878, + "grad_norm": 4.1235207959289255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260710 + }, + { + "epoch": 1.264444883622524, + "grad_norm": 5.127652116243553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260720 + }, + { + "epoch": 1.26449338181536, + "grad_norm": 4.577763945690094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260730 + }, + { + "epoch": 1.2645418800081962, + "grad_norm": 3.0870202749611053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260740 + }, + { + "epoch": 1.2645903782010324, + "grad_norm": 3.4854627983804676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260750 + }, + { + "epoch": 1.2646388763938683, + "grad_norm": 3.6890344290441135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260760 + }, + { + "epoch": 1.2646873745867044, + "grad_norm": 3.9078958025129396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260770 + }, + { + "epoch": 1.2647358727795406, + "grad_norm": 4.058287856878451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260780 + }, + { + "epoch": 1.2647843709723765, + "grad_norm": 3.5276681842333346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260790 + }, + { + "epoch": 1.2648328691652129, + "grad_norm": 5.82023972128809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260800 + }, + { + "epoch": 1.2648813673580488, + "grad_norm": 3.669544526019308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260810 + }, + { + "epoch": 1.264929865550885, + "grad_norm": 3.5310858947923407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260820 + }, + { + "epoch": 1.264978363743721, + "grad_norm": 4.3055069909314625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260830 + }, + { + "epoch": 1.265026861936557, + "grad_norm": 3.0051768362682196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260840 + }, + { + "epoch": 1.2650753601293931, + "grad_norm": 3.804130699336383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260850 + }, + { + "epoch": 1.2651238583222293, + "grad_norm": 3.5559756383918284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260860 + }, + { + "epoch": 1.2651723565150654, + "grad_norm": 3.4802744153239473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260870 + }, + { + "epoch": 1.2652208547079016, + "grad_norm": 3.310150589186378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260880 + }, + { + "epoch": 1.2652693529007375, + "grad_norm": 2.579249667178374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260890 + }, + { + "epoch": 1.2653178510935736, + "grad_norm": 3.6577719697561406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260900 + }, + { + "epoch": 1.2653663492864098, + "grad_norm": 3.900202045770129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260910 + }, + { + "epoch": 1.2654148474792457, + "grad_norm": 3.55601997625854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260920 + }, + { + "epoch": 1.2654633456720819, + "grad_norm": 3.1564650271320716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260930 + }, + { + "epoch": 1.265511843864918, + "grad_norm": 3.3021208878381003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260940 + }, + { + "epoch": 1.2655603420577541, + "grad_norm": 3.1789031140760926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260950 + }, + { + "epoch": 1.2656088402505903, + "grad_norm": 3.9206631186061713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260960 + }, + { + "epoch": 1.2656573384434262, + "grad_norm": 3.135232304884994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260970 + }, + { + "epoch": 1.2657058366362623, + "grad_norm": 2.861435746126517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260980 + }, + { + "epoch": 1.2657543348290985, + "grad_norm": 2.4650140062476567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 260990 + }, + { + "epoch": 1.2658028330219344, + "grad_norm": 4.1179697518600733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261000 + }, + { + "epoch": 1.2658513312147706, + "grad_norm": 3.241669617182197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261010 + }, + { + "epoch": 1.2658998294076067, + "grad_norm": 3.5783222074314835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261020 + }, + { + "epoch": 1.2659483276004428, + "grad_norm": 3.263683368004422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261030 + }, + { + "epoch": 1.265996825793279, + "grad_norm": 2.5902352263074135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261040 + }, + { + "epoch": 1.266045323986115, + "grad_norm": 3.861454160869471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261050 + }, + { + "epoch": 1.266093822178951, + "grad_norm": 3.942886621643993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261060 + }, + { + "epoch": 1.2661423203717872, + "grad_norm": 3.2936461025201424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261070 + }, + { + "epoch": 1.2661908185646231, + "grad_norm": 3.308700513571239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261080 + }, + { + "epoch": 1.2662393167574593, + "grad_norm": 3.299891204733285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261090 + }, + { + "epoch": 1.2662878149502954, + "grad_norm": 3.532581160925474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261100 + }, + { + "epoch": 1.2663363131431316, + "grad_norm": 3.0056150990276365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261110 + }, + { + "epoch": 1.2663848113359677, + "grad_norm": 3.1593339144819765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261120 + }, + { + "epoch": 1.2664333095288036, + "grad_norm": 2.8438793719942623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261130 + }, + { + "epoch": 1.2664818077216398, + "grad_norm": 2.78581666179889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261140 + }, + { + "epoch": 1.266530305914476, + "grad_norm": 2.943320680515171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261150 + }, + { + "epoch": 1.2665788041073118, + "grad_norm": 3.4804483561856614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261160 + }, + { + "epoch": 1.266627302300148, + "grad_norm": 3.2331018928744015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261170 + }, + { + "epoch": 1.2666758004929841, + "grad_norm": 3.0105061910035147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261180 + }, + { + "epoch": 1.2667242986858203, + "grad_norm": 2.8173982968837663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261190 + }, + { + "epoch": 1.2667727968786564, + "grad_norm": 2.935477425580757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261200 + }, + { + "epoch": 1.2668212950714923, + "grad_norm": 3.095674401265569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261210 + }, + { + "epoch": 1.2668697932643285, + "grad_norm": 3.311990042220714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261220 + }, + { + "epoch": 1.2669182914571646, + "grad_norm": 3.700531294725806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261230 + }, + { + "epoch": 1.2669667896500005, + "grad_norm": 2.482838965534029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261240 + }, + { + "epoch": 1.2670152878428367, + "grad_norm": 3.5535603615244327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261250 + }, + { + "epoch": 1.2670637860356728, + "grad_norm": 2.727182106809778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261260 + }, + { + "epoch": 1.267112284228509, + "grad_norm": 3.473878393833729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261270 + }, + { + "epoch": 1.267160782421345, + "grad_norm": 3.7198634572632727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261280 + }, + { + "epoch": 1.267209280614181, + "grad_norm": 2.786746620131453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261290 + }, + { + "epoch": 1.2672577788070172, + "grad_norm": 4.059293985392287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261300 + }, + { + "epoch": 1.2673062769998533, + "grad_norm": 3.087389472966606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261310 + }, + { + "epoch": 1.2673547751926895, + "grad_norm": 3.7644792882929323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261320 + }, + { + "epoch": 1.2674032733855256, + "grad_norm": 2.825356091307185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261330 + }, + { + "epoch": 1.2674517715783615, + "grad_norm": 2.3757627332088305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261340 + }, + { + "epoch": 1.2675002697711977, + "grad_norm": 3.1203705930238357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261350 + }, + { + "epoch": 1.2675487679640338, + "grad_norm": 3.033613609204622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261360 + }, + { + "epoch": 1.2675972661568697, + "grad_norm": 3.0216028790164273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261370 + }, + { + "epoch": 1.2676457643497059, + "grad_norm": 4.825078576686792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261380 + }, + { + "epoch": 1.267694262542542, + "grad_norm": 2.463449675360607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261390 + }, + { + "epoch": 1.2677427607353782, + "grad_norm": 2.8275272256905737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261400 + }, + { + "epoch": 1.2677912589282143, + "grad_norm": 4.029836588870239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261410 + }, + { + "epoch": 1.2678397571210502, + "grad_norm": 2.8557647624438687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261420 + }, + { + "epoch": 1.2678882553138864, + "grad_norm": 2.8265287710382836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261430 + }, + { + "epoch": 1.2679367535067225, + "grad_norm": 2.4413100163656054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261440 + }, + { + "epoch": 1.2679852516995584, + "grad_norm": 3.211160901628318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261450 + }, + { + "epoch": 1.2680337498923946, + "grad_norm": 2.64397556293261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261460 + }, + { + "epoch": 1.2680822480852307, + "grad_norm": 6.187527787915315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261470 + }, + { + "epoch": 1.2681307462780669, + "grad_norm": 2.667038074832817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261480 + }, + { + "epoch": 1.268179244470903, + "grad_norm": 2.42458497723419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261490 + }, + { + "epoch": 1.268227742663739, + "grad_norm": 2.4885559923859546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261500 + }, + { + "epoch": 1.268276240856575, + "grad_norm": 2.829413006111281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261510 + }, + { + "epoch": 1.2683247390494112, + "grad_norm": 2.562403835781879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261520 + }, + { + "epoch": 1.2683732372422472, + "grad_norm": 2.5825985972005583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261530 + }, + { + "epoch": 1.2684217354350833, + "grad_norm": 2.2830099055681785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261540 + }, + { + "epoch": 1.2684702336279194, + "grad_norm": 2.7696609095073654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261550 + }, + { + "epoch": 1.2685187318207556, + "grad_norm": 2.925580702139996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261560 + }, + { + "epoch": 1.2685672300135917, + "grad_norm": 2.658331084148813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261570 + }, + { + "epoch": 1.2686157282064277, + "grad_norm": 2.479410170508345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261580 + }, + { + "epoch": 1.2686642263992638, + "grad_norm": 2.242136218910673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261590 + }, + { + "epoch": 1.2687127245921, + "grad_norm": 2.5364784050907474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261600 + }, + { + "epoch": 1.2687612227849359, + "grad_norm": 2.8255067263671663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261610 + }, + { + "epoch": 1.268809720977772, + "grad_norm": 2.494766135896498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261620 + }, + { + "epoch": 1.2688582191706081, + "grad_norm": 2.1744065747952845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261630 + }, + { + "epoch": 1.2689067173634443, + "grad_norm": 7.94086190580856e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 261640 + }, + { + "epoch": 1.2689552155562804, + "grad_norm": 3.0489249184029177e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261650 + }, + { + "epoch": 1.2690037137491164, + "grad_norm": 2.693000715225935e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261660 + }, + { + "epoch": 1.2690522119419525, + "grad_norm": 1.1716493645508308e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261670 + }, + { + "epoch": 1.2691007101347886, + "grad_norm": 7.486933554901043e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261680 + }, + { + "epoch": 1.2691492083276246, + "grad_norm": 3.764484745261143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261690 + }, + { + "epoch": 1.2691977065204607, + "grad_norm": 5.2415857680898625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261700 + }, + { + "epoch": 1.2692462047132969, + "grad_norm": 3.586299271773896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261710 + }, + { + "epoch": 1.269294702906133, + "grad_norm": 2.935405063908547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261720 + }, + { + "epoch": 1.2693432010989691, + "grad_norm": 2.2175663616508245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261730 + }, + { + "epoch": 1.269391699291805, + "grad_norm": 1.5387931853183545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261740 + }, + { + "epoch": 1.2694401974846412, + "grad_norm": 1.5523391994065605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261750 + }, + { + "epoch": 1.2694886956774774, + "grad_norm": 1.934836291184183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261760 + }, + { + "epoch": 1.2695371938703133, + "grad_norm": 3.2481716516485903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261770 + }, + { + "epoch": 1.2695856920631494, + "grad_norm": 1.7098232092394028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261780 + }, + { + "epoch": 1.2696341902559856, + "grad_norm": 1.1032277598133078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261790 + }, + { + "epoch": 1.2696826884488217, + "grad_norm": 1.2646436289287522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261800 + }, + { + "epoch": 1.2697311866416578, + "grad_norm": 1.288695330003975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261810 + }, + { + "epoch": 1.2697796848344938, + "grad_norm": 1.3401773912846693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261820 + }, + { + "epoch": 1.26982818302733, + "grad_norm": 1.0368870562160737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261830 + }, + { + "epoch": 1.269876681220166, + "grad_norm": 1.1873928542627255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261840 + }, + { + "epoch": 1.2699251794130022, + "grad_norm": 1.0554696245890227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261850 + }, + { + "epoch": 1.2699736776058383, + "grad_norm": 1.0321890613340656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261860 + }, + { + "epoch": 1.2700221757986743, + "grad_norm": 9.109107850235887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261870 + }, + { + "epoch": 1.2700706739915104, + "grad_norm": 9.450654374631995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261880 + }, + { + "epoch": 1.2701191721843466, + "grad_norm": 7.904332051111851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261890 + }, + { + "epoch": 1.2701676703771825, + "grad_norm": 1.0647215731296455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261900 + }, + { + "epoch": 1.2702161685700186, + "grad_norm": 9.14196505164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261910 + }, + { + "epoch": 1.2702646667628548, + "grad_norm": 8.243683851105743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261920 + }, + { + "epoch": 1.270313164955691, + "grad_norm": 9.531731279821543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261930 + }, + { + "epoch": 1.270361663148527, + "grad_norm": 8.119417884699942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261940 + }, + { + "epoch": 1.270410161341363, + "grad_norm": 7.467384079973272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261950 + }, + { + "epoch": 1.2704586595341991, + "grad_norm": 7.814888363100181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261960 + }, + { + "epoch": 1.2705071577270353, + "grad_norm": 2.1471328182087746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261970 + }, + { + "epoch": 1.2705556559198712, + "grad_norm": 7.945154720800929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261980 + }, + { + "epoch": 1.2706041541127073, + "grad_norm": 8.576192271902983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 261990 + }, + { + "epoch": 1.2706526523055435, + "grad_norm": 6.22018376361666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262000 + }, + { + "epoch": 1.2707011504983796, + "grad_norm": 7.041643357297289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262010 + }, + { + "epoch": 1.2707496486912158, + "grad_norm": 6.326856691885041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262020 + }, + { + "epoch": 1.2707981468840517, + "grad_norm": 6.813759227952687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262030 + }, + { + "epoch": 1.2708466450768878, + "grad_norm": 5.781989784736652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262040 + }, + { + "epoch": 1.270895143269724, + "grad_norm": 5.53374775336124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262050 + }, + { + "epoch": 1.27094364146256, + "grad_norm": 5.597927952294413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262060 + }, + { + "epoch": 1.270992139655396, + "grad_norm": 4.829799422623182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262070 + }, + { + "epoch": 1.2710406378482322, + "grad_norm": 5.14431519604841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262080 + }, + { + "epoch": 1.2710891360410683, + "grad_norm": 5.468686481435725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262090 + }, + { + "epoch": 1.2711376342339045, + "grad_norm": 6.932068004061875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262100 + }, + { + "epoch": 1.2711861324267404, + "grad_norm": 6.710977800139517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262110 + }, + { + "epoch": 1.2712346306195765, + "grad_norm": 5.267471578918048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262120 + }, + { + "epoch": 1.2712831288124127, + "grad_norm": 9.95057689578971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262130 + }, + { + "epoch": 1.2713316270052486, + "grad_norm": 4.25541998083645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262140 + }, + { + "epoch": 1.2713801251980847, + "grad_norm": 4.246033427079965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262150 + }, + { + "epoch": 1.2714286233909209, + "grad_norm": 4.3626684487207967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262160 + }, + { + "epoch": 1.271477121583757, + "grad_norm": 4.1369631276211294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262170 + }, + { + "epoch": 1.2715256197765932, + "grad_norm": 4.3687558104466007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262180 + }, + { + "epoch": 1.271574117969429, + "grad_norm": 4.3446740960462193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262190 + }, + { + "epoch": 1.2716226161622652, + "grad_norm": 4.097746000297775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262200 + }, + { + "epoch": 1.2716711143551014, + "grad_norm": 4.6423869548561925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262210 + }, + { + "epoch": 1.2717196125479373, + "grad_norm": 4.968208031641552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262220 + }, + { + "epoch": 1.2717681107407734, + "grad_norm": 4.3006590999539185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262230 + }, + { + "epoch": 1.2718166089336096, + "grad_norm": 3.9368072179968294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262240 + }, + { + "epoch": 1.2718651071264457, + "grad_norm": 3.697398369695293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262250 + }, + { + "epoch": 1.2719136053192819, + "grad_norm": 3.9223084513650974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262260 + }, + { + "epoch": 1.2719621035121178, + "grad_norm": 4.389376044855453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262270 + }, + { + "epoch": 1.272010601704954, + "grad_norm": 3.696980570566666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262280 + }, + { + "epoch": 1.27205909989779, + "grad_norm": 3.759296305361204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262290 + }, + { + "epoch": 1.272107598090626, + "grad_norm": 3.4150508554375847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262300 + }, + { + "epoch": 1.2721560962834622, + "grad_norm": 3.5125981412420515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262310 + }, + { + "epoch": 1.2722045944762983, + "grad_norm": 3.8615914377260196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262320 + }, + { + "epoch": 1.2722530926691344, + "grad_norm": 3.5059068181908515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262330 + }, + { + "epoch": 1.2723015908619706, + "grad_norm": 3.620643838075921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262340 + }, + { + "epoch": 1.2723500890548065, + "grad_norm": 3.7724581147813296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262350 + }, + { + "epoch": 1.2723985872476427, + "grad_norm": 3.2416974704574386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262360 + }, + { + "epoch": 1.2724470854404788, + "grad_norm": 3.363775533671287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262370 + }, + { + "epoch": 1.272495583633315, + "grad_norm": 2.9699307901864813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262380 + }, + { + "epoch": 1.272544081826151, + "grad_norm": 4.591144602272834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262390 + }, + { + "epoch": 1.272592580018987, + "grad_norm": 3.187882100519346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262400 + }, + { + "epoch": 1.2726410782118232, + "grad_norm": 2.781484624847508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262410 + }, + { + "epoch": 1.2726895764046593, + "grad_norm": 3.52296552819098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262420 + }, + { + "epoch": 1.2727380745974952, + "grad_norm": 3.0112977356111514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262430 + }, + { + "epoch": 1.2727865727903314, + "grad_norm": 3.7641308381353156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262440 + }, + { + "epoch": 1.2728350709831675, + "grad_norm": 3.1308448456002225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262450 + }, + { + "epoch": 1.2728835691760036, + "grad_norm": 2.425496859359555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262460 + }, + { + "epoch": 1.2729320673688398, + "grad_norm": 2.798255138714012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262470 + }, + { + "epoch": 1.2729805655616757, + "grad_norm": 2.5492050781394937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262480 + }, + { + "epoch": 1.2730290637545119, + "grad_norm": 3.0210225077098585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262490 + }, + { + "epoch": 1.273077561947348, + "grad_norm": 3.2908343428061926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262500 + }, + { + "epoch": 1.273126060140184, + "grad_norm": 2.8203831448081473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262510 + }, + { + "epoch": 1.27317455833302, + "grad_norm": 3.2072142630568123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262520 + }, + { + "epoch": 1.2732230565258562, + "grad_norm": 3.046598351374996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262530 + }, + { + "epoch": 1.2732715547186924, + "grad_norm": 2.86881203237499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262540 + }, + { + "epoch": 1.2733200529115285, + "grad_norm": 2.946156598682137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262550 + }, + { + "epoch": 1.2733685511043644, + "grad_norm": 2.938806176189246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262560 + }, + { + "epoch": 1.2734170492972006, + "grad_norm": 2.836154635588173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262570 + }, + { + "epoch": 1.2734655474900367, + "grad_norm": 3.1152075052887085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262580 + }, + { + "epoch": 1.2735140456828726, + "grad_norm": 3.719946732871904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262590 + }, + { + "epoch": 1.2735625438757088, + "grad_norm": 8.497802923557174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262600 + }, + { + "epoch": 1.273611042068545, + "grad_norm": 2.5162623273899953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262610 + }, + { + "epoch": 1.273659540261381, + "grad_norm": 2.5538659542689857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262620 + }, + { + "epoch": 1.2737080384542172, + "grad_norm": 2.8330759960226715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262630 + }, + { + "epoch": 1.2737565366470531, + "grad_norm": 2.468686943757348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262640 + }, + { + "epoch": 1.2738050348398893, + "grad_norm": 2.5003109271892754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262650 + }, + { + "epoch": 1.2738535330327254, + "grad_norm": 2.3528141923634394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262660 + }, + { + "epoch": 1.2739020312255613, + "grad_norm": 4.587574267134187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262670 + }, + { + "epoch": 1.2739505294183975, + "grad_norm": 3.0410646445488965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262680 + }, + { + "epoch": 1.2739990276112336, + "grad_norm": 2.797197851123201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262690 + }, + { + "epoch": 1.2740475258040698, + "grad_norm": 2.325533614566666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262700 + }, + { + "epoch": 1.274096023996906, + "grad_norm": 2.866432282644382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262710 + }, + { + "epoch": 1.2741445221897418, + "grad_norm": 2.256278861523242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262720 + }, + { + "epoch": 1.274193020382578, + "grad_norm": 2.2112649844530097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262730 + }, + { + "epoch": 1.2742415185754141, + "grad_norm": 2.7920916068069346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262740 + }, + { + "epoch": 1.27429001676825, + "grad_norm": 2.697856587019487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262750 + }, + { + "epoch": 1.2743385149610862, + "grad_norm": 2.533280962779827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262760 + }, + { + "epoch": 1.2743870131539223, + "grad_norm": 2.633728968248761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262770 + }, + { + "epoch": 1.2744355113467585, + "grad_norm": 2.8361870363369235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262780 + }, + { + "epoch": 1.2744840095395946, + "grad_norm": 3.8839144167468476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262790 + }, + { + "epoch": 1.2745325077324305, + "grad_norm": 2.441143465148343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262800 + }, + { + "epoch": 1.2745810059252667, + "grad_norm": 1.986931721376095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262810 + }, + { + "epoch": 1.2746295041181028, + "grad_norm": 2.3354294853561441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262820 + }, + { + "epoch": 1.2746780023109388, + "grad_norm": 2.480300338447705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262830 + }, + { + "epoch": 1.274726500503775, + "grad_norm": 2.564848387009988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262840 + }, + { + "epoch": 1.274774998696611, + "grad_norm": 2.2866426263590256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262850 + }, + { + "epoch": 1.2748234968894472, + "grad_norm": 2.2640428198883455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262860 + }, + { + "epoch": 1.2748719950822833, + "grad_norm": 2.0432136693671055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262870 + }, + { + "epoch": 1.2749204932751192, + "grad_norm": 2.0569947878357198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262880 + }, + { + "epoch": 1.2749689914679554, + "grad_norm": 2.0383249932365288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262890 + }, + { + "epoch": 1.2750174896607915, + "grad_norm": 2.1675377581686917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262900 + }, + { + "epoch": 1.2750659878536277, + "grad_norm": 1.9089070235622785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262910 + }, + { + "epoch": 1.2751144860464638, + "grad_norm": 2.063375887928487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262920 + }, + { + "epoch": 1.2751629842392997, + "grad_norm": 2.0136681655458233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262930 + }, + { + "epoch": 1.275211482432136, + "grad_norm": 2.143999893178261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262940 + }, + { + "epoch": 1.275259980624972, + "grad_norm": 2.3610249400007888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262950 + }, + { + "epoch": 1.275308478817808, + "grad_norm": 1.9031297426863603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262960 + }, + { + "epoch": 1.275356977010644, + "grad_norm": 2.1254585647056956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262970 + }, + { + "epoch": 1.2754054752034802, + "grad_norm": 2.0031302483403124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262980 + }, + { + "epoch": 1.2754539733963164, + "grad_norm": 1.945570460293311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 262990 + }, + { + "epoch": 1.2755024715891525, + "grad_norm": 1.7660481432812958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263000 + }, + { + "epoch": 1.2755509697819885, + "grad_norm": 1.8737223683729098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263010 + }, + { + "epoch": 1.2755994679748246, + "grad_norm": 1.7770626925539545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263020 + }, + { + "epoch": 1.2756479661676607, + "grad_norm": 1.8201993157163088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263030 + }, + { + "epoch": 1.2756964643604967, + "grad_norm": 2.0426453772870445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263040 + }, + { + "epoch": 1.2757449625533328, + "grad_norm": 1.8436625737194845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263050 + }, + { + "epoch": 1.275793460746169, + "grad_norm": 1.7447727884700726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263060 + }, + { + "epoch": 1.275841958939005, + "grad_norm": 2.2829985368844063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263070 + }, + { + "epoch": 1.2758904571318412, + "grad_norm": 1.8597057760416646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263080 + }, + { + "epoch": 1.2759389553246772, + "grad_norm": 1.8240092458654544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263090 + }, + { + "epoch": 1.2759874535175133, + "grad_norm": 2.118734983014292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263100 + }, + { + "epoch": 1.2760359517103494, + "grad_norm": 1.6634453459118959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263110 + }, + { + "epoch": 1.2760844499031854, + "grad_norm": 1.9293372588435886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263120 + }, + { + "epoch": 1.2761329480960215, + "grad_norm": 2.3899642087599204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263130 + }, + { + "epoch": 1.2761814462888577, + "grad_norm": 1.7063028678876435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263140 + }, + { + "epoch": 1.2762299444816938, + "grad_norm": 1.7737102098180912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263150 + }, + { + "epoch": 1.27627844267453, + "grad_norm": 1.71187664932404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263160 + }, + { + "epoch": 1.2763269408673659, + "grad_norm": 1.9226176561915054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263170 + }, + { + "epoch": 1.276375439060202, + "grad_norm": 1.6642233902075532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263180 + }, + { + "epoch": 1.2764239372530382, + "grad_norm": 1.674704179777109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263190 + }, + { + "epoch": 1.276472435445874, + "grad_norm": 1.6981009309802175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263200 + }, + { + "epoch": 1.2765209336387102, + "grad_norm": 1.6114870504679857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263210 + }, + { + "epoch": 1.2765694318315464, + "grad_norm": 1.5135205444494204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263220 + }, + { + "epoch": 1.2766179300243825, + "grad_norm": 1.6704493077668303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263230 + }, + { + "epoch": 1.2766664282172187, + "grad_norm": 1.6403579650159372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263240 + }, + { + "epoch": 1.2767149264100546, + "grad_norm": 1.489275973654003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263250 + }, + { + "epoch": 1.2767634246028907, + "grad_norm": 1.5377617046397063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263260 + }, + { + "epoch": 1.2768119227957269, + "grad_norm": 1.5199744041183294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263270 + }, + { + "epoch": 1.2768604209885628, + "grad_norm": 1.5928871732739935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263280 + }, + { + "epoch": 1.276908919181399, + "grad_norm": 1.6288328197333612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263290 + }, + { + "epoch": 1.276957417374235, + "grad_norm": 2.2185879799963004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263300 + }, + { + "epoch": 1.2770059155670712, + "grad_norm": 1.8054574013603997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263310 + }, + { + "epoch": 1.2770544137599074, + "grad_norm": 1.5260793873039802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263320 + }, + { + "epoch": 1.2771029119527433, + "grad_norm": 1.5850345391754672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263330 + }, + { + "epoch": 1.2771514101455794, + "grad_norm": 1.503531734670105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263340 + }, + { + "epoch": 1.2771999083384156, + "grad_norm": 1.461772285438201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263350 + }, + { + "epoch": 1.2772484065312517, + "grad_norm": 1.8785752331496042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263360 + }, + { + "epoch": 1.2772969047240879, + "grad_norm": 6.017439773131628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263370 + }, + { + "epoch": 1.2773454029169238, + "grad_norm": 1.3919499508574518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263380 + }, + { + "epoch": 1.27739390110976, + "grad_norm": 1.522405455034459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263390 + }, + { + "epoch": 1.277442399302596, + "grad_norm": 1.5829228061647882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263400 + }, + { + "epoch": 1.277490897495432, + "grad_norm": 1.5343159986969113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263410 + }, + { + "epoch": 1.2775393956882681, + "grad_norm": 1.6870927765921806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263420 + }, + { + "epoch": 1.2775878938811043, + "grad_norm": 1.3906935691920808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263430 + }, + { + "epoch": 1.2776363920739404, + "grad_norm": 1.2915820946091117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263440 + }, + { + "epoch": 1.2776848902667766, + "grad_norm": 1.3063262827017752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263450 + }, + { + "epoch": 1.2777333884596125, + "grad_norm": 1.452601878781934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263460 + }, + { + "epoch": 1.2777818866524486, + "grad_norm": 1.51863261521612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263470 + }, + { + "epoch": 1.2778303848452848, + "grad_norm": 1.6054589480063441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263480 + }, + { + "epoch": 1.2778788830381207, + "grad_norm": 1.5615117376910348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263490 + }, + { + "epoch": 1.2779273812309568, + "grad_norm": 7.011609000073804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263500 + }, + { + "epoch": 1.277975879423793, + "grad_norm": 1.7440649457967083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263510 + }, + { + "epoch": 1.2780243776166291, + "grad_norm": 1.4156935890241584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263520 + }, + { + "epoch": 1.2780728758094653, + "grad_norm": 1.4511330448385706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263530 + }, + { + "epoch": 1.2781213740023012, + "grad_norm": 1.301207106507718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263540 + }, + { + "epoch": 1.2781698721951373, + "grad_norm": 1.5256938468155568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263550 + }, + { + "epoch": 1.2782183703879735, + "grad_norm": 1.9000562190285564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263560 + }, + { + "epoch": 1.2782668685808094, + "grad_norm": 1.3765087203410076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263570 + }, + { + "epoch": 1.2783153667736455, + "grad_norm": 1.4357939903675287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263580 + }, + { + "epoch": 1.2783638649664817, + "grad_norm": 1.5755342985812604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263590 + }, + { + "epoch": 1.2784123631593178, + "grad_norm": 1.2907939606066066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263600 + }, + { + "epoch": 1.278460861352154, + "grad_norm": 1.32917833184365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263610 + }, + { + "epoch": 1.27850935954499, + "grad_norm": 1.3370765827858122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263620 + }, + { + "epoch": 1.278557857737826, + "grad_norm": 1.4456753660851973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263630 + }, + { + "epoch": 1.2786063559306622, + "grad_norm": 1.179636228698655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263640 + }, + { + "epoch": 1.278654854123498, + "grad_norm": 1.2090738721326488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263650 + }, + { + "epoch": 1.2787033523163343, + "grad_norm": 1.345871822877598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263660 + }, + { + "epoch": 1.2787518505091704, + "grad_norm": 1.5929383323509683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263670 + }, + { + "epoch": 1.2788003487020065, + "grad_norm": 1.3061229253708007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263680 + }, + { + "epoch": 1.2788488468948427, + "grad_norm": 1.3506395646345482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263690 + }, + { + "epoch": 1.2788973450876786, + "grad_norm": 1.25606376855103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263700 + }, + { + "epoch": 1.2789458432805147, + "grad_norm": 1.1825365930917542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263710 + }, + { + "epoch": 1.278994341473351, + "grad_norm": 1.2157723006112064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263720 + }, + { + "epoch": 1.2790428396661868, + "grad_norm": 1.1225436935546895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263730 + }, + { + "epoch": 1.279091337859023, + "grad_norm": 1.2725915610189986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263740 + }, + { + "epoch": 1.279139836051859, + "grad_norm": 1.409422623055434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263750 + }, + { + "epoch": 1.2791883342446952, + "grad_norm": 1.462753971281927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263760 + }, + { + "epoch": 1.2792368324375314, + "grad_norm": 1.1878910299856216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263770 + }, + { + "epoch": 1.2792853306303673, + "grad_norm": 1.1538161714952366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263780 + }, + { + "epoch": 1.2793338288232035, + "grad_norm": 1.4519919488975574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263790 + }, + { + "epoch": 1.2793823270160396, + "grad_norm": 1.3364250150971202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263800 + }, + { + "epoch": 1.2794308252088755, + "grad_norm": 1.2204408506022446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263810 + }, + { + "epoch": 1.2794793234017117, + "grad_norm": 1.1102172692289969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263820 + }, + { + "epoch": 1.2795278215945478, + "grad_norm": 1.2141423155753728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263830 + }, + { + "epoch": 1.279576319787384, + "grad_norm": 1.0775763570336494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263840 + }, + { + "epoch": 1.27962481798022, + "grad_norm": 1.1542371680661745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263850 + }, + { + "epoch": 1.279673316173056, + "grad_norm": 1.2691850770352175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263860 + }, + { + "epoch": 1.2797218143658922, + "grad_norm": 1.2974423668765667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263870 + }, + { + "epoch": 1.2797703125587283, + "grad_norm": 1.068527524239471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263880 + }, + { + "epoch": 1.2798188107515645, + "grad_norm": 1.2594472309501725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263890 + }, + { + "epoch": 1.2798673089444006, + "grad_norm": 1.079665707948152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263900 + }, + { + "epoch": 1.2799158071372365, + "grad_norm": 1.0694777330400029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263910 + }, + { + "epoch": 1.2799643053300727, + "grad_norm": 1.0634413882826266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263920 + }, + { + "epoch": 1.2800128035229088, + "grad_norm": 1.1374305586286937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263930 + }, + { + "epoch": 1.2800613017157447, + "grad_norm": 1.0305298303592281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263940 + }, + { + "epoch": 1.2801097999085809, + "grad_norm": 1.0854823528916313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263950 + }, + { + "epoch": 1.280158298101417, + "grad_norm": 1.0721986143380491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263960 + }, + { + "epoch": 1.2802067962942532, + "grad_norm": 1.1595984972245788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263970 + }, + { + "epoch": 1.2802552944870893, + "grad_norm": 1.0374080972042066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263980 + }, + { + "epoch": 1.2803037926799252, + "grad_norm": 1.06517866527156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 263990 + }, + { + "epoch": 1.2803522908727614, + "grad_norm": 1.0679768536192569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264000 + }, + { + "epoch": 1.2804007890655975, + "grad_norm": 1.878874087424265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264010 + }, + { + "epoch": 1.2804492872584334, + "grad_norm": 1.2723340603315592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264020 + }, + { + "epoch": 1.2804977854512696, + "grad_norm": 1.0618070689361048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264030 + }, + { + "epoch": 1.2805462836441057, + "grad_norm": 1.438427943867282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264040 + }, + { + "epoch": 1.2805947818369419, + "grad_norm": 1.1167220748120599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264050 + }, + { + "epoch": 1.280643280029778, + "grad_norm": 1.0691088192515963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264060 + }, + { + "epoch": 1.280691778222614, + "grad_norm": 9.665797762181683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264070 + }, + { + "epoch": 1.28074027641545, + "grad_norm": 1.0337515732317115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264080 + }, + { + "epoch": 1.2807887746082862, + "grad_norm": 1.1826843149265187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264090 + }, + { + "epoch": 1.2808372728011221, + "grad_norm": 1.047812361321121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264100 + }, + { + "epoch": 1.2808857709939583, + "grad_norm": 9.88571642324132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264110 + }, + { + "epoch": 1.2809342691867944, + "grad_norm": 1.0244311710039256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264120 + }, + { + "epoch": 1.2809827673796306, + "grad_norm": 9.524652000436618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264130 + }, + { + "epoch": 1.2810312655724667, + "grad_norm": 9.697684077991653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264140 + }, + { + "epoch": 1.2810797637653026, + "grad_norm": 1.0080542978130325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264150 + }, + { + "epoch": 1.2811282619581388, + "grad_norm": 9.842899828527152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264160 + }, + { + "epoch": 1.281176760150975, + "grad_norm": 1.1755769691035312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264170 + }, + { + "epoch": 1.2812252583438108, + "grad_norm": 9.850489846030541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264180 + }, + { + "epoch": 1.281273756536647, + "grad_norm": 9.390391397801068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264190 + }, + { + "epoch": 1.2813222547294831, + "grad_norm": 1.644388731847357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264200 + }, + { + "epoch": 1.2813707529223193, + "grad_norm": 9.910436205018414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264210 + }, + { + "epoch": 1.2814192511151554, + "grad_norm": 9.381347609860313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264220 + }, + { + "epoch": 1.2814677493079913, + "grad_norm": 9.845923898410547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264230 + }, + { + "epoch": 1.2815162475008275, + "grad_norm": 9.717263083075522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264240 + }, + { + "epoch": 1.2815647456936636, + "grad_norm": 9.75700444882932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264250 + }, + { + "epoch": 1.2816132438864996, + "grad_norm": 9.845430071209194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264260 + }, + { + "epoch": 1.2816617420793357, + "grad_norm": 9.240008580491121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264270 + }, + { + "epoch": 1.2817102402721718, + "grad_norm": 9.557696500905877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264280 + }, + { + "epoch": 1.281758738465008, + "grad_norm": 9.643115106428013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264290 + }, + { + "epoch": 1.2818072366578441, + "grad_norm": 9.202538819863548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264300 + }, + { + "epoch": 1.28185573485068, + "grad_norm": 9.952568547078045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264310 + }, + { + "epoch": 1.2819042330435162, + "grad_norm": 9.885874874271394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264320 + }, + { + "epoch": 1.2819527312363523, + "grad_norm": 1.0242883519140378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264330 + }, + { + "epoch": 1.2820012294291883, + "grad_norm": 9.415825985570336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264340 + }, + { + "epoch": 1.2820497276220244, + "grad_norm": 9.77193792550679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264350 + }, + { + "epoch": 1.2820982258148605, + "grad_norm": 9.17965152780198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264360 + }, + { + "epoch": 1.2821467240076967, + "grad_norm": 9.847069293300592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264370 + }, + { + "epoch": 1.2821952222005328, + "grad_norm": 9.135530376624956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264380 + }, + { + "epoch": 1.2822437203933688, + "grad_norm": 1.0687667639786014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264390 + }, + { + "epoch": 1.282292218586205, + "grad_norm": 1.1672878486024274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264400 + }, + { + "epoch": 1.282340716779041, + "grad_norm": 3.3884546724038955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264410 + }, + { + "epoch": 1.2823892149718772, + "grad_norm": 9.709231818533226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264420 + }, + { + "epoch": 1.2824377131647133, + "grad_norm": 9.105812637244526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264430 + }, + { + "epoch": 1.2824862113575493, + "grad_norm": 8.809588081248876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264440 + }, + { + "epoch": 1.2825347095503854, + "grad_norm": 9.655089172611042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264450 + }, + { + "epoch": 1.2825832077432215, + "grad_norm": 9.677417267539568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264460 + }, + { + "epoch": 1.2826317059360575, + "grad_norm": 0.00012115056597394869, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 264470 + }, + { + "epoch": 1.2826802041288936, + "grad_norm": 8.371877629542723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264480 + }, + { + "epoch": 1.2827287023217298, + "grad_norm": 1.2177872122265399e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264490 + }, + { + "epoch": 1.282777200514566, + "grad_norm": 9.5977375167422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264500 + }, + { + "epoch": 1.282825698707402, + "grad_norm": 6.358304290188244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264510 + }, + { + "epoch": 1.282874196900238, + "grad_norm": 5.479940682562301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264520 + }, + { + "epoch": 1.282922695093074, + "grad_norm": 4.374796844786033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264530 + }, + { + "epoch": 1.2829711932859102, + "grad_norm": 3.5923403629567474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264540 + }, + { + "epoch": 1.2830196914787462, + "grad_norm": 3.89090200769715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264550 + }, + { + "epoch": 1.2830681896715823, + "grad_norm": 3.4385716389806475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264560 + }, + { + "epoch": 1.2831166878644185, + "grad_norm": 3.31126921082614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264570 + }, + { + "epoch": 1.2831651860572546, + "grad_norm": 3.175420260959072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264580 + }, + { + "epoch": 1.2832136842500907, + "grad_norm": 2.867995590349892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264590 + }, + { + "epoch": 1.2832621824429267, + "grad_norm": 2.9179507237131475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264600 + }, + { + "epoch": 1.2833106806357628, + "grad_norm": 2.850484179361956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264610 + }, + { + "epoch": 1.283359178828599, + "grad_norm": 2.682946160348365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264620 + }, + { + "epoch": 1.2834076770214349, + "grad_norm": 2.5850308702501934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264630 + }, + { + "epoch": 1.283456175214271, + "grad_norm": 2.25260646402603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264640 + }, + { + "epoch": 1.2835046734071072, + "grad_norm": 2.3054369648889406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264650 + }, + { + "epoch": 1.2835531715999433, + "grad_norm": 2.1998760075803148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264660 + }, + { + "epoch": 1.2836016697927795, + "grad_norm": 2.040344952547457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264670 + }, + { + "epoch": 1.2836501679856154, + "grad_norm": 1.970109451576718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264680 + }, + { + "epoch": 1.2836986661784515, + "grad_norm": 1.7558589888722054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264690 + }, + { + "epoch": 1.2837471643712877, + "grad_norm": 1.759534598022583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264700 + }, + { + "epoch": 1.2837956625641236, + "grad_norm": 1.6618711242699646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264710 + }, + { + "epoch": 1.2838441607569597, + "grad_norm": 1.6176396684386418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264720 + }, + { + "epoch": 1.2838926589497959, + "grad_norm": 1.4659162843599916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264730 + }, + { + "epoch": 1.283941157142632, + "grad_norm": 1.2630206356334384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264740 + }, + { + "epoch": 1.2839896553354682, + "grad_norm": 1.2559910373965977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264750 + }, + { + "epoch": 1.284038153528304, + "grad_norm": 1.317534383815655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264760 + }, + { + "epoch": 1.2840866517211402, + "grad_norm": 1.3364120832193294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264770 + }, + { + "epoch": 1.2841351499139764, + "grad_norm": 1.218290321958193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264780 + }, + { + "epoch": 1.2841836481068123, + "grad_norm": 1.0511689652048517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264790 + }, + { + "epoch": 1.2842321462996484, + "grad_norm": 1.091305762201955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264800 + }, + { + "epoch": 1.2842806444924846, + "grad_norm": 9.875594741970417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264810 + }, + { + "epoch": 1.2843291426853207, + "grad_norm": 9.815429393711383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264820 + }, + { + "epoch": 1.2843776408781569, + "grad_norm": 9.91385491033725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264830 + }, + { + "epoch": 1.2844261390709928, + "grad_norm": 9.386965871271968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264840 + }, + { + "epoch": 1.284474637263829, + "grad_norm": 8.810175131657161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264850 + }, + { + "epoch": 1.284523135456665, + "grad_norm": 9.479069831286324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264860 + }, + { + "epoch": 1.284571633649501, + "grad_norm": 9.513469763078319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264870 + }, + { + "epoch": 1.2846201318423371, + "grad_norm": 8.136416909110267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264880 + }, + { + "epoch": 1.2846686300351733, + "grad_norm": 7.881077408455894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264890 + }, + { + "epoch": 1.2847171282280094, + "grad_norm": 8.325623639393598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264900 + }, + { + "epoch": 1.2847656264208456, + "grad_norm": 8.432883191744622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264910 + }, + { + "epoch": 1.2848141246136815, + "grad_norm": 8.739712029637303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264920 + }, + { + "epoch": 1.2848626228065176, + "grad_norm": 7.398635943900445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264930 + }, + { + "epoch": 1.2849111209993538, + "grad_norm": 6.918565986779868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264940 + }, + { + "epoch": 1.28495961919219, + "grad_norm": 7.360422387137078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264950 + }, + { + "epoch": 1.285008117385026, + "grad_norm": 7.139440185710555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264960 + }, + { + "epoch": 1.285056615577862, + "grad_norm": 7.174516554186994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264970 + }, + { + "epoch": 1.2851051137706981, + "grad_norm": 6.719170073665737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264980 + }, + { + "epoch": 1.2851536119635343, + "grad_norm": 6.196482331688458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 264990 + }, + { + "epoch": 1.2852021101563702, + "grad_norm": 6.43392240817775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265000 + }, + { + "epoch": 1.2852506083492063, + "grad_norm": 6.584440939150227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265010 + }, + { + "epoch": 1.2852991065420425, + "grad_norm": 8.017607910915103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265020 + }, + { + "epoch": 1.2853476047348786, + "grad_norm": 7.690399570492445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265030 + }, + { + "epoch": 1.2853961029277148, + "grad_norm": 5.525342885448481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265040 + }, + { + "epoch": 1.2854446011205507, + "grad_norm": 6.060442956368206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265050 + }, + { + "epoch": 1.2854930993133868, + "grad_norm": 5.458462055685231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265060 + }, + { + "epoch": 1.285541597506223, + "grad_norm": 6.751444061592338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265070 + }, + { + "epoch": 1.285590095699059, + "grad_norm": 5.425600875241798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265080 + }, + { + "epoch": 1.285638593891895, + "grad_norm": 5.30685497324157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265090 + }, + { + "epoch": 1.2856870920847312, + "grad_norm": 5.505429498953163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265100 + }, + { + "epoch": 1.2857355902775673, + "grad_norm": 4.87713975871884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265110 + }, + { + "epoch": 1.2857840884704035, + "grad_norm": 5.240364657765895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265120 + }, + { + "epoch": 1.2858325866632394, + "grad_norm": 5.109923222335055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265130 + }, + { + "epoch": 1.2858810848560756, + "grad_norm": 4.869900749326916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265140 + }, + { + "epoch": 1.2859295830489117, + "grad_norm": 5.626542929348943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265150 + }, + { + "epoch": 1.2859780812417476, + "grad_norm": 5.066893891125801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265160 + }, + { + "epoch": 1.2860265794345838, + "grad_norm": 4.960184583069349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265170 + }, + { + "epoch": 1.28607507762742, + "grad_norm": 5.152620019543974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265180 + }, + { + "epoch": 1.286123575820256, + "grad_norm": 4.3161955431969545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265190 + }, + { + "epoch": 1.2861720740130922, + "grad_norm": 4.6175765078260156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265200 + }, + { + "epoch": 1.2862205722059281, + "grad_norm": 4.827766133530531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265210 + }, + { + "epoch": 1.2862690703987643, + "grad_norm": 4.7030798100422544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265220 + }, + { + "epoch": 1.2863175685916004, + "grad_norm": 4.6652280616399366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265230 + }, + { + "epoch": 1.2863660667844363, + "grad_norm": 4.180516270935186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265240 + }, + { + "epoch": 1.2864145649772725, + "grad_norm": 4.617863851308357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265250 + }, + { + "epoch": 1.2864630631701086, + "grad_norm": 4.966523761140706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265260 + }, + { + "epoch": 1.2865115613629448, + "grad_norm": 4.688935177910025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265270 + }, + { + "epoch": 1.286560059555781, + "grad_norm": 4.4535542542689655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265280 + }, + { + "epoch": 1.2866085577486168, + "grad_norm": 3.802589674251067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265290 + }, + { + "epoch": 1.286657055941453, + "grad_norm": 4.0873214857128914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265300 + }, + { + "epoch": 1.286705554134289, + "grad_norm": 4.251376708452881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265310 + }, + { + "epoch": 1.286754052327125, + "grad_norm": 3.97776346972023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265320 + }, + { + "epoch": 1.2868025505199612, + "grad_norm": 4.042083219246706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265330 + }, + { + "epoch": 1.2868510487127973, + "grad_norm": 3.825209375918348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265340 + }, + { + "epoch": 1.2868995469056335, + "grad_norm": 3.838416944290657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265350 + }, + { + "epoch": 1.2869480450984696, + "grad_norm": 3.6855644225397555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265360 + }, + { + "epoch": 1.2869965432913055, + "grad_norm": 3.794773135723517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265370 + }, + { + "epoch": 1.2870450414841417, + "grad_norm": 3.752399493350822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265380 + }, + { + "epoch": 1.2870935396769778, + "grad_norm": 3.325010311527876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265390 + }, + { + "epoch": 1.2871420378698137, + "grad_norm": 3.8445975292233925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265400 + }, + { + "epoch": 1.28719053606265, + "grad_norm": 3.40981728186307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265410 + }, + { + "epoch": 1.287239034255486, + "grad_norm": 3.6379259427121724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265420 + }, + { + "epoch": 1.2872875324483222, + "grad_norm": 3.3829326184786623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265430 + }, + { + "epoch": 1.2873360306411583, + "grad_norm": 3.9647017047172994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265440 + }, + { + "epoch": 1.2873845288339942, + "grad_norm": 3.726985937646532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265450 + }, + { + "epoch": 1.2874330270268304, + "grad_norm": 3.5960243849331164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265460 + }, + { + "epoch": 1.2874815252196665, + "grad_norm": 3.563169741482852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265470 + }, + { + "epoch": 1.2875300234125027, + "grad_norm": 3.6679657000604493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265480 + }, + { + "epoch": 1.2875785216053388, + "grad_norm": 3.185051582477172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265490 + }, + { + "epoch": 1.2876270197981747, + "grad_norm": 3.175975678004761e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265500 + }, + { + "epoch": 1.2876755179910109, + "grad_norm": 3.4901378853646747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265510 + }, + { + "epoch": 1.287724016183847, + "grad_norm": 3.334330926918483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265520 + }, + { + "epoch": 1.287772514376683, + "grad_norm": 3.2383815096181934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265530 + }, + { + "epoch": 1.287821012569519, + "grad_norm": 2.886295646931103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265540 + }, + { + "epoch": 1.2878695107623552, + "grad_norm": 3.2453280596200784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265550 + }, + { + "epoch": 1.2879180089551914, + "grad_norm": 3.0850031862428295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265560 + }, + { + "epoch": 1.2879665071480275, + "grad_norm": 4.5371393753157463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265570 + }, + { + "epoch": 1.2880150053408634, + "grad_norm": 3.1243649800671847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265580 + }, + { + "epoch": 1.2880635035336996, + "grad_norm": 3.09466742010045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265590 + }, + { + "epoch": 1.2881120017265357, + "grad_norm": 3.230824177080649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265600 + }, + { + "epoch": 1.2881604999193716, + "grad_norm": 2.9438240289891837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265610 + }, + { + "epoch": 1.2882089981122078, + "grad_norm": 2.8521188255581365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265620 + }, + { + "epoch": 1.288257496305044, + "grad_norm": 3.69733413663198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265630 + }, + { + "epoch": 1.28830599449788, + "grad_norm": 2.6680208975449204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265640 + }, + { + "epoch": 1.2883544926907162, + "grad_norm": 2.920417614404869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265650 + }, + { + "epoch": 1.2884029908835521, + "grad_norm": 3.2390826731898414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265660 + }, + { + "epoch": 1.2884514890763883, + "grad_norm": 2.941855825611128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265670 + }, + { + "epoch": 1.2884999872692244, + "grad_norm": 2.812406023622316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265680 + }, + { + "epoch": 1.2885484854620604, + "grad_norm": 2.686972493393114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265690 + }, + { + "epoch": 1.2885969836548965, + "grad_norm": 2.6872388048104767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265700 + }, + { + "epoch": 1.2886454818477326, + "grad_norm": 2.7482354880703497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265710 + }, + { + "epoch": 1.2886939800405688, + "grad_norm": 2.72867595185744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265720 + }, + { + "epoch": 1.288742478233405, + "grad_norm": 2.41475106577127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265730 + }, + { + "epoch": 1.2887909764262409, + "grad_norm": 2.1869047373002104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265740 + }, + { + "epoch": 1.288839474619077, + "grad_norm": 2.5003438963722147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265750 + }, + { + "epoch": 1.2888879728119131, + "grad_norm": 2.589631549199112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265760 + }, + { + "epoch": 1.288936471004749, + "grad_norm": 2.4861719793989323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265770 + }, + { + "epoch": 1.2889849691975852, + "grad_norm": 3.3530164955664077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265780 + }, + { + "epoch": 1.2890334673904214, + "grad_norm": 2.3435728735421435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265790 + }, + { + "epoch": 1.2890819655832575, + "grad_norm": 2.3336960452979838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265800 + }, + { + "epoch": 1.2891304637760936, + "grad_norm": 2.4806044507386105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265810 + }, + { + "epoch": 1.2891789619689296, + "grad_norm": 2.611914453609643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265820 + }, + { + "epoch": 1.2892274601617657, + "grad_norm": 2.4196995696001977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265830 + }, + { + "epoch": 1.2892759583546018, + "grad_norm": 2.682759543404245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265840 + }, + { + "epoch": 1.2893244565474378, + "grad_norm": 2.448208249461459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265850 + }, + { + "epoch": 1.289372954740274, + "grad_norm": 2.3030500528875564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265860 + }, + { + "epoch": 1.28942145293311, + "grad_norm": 2.3366261814317113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265870 + }, + { + "epoch": 1.2894699511259462, + "grad_norm": 2.3114813529900857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265880 + }, + { + "epoch": 1.2895184493187823, + "grad_norm": 2.0138534750913095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265890 + }, + { + "epoch": 1.2895669475116183, + "grad_norm": 2.3082087352577219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265900 + }, + { + "epoch": 1.2896154457044544, + "grad_norm": 2.0791379995444004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265910 + }, + { + "epoch": 1.2896639438972906, + "grad_norm": 2.1142869854884339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265920 + }, + { + "epoch": 1.2897124420901267, + "grad_norm": 2.170853861116484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265930 + }, + { + "epoch": 1.2897609402829628, + "grad_norm": 2.005545383099161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265940 + }, + { + "epoch": 1.2898094384757988, + "grad_norm": 2.038837436657559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265950 + }, + { + "epoch": 1.289857936668635, + "grad_norm": 2.209743428238653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265960 + }, + { + "epoch": 1.289906434861471, + "grad_norm": 2.0660817767748085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265970 + }, + { + "epoch": 1.289954933054307, + "grad_norm": 2.2481128780782456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265980 + }, + { + "epoch": 1.2900034312471431, + "grad_norm": 1.8642130328316853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 265990 + }, + { + "epoch": 1.2900519294399793, + "grad_norm": 2.1021011775701481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266000 + }, + { + "epoch": 1.2901004276328154, + "grad_norm": 2.1784383363865345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266010 + }, + { + "epoch": 1.2901489258256515, + "grad_norm": 2.091636872592062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266020 + }, + { + "epoch": 1.2901974240184875, + "grad_norm": 1.8319244077247276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266030 + }, + { + "epoch": 1.2902459222113236, + "grad_norm": 1.9208441415230482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266040 + }, + { + "epoch": 1.2902944204041598, + "grad_norm": 1.9291755393169296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266050 + }, + { + "epoch": 1.2903429185969957, + "grad_norm": 2.2201012939149223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266060 + }, + { + "epoch": 1.2903914167898318, + "grad_norm": 1.9834452302802674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266070 + }, + { + "epoch": 1.290439914982668, + "grad_norm": 2.073203546615332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266080 + }, + { + "epoch": 1.2904884131755041, + "grad_norm": 2.6637582095645485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266090 + }, + { + "epoch": 1.2905369113683403, + "grad_norm": 1.9782082461006212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266100 + }, + { + "epoch": 1.2905854095611762, + "grad_norm": 1.9267527306965349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266110 + }, + { + "epoch": 1.2906339077540123, + "grad_norm": 2.023956398033988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266120 + }, + { + "epoch": 1.2906824059468485, + "grad_norm": 1.8347093089232658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266130 + }, + { + "epoch": 1.2907309041396844, + "grad_norm": 1.9244025395437347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266140 + }, + { + "epoch": 1.2907794023325205, + "grad_norm": 2.119056574656497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266150 + }, + { + "epoch": 1.2908279005253567, + "grad_norm": 1.9574683562950668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266160 + }, + { + "epoch": 1.2908763987181928, + "grad_norm": 1.8253504663334752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266170 + }, + { + "epoch": 1.290924896911029, + "grad_norm": 1.9928637584598619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266180 + }, + { + "epoch": 1.2909733951038649, + "grad_norm": 1.8783975974656641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266190 + }, + { + "epoch": 1.291021893296701, + "grad_norm": 1.872093093879812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266200 + }, + { + "epoch": 1.2910703914895372, + "grad_norm": 1.878213282680008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266210 + }, + { + "epoch": 1.291118889682373, + "grad_norm": 1.8507465426864655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266220 + }, + { + "epoch": 1.2911673878752092, + "grad_norm": 1.8053364669867733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266230 + }, + { + "epoch": 1.2912158860680454, + "grad_norm": 1.6440456818145321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266240 + }, + { + "epoch": 1.2912643842608815, + "grad_norm": 1.720821813933071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266250 + }, + { + "epoch": 1.2913128824537177, + "grad_norm": 1.7538420138407673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266260 + }, + { + "epoch": 1.2913613806465536, + "grad_norm": 1.887983813730898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266270 + }, + { + "epoch": 1.2914098788393897, + "grad_norm": 1.7405324115316034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266280 + }, + { + "epoch": 1.2914583770322259, + "grad_norm": 1.687696169483388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266290 + }, + { + "epoch": 1.2915068752250618, + "grad_norm": 1.6919153722483316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266300 + }, + { + "epoch": 1.291555373417898, + "grad_norm": 1.6973781669094024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266310 + }, + { + "epoch": 1.291603871610734, + "grad_norm": 1.728666632061504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266320 + }, + { + "epoch": 1.2916523698035702, + "grad_norm": 1.7014561137784767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266330 + }, + { + "epoch": 1.2917008679964064, + "grad_norm": 1.792288202295822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266340 + }, + { + "epoch": 1.2917493661892423, + "grad_norm": 1.662868669427553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266350 + }, + { + "epoch": 1.2917978643820784, + "grad_norm": 1.777116409584778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266360 + }, + { + "epoch": 1.2918463625749146, + "grad_norm": 1.7516204309231398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266370 + }, + { + "epoch": 1.2918948607677505, + "grad_norm": 2.894748263315705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266380 + }, + { + "epoch": 1.2919433589605867, + "grad_norm": 1.6057546758929675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266390 + }, + { + "epoch": 1.2919918571534228, + "grad_norm": 1.7779822769625753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266400 + }, + { + "epoch": 1.292040355346259, + "grad_norm": 1.7535933238832513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266410 + }, + { + "epoch": 1.292088853539095, + "grad_norm": 1.7625929160658416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266420 + }, + { + "epoch": 1.292137351731931, + "grad_norm": 1.6969332250482694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266430 + }, + { + "epoch": 1.2921858499247671, + "grad_norm": 1.5181674939412915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266440 + }, + { + "epoch": 1.2922343481176033, + "grad_norm": 1.7174568256450584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266450 + }, + { + "epoch": 1.2922828463104394, + "grad_norm": 1.6265214242139336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266460 + }, + { + "epoch": 1.2923313445032756, + "grad_norm": 1.7028942522756552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266470 + }, + { + "epoch": 1.2923798426961115, + "grad_norm": 1.5470958203422924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266480 + }, + { + "epoch": 1.2924283408889476, + "grad_norm": 1.58188882437571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266490 + }, + { + "epoch": 1.2924768390817838, + "grad_norm": 1.6518869472292863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266500 + }, + { + "epoch": 1.2925253372746197, + "grad_norm": 1.649744945098064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266510 + }, + { + "epoch": 1.2925738354674559, + "grad_norm": 1.599728562950986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266520 + }, + { + "epoch": 1.292622333660292, + "grad_norm": 1.7320218148597633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266530 + }, + { + "epoch": 1.2926708318531281, + "grad_norm": 1.5792038254858198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266540 + }, + { + "epoch": 1.2927193300459643, + "grad_norm": 1.5604278758019063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266550 + }, + { + "epoch": 1.2927678282388002, + "grad_norm": 1.7110924943608552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266560 + }, + { + "epoch": 1.2928163264316364, + "grad_norm": 1.6742670538860693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266570 + }, + { + "epoch": 1.2928648246244725, + "grad_norm": 1.7071742774987797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266580 + }, + { + "epoch": 1.2929133228173084, + "grad_norm": 1.5289514010419225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266590 + }, + { + "epoch": 1.2929618210101446, + "grad_norm": 1.6101557775982656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266600 + }, + { + "epoch": 1.2930103192029807, + "grad_norm": 1.5509959894188796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266610 + }, + { + "epoch": 1.2930588173958169, + "grad_norm": 1.5834612554499472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266620 + }, + { + "epoch": 1.293107315588653, + "grad_norm": 1.6750129816500703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266630 + }, + { + "epoch": 1.293155813781489, + "grad_norm": 1.573704651036678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266640 + }, + { + "epoch": 1.293204311974325, + "grad_norm": 1.563406755167307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266650 + }, + { + "epoch": 1.2932528101671612, + "grad_norm": 1.5802999087100034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266660 + }, + { + "epoch": 1.2933013083599971, + "grad_norm": 1.6585568118898664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266670 + }, + { + "epoch": 1.2933498065528333, + "grad_norm": 1.5306385137137113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266680 + }, + { + "epoch": 1.2933983047456694, + "grad_norm": 1.509600622284779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266690 + }, + { + "epoch": 1.2934468029385056, + "grad_norm": 1.5493031924052048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266700 + }, + { + "epoch": 1.2934953011313417, + "grad_norm": 1.550697135144219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266710 + }, + { + "epoch": 1.2935437993241776, + "grad_norm": 1.5014629184406658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266720 + }, + { + "epoch": 1.2935922975170138, + "grad_norm": 1.6548062831134303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266730 + }, + { + "epoch": 1.29364079570985, + "grad_norm": 1.4626479583057517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266740 + }, + { + "epoch": 1.2936892939026858, + "grad_norm": 1.5369383277175075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266750 + }, + { + "epoch": 1.293737792095522, + "grad_norm": 1.5259740848705405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266760 + }, + { + "epoch": 1.2937862902883581, + "grad_norm": 1.6123181012517307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266770 + }, + { + "epoch": 1.2938347884811943, + "grad_norm": 1.7450221889703244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266780 + }, + { + "epoch": 1.2938832866740304, + "grad_norm": 1.467867321025551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266790 + }, + { + "epoch": 1.2939317848668663, + "grad_norm": 1.5696116406616056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266800 + }, + { + "epoch": 1.2939802830597025, + "grad_norm": 1.6482621845170797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266810 + }, + { + "epoch": 1.2940287812525386, + "grad_norm": 1.4789675617521425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266820 + }, + { + "epoch": 1.2940772794453745, + "grad_norm": 1.4508226797715906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266830 + }, + { + "epoch": 1.2941257776382107, + "grad_norm": 1.4897710798322805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266840 + }, + { + "epoch": 1.2941742758310468, + "grad_norm": 1.4508691492665093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266850 + }, + { + "epoch": 1.294222774023883, + "grad_norm": 1.4791584135309677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266860 + }, + { + "epoch": 1.2942712722167191, + "grad_norm": 1.6436634098226932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266870 + }, + { + "epoch": 1.294319770409555, + "grad_norm": 1.4890332522554672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266880 + }, + { + "epoch": 1.2943682686023912, + "grad_norm": 1.3975940760246885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266890 + }, + { + "epoch": 1.2944167667952273, + "grad_norm": 1.4940812320674013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266900 + }, + { + "epoch": 1.2944652649880632, + "grad_norm": 1.446499595658679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266910 + }, + { + "epoch": 1.2945137631808994, + "grad_norm": 1.4711299911596143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266920 + }, + { + "epoch": 1.2945622613737355, + "grad_norm": 1.4089586386489827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266930 + }, + { + "epoch": 1.2946107595665717, + "grad_norm": 1.3231458240170468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266940 + }, + { + "epoch": 1.2946592577594078, + "grad_norm": 1.4127998326785018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266950 + }, + { + "epoch": 1.2947077559522437, + "grad_norm": 1.6198094954233966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266960 + }, + { + "epoch": 1.2947562541450799, + "grad_norm": 1.3736163850808225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266970 + }, + { + "epoch": 1.294804752337916, + "grad_norm": 1.4357071620452189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266980 + }, + { + "epoch": 1.2948532505307522, + "grad_norm": 1.488214138589683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 266990 + }, + { + "epoch": 1.2949017487235883, + "grad_norm": 1.3508383744920138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267000 + }, + { + "epoch": 1.2949502469164242, + "grad_norm": 1.327994709754421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267010 + }, + { + "epoch": 1.2949987451092604, + "grad_norm": 1.5587357893309672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267020 + }, + { + "epoch": 1.2950472433020965, + "grad_norm": 1.3364262940740446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267030 + }, + { + "epoch": 1.2950957414949325, + "grad_norm": 1.2886151523616718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267040 + }, + { + "epoch": 1.2951442396877686, + "grad_norm": 1.277941663602178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267050 + }, + { + "epoch": 1.2951927378806047, + "grad_norm": 1.2830135176500335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267060 + }, + { + "epoch": 1.2952412360734409, + "grad_norm": 1.3329933779004932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267070 + }, + { + "epoch": 1.295289734266277, + "grad_norm": 1.2354510658951767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267080 + }, + { + "epoch": 1.295338232459113, + "grad_norm": 1.2791228698461055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267090 + }, + { + "epoch": 1.295386730651949, + "grad_norm": 1.2893194423213572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267100 + }, + { + "epoch": 1.2954352288447852, + "grad_norm": 1.286560689095495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267110 + }, + { + "epoch": 1.2954837270376212, + "grad_norm": 1.2299223328682274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267120 + }, + { + "epoch": 1.2955322252304573, + "grad_norm": 1.45821161368076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267130 + }, + { + "epoch": 1.2955807234232934, + "grad_norm": 1.2882561861715658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267140 + }, + { + "epoch": 1.2956292216161296, + "grad_norm": 1.5669485264879768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267150 + }, + { + "epoch": 1.2956777198089657, + "grad_norm": 1.2058016807259264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267160 + }, + { + "epoch": 1.2957262180018017, + "grad_norm": 1.3591447611815966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267170 + }, + { + "epoch": 1.2957747161946378, + "grad_norm": 1.0739623235167528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267180 + }, + { + "epoch": 1.295823214387474, + "grad_norm": 1.0903875136136776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267190 + }, + { + "epoch": 1.2958717125803099, + "grad_norm": 1.19867806347429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267200 + }, + { + "epoch": 1.295920210773146, + "grad_norm": 1.1398660859640586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267210 + }, + { + "epoch": 1.2959687089659822, + "grad_norm": 1.1347342621093048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267220 + }, + { + "epoch": 1.2960172071588183, + "grad_norm": 1.048016642357652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267230 + }, + { + "epoch": 1.2960657053516544, + "grad_norm": 1.0312857057215297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267240 + }, + { + "epoch": 1.2961142035444904, + "grad_norm": 1.185188978070073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267250 + }, + { + "epoch": 1.2961627017373265, + "grad_norm": 1.935352997861628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267260 + }, + { + "epoch": 1.2962111999301626, + "grad_norm": 1.0682238382742071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267270 + }, + { + "epoch": 1.2962596981229986, + "grad_norm": 1.0791084292804953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267280 + }, + { + "epoch": 1.2963081963158347, + "grad_norm": 9.395744626772284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267290 + }, + { + "epoch": 1.2963566945086709, + "grad_norm": 1.1689758139254991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267300 + }, + { + "epoch": 1.296405192701507, + "grad_norm": 1.0248262327650082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267310 + }, + { + "epoch": 1.2964536908943431, + "grad_norm": 9.961257063650919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267320 + }, + { + "epoch": 1.296502189087179, + "grad_norm": 1.0983850273760254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267330 + }, + { + "epoch": 1.2965506872800152, + "grad_norm": 8.955941410704327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267340 + }, + { + "epoch": 1.2965991854728514, + "grad_norm": 1.0492684765495142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267350 + }, + { + "epoch": 1.2966476836656873, + "grad_norm": 9.266256739692835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267360 + }, + { + "epoch": 1.2966961818585234, + "grad_norm": 9.538560874489121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267370 + }, + { + "epoch": 1.2967446800513596, + "grad_norm": 9.536685041666715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267380 + }, + { + "epoch": 1.2967931782441957, + "grad_norm": 7.731399165322728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267390 + }, + { + "epoch": 1.2968416764370319, + "grad_norm": 9.35131367896247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267400 + }, + { + "epoch": 1.2968901746298678, + "grad_norm": 8.739541357272174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267410 + }, + { + "epoch": 1.296938672822704, + "grad_norm": 9.446120685652204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267420 + }, + { + "epoch": 1.29698717101554, + "grad_norm": 8.822207320235975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267430 + }, + { + "epoch": 1.297035669208376, + "grad_norm": 7.769911292143661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267440 + }, + { + "epoch": 1.2970841674012121, + "grad_norm": 9.497674113845278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267450 + }, + { + "epoch": 1.2971326655940483, + "grad_norm": 8.72493686188136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267460 + }, + { + "epoch": 1.2971811637868844, + "grad_norm": 9.600469041970427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267470 + }, + { + "epoch": 1.2972296619797206, + "grad_norm": 9.117768229316425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267480 + }, + { + "epoch": 1.2972781601725565, + "grad_norm": 9.239128218041515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267490 + }, + { + "epoch": 1.2973266583653926, + "grad_norm": 8.289180897236292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267500 + }, + { + "epoch": 1.2973751565582288, + "grad_norm": 8.453919519979536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267510 + }, + { + "epoch": 1.297423654751065, + "grad_norm": 8.955385766284962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267520 + }, + { + "epoch": 1.297472152943901, + "grad_norm": 9.101143660927846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267530 + }, + { + "epoch": 1.297520651136737, + "grad_norm": 7.334590179652878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267540 + }, + { + "epoch": 1.2975691493295731, + "grad_norm": 8.242484028642139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267550 + }, + { + "epoch": 1.2976176475224093, + "grad_norm": 8.226152914403428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267560 + }, + { + "epoch": 1.2976661457152452, + "grad_norm": 7.814041680376249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267570 + }, + { + "epoch": 1.2977146439080813, + "grad_norm": 7.748032260224136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267580 + }, + { + "epoch": 1.2977631421009175, + "grad_norm": 6.557506537774316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267590 + }, + { + "epoch": 1.2978116402937536, + "grad_norm": 9.566199565824718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267600 + }, + { + "epoch": 1.2978601384865898, + "grad_norm": 8.801034567795796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267610 + }, + { + "epoch": 1.2979086366794257, + "grad_norm": 7.741648033743331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267620 + }, + { + "epoch": 1.2979571348722618, + "grad_norm": 7.63200418418819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267630 + }, + { + "epoch": 1.298005633065098, + "grad_norm": 6.630521198758288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267640 + }, + { + "epoch": 1.298054131257934, + "grad_norm": 9.950624502153005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267650 + }, + { + "epoch": 1.29810262945077, + "grad_norm": 8.161024567243658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267660 + }, + { + "epoch": 1.2981511276436062, + "grad_norm": 8.12960863072476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267670 + }, + { + "epoch": 1.2981996258364423, + "grad_norm": 7.93289274270137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267680 + }, + { + "epoch": 1.2982481240292785, + "grad_norm": 6.083807591039658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267690 + }, + { + "epoch": 1.2982966222221144, + "grad_norm": 7.338358187780614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267700 + }, + { + "epoch": 1.2983451204149505, + "grad_norm": 7.928493062081543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267710 + }, + { + "epoch": 1.2983936186077867, + "grad_norm": 7.74339454778783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267720 + }, + { + "epoch": 1.2984421168006226, + "grad_norm": 8.128643713689598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267730 + }, + { + "epoch": 1.2984906149934587, + "grad_norm": 6.378987649213741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267740 + }, + { + "epoch": 1.298539113186295, + "grad_norm": 7.48559543239935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267750 + }, + { + "epoch": 1.298587611379131, + "grad_norm": 7.652764821841629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267760 + }, + { + "epoch": 1.2986361095719672, + "grad_norm": 8.939860407508604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267770 + }, + { + "epoch": 1.298684607764803, + "grad_norm": 7.370978494236624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267780 + }, + { + "epoch": 1.2987331059576392, + "grad_norm": 5.975249450784759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267790 + }, + { + "epoch": 1.2987816041504754, + "grad_norm": 7.278309510638792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267800 + }, + { + "epoch": 1.2988301023433113, + "grad_norm": 8.244501259468962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267810 + }, + { + "epoch": 1.2988786005361475, + "grad_norm": 7.302402593722945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267820 + }, + { + "epoch": 1.2989270987289836, + "grad_norm": 6.597456803092427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267830 + }, + { + "epoch": 1.2989755969218197, + "grad_norm": 5.963265437003429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267840 + }, + { + "epoch": 1.2990240951146559, + "grad_norm": 9.431572323137516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267850 + }, + { + "epoch": 1.2990725933074918, + "grad_norm": 7.149952097051937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267860 + }, + { + "epoch": 1.299121091500328, + "grad_norm": 7.291025383437955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267870 + }, + { + "epoch": 1.299169589693164, + "grad_norm": 1.0553655016565244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267880 + }, + { + "epoch": 1.299218087886, + "grad_norm": 5.384234924576958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267890 + }, + { + "epoch": 1.2992665860788362, + "grad_norm": 6.961403897776108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267900 + }, + { + "epoch": 1.2993150842716723, + "grad_norm": 6.684841480364412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267910 + }, + { + "epoch": 1.2993635824645084, + "grad_norm": 6.895837145748374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267920 + }, + { + "epoch": 1.2994120806573446, + "grad_norm": 6.927031392933714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267930 + }, + { + "epoch": 1.2994605788501805, + "grad_norm": 5.771821065536642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267940 + }, + { + "epoch": 1.2995090770430167, + "grad_norm": 6.49055067469817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267950 + }, + { + "epoch": 1.2995575752358528, + "grad_norm": 6.477215208633424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267960 + }, + { + "epoch": 1.299606073428689, + "grad_norm": 6.43544382228356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267970 + }, + { + "epoch": 1.299654571621525, + "grad_norm": 5.677074454979447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267980 + }, + { + "epoch": 1.299703069814361, + "grad_norm": 5.2625974689135546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 267990 + }, + { + "epoch": 1.2997515680071972, + "grad_norm": 6.407871211422389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268000 + }, + { + "epoch": 1.2998000662000333, + "grad_norm": 7.064758733577037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268010 + }, + { + "epoch": 1.2998485643928692, + "grad_norm": 6.920252815234562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268020 + }, + { + "epoch": 1.2998970625857054, + "grad_norm": 5.504423583602147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268030 + }, + { + "epoch": 1.2999455607785415, + "grad_norm": 5.208610076579134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268040 + }, + { + "epoch": 1.2999940589713777, + "grad_norm": 1.4416571048059268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268050 + }, + { + "epoch": 1.3000425571642138, + "grad_norm": 6.504506444571234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268060 + }, + { + "epoch": 1.3000910553570497, + "grad_norm": 6.450748202269097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268070 + }, + { + "epoch": 1.3001395535498859, + "grad_norm": 6.183270073734093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268080 + }, + { + "epoch": 1.300188051742722, + "grad_norm": 5.413124881670228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268090 + }, + { + "epoch": 1.300236549935558, + "grad_norm": 5.96482720993663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268100 + }, + { + "epoch": 1.300285048128394, + "grad_norm": 6.366359173171077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268110 + }, + { + "epoch": 1.3003335463212302, + "grad_norm": 6.032616539641822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268120 + }, + { + "epoch": 1.3003820445140664, + "grad_norm": 6.075762115642647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268130 + }, + { + "epoch": 1.3004305427069025, + "grad_norm": 5.229156840869109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268140 + }, + { + "epoch": 1.3004790408997384, + "grad_norm": 5.990580120851519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268150 + }, + { + "epoch": 1.3005275390925746, + "grad_norm": 6.533627328053626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268160 + }, + { + "epoch": 1.3005760372854107, + "grad_norm": 6.441272404345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268170 + }, + { + "epoch": 1.3006245354782466, + "grad_norm": 6.643025329822194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268180 + }, + { + "epoch": 1.3006730336710828, + "grad_norm": 4.950513599055739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268190 + }, + { + "epoch": 1.300721531863919, + "grad_norm": 6.023910970043289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268200 + }, + { + "epoch": 1.300770030056755, + "grad_norm": 5.789416945845005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268210 + }, + { + "epoch": 1.3008185282495912, + "grad_norm": 6.477064573573443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268220 + }, + { + "epoch": 1.3008670264424271, + "grad_norm": 6.413817743577965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268230 + }, + { + "epoch": 1.3009155246352633, + "grad_norm": 5.4038650887378026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268240 + }, + { + "epoch": 1.3009640228280994, + "grad_norm": 6.147175923842951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268250 + }, + { + "epoch": 1.3010125210209353, + "grad_norm": 5.644221801048843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268260 + }, + { + "epoch": 1.3010610192137715, + "grad_norm": 7.074978469745474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268270 + }, + { + "epoch": 1.3011095174066076, + "grad_norm": 5.909353006927631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268280 + }, + { + "epoch": 1.3011580155994438, + "grad_norm": 4.988023505347883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268290 + }, + { + "epoch": 1.30120651379228, + "grad_norm": 5.821886972512402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268300 + }, + { + "epoch": 1.3012550119851158, + "grad_norm": 8.282937358217168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268310 + }, + { + "epoch": 1.301303510177952, + "grad_norm": 5.856971441176029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268320 + }, + { + "epoch": 1.3013520083707881, + "grad_norm": 5.951234882672907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268330 + }, + { + "epoch": 1.301400506563624, + "grad_norm": 5.13120674838774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268340 + }, + { + "epoch": 1.3014490047564602, + "grad_norm": 5.5989463731975775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268350 + }, + { + "epoch": 1.3014975029492963, + "grad_norm": 5.393989610524841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268360 + }, + { + "epoch": 1.3015460011421325, + "grad_norm": 5.760650978459125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268370 + }, + { + "epoch": 1.3015944993349686, + "grad_norm": 4.851855095466817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268380 + }, + { + "epoch": 1.3016429975278045, + "grad_norm": 4.980907775120613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268390 + }, + { + "epoch": 1.3016914957206407, + "grad_norm": 6.07860215495748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268400 + }, + { + "epoch": 1.3017399939134768, + "grad_norm": 5.125349034074134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268410 + }, + { + "epoch": 1.3017884921063128, + "grad_norm": 6.056160373191233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268420 + }, + { + "epoch": 1.301836990299149, + "grad_norm": 6.062368385073569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268430 + }, + { + "epoch": 1.301885488491985, + "grad_norm": 4.715226253892979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268440 + }, + { + "epoch": 1.3019339866848212, + "grad_norm": 5.413317083480251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268450 + }, + { + "epoch": 1.3019824848776573, + "grad_norm": 6.431489651959055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268460 + }, + { + "epoch": 1.3020309830704933, + "grad_norm": 5.2111179371649996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268470 + }, + { + "epoch": 1.3020794812633294, + "grad_norm": 4.789873742083728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268480 + }, + { + "epoch": 1.3021279794561655, + "grad_norm": 4.7174605555255766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268490 + }, + { + "epoch": 1.3021764776490017, + "grad_norm": 5.556043447541015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268500 + }, + { + "epoch": 1.3022249758418378, + "grad_norm": 5.6455156993706623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268510 + }, + { + "epoch": 1.3022734740346738, + "grad_norm": 5.412974601881615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268520 + }, + { + "epoch": 1.30232197222751, + "grad_norm": 6.064111346404388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268530 + }, + { + "epoch": 1.302370470420346, + "grad_norm": 5.4379501790435825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268540 + }, + { + "epoch": 1.302418968613182, + "grad_norm": 5.1899615272077426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268550 + }, + { + "epoch": 1.302467466806018, + "grad_norm": 5.636729483171621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268560 + }, + { + "epoch": 1.3025159649988542, + "grad_norm": 5.299905581068742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268570 + }, + { + "epoch": 1.3025644631916904, + "grad_norm": 5.0365571269139764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268580 + }, + { + "epoch": 1.3026129613845265, + "grad_norm": 4.69523655510784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268590 + }, + { + "epoch": 1.3026614595773625, + "grad_norm": 5.933100766242205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268600 + }, + { + "epoch": 1.3027099577701986, + "grad_norm": 6.516606276818493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268610 + }, + { + "epoch": 1.3027584559630347, + "grad_norm": 5.432096017443655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268620 + }, + { + "epoch": 1.3028069541558707, + "grad_norm": 5.3545623046602486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268630 + }, + { + "epoch": 1.3028554523487068, + "grad_norm": 4.36483809096444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268640 + }, + { + "epoch": 1.302903950541543, + "grad_norm": 5.493199495276713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268650 + }, + { + "epoch": 1.302952448734379, + "grad_norm": 5.104307376768702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268660 + }, + { + "epoch": 1.3030009469272152, + "grad_norm": 5.491548904501542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268670 + }, + { + "epoch": 1.3030494451200512, + "grad_norm": 4.9764608434088586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268680 + }, + { + "epoch": 1.3030979433128873, + "grad_norm": 4.5330924791642246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268690 + }, + { + "epoch": 1.3031464415057235, + "grad_norm": 5.151571969008728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268700 + }, + { + "epoch": 1.3031949396985594, + "grad_norm": 7.122073242271654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268710 + }, + { + "epoch": 1.3032434378913955, + "grad_norm": 5.3374908048908765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268720 + }, + { + "epoch": 1.3032919360842317, + "grad_norm": 5.152143245368279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268730 + }, + { + "epoch": 1.3033404342770678, + "grad_norm": 4.373508133426185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268740 + }, + { + "epoch": 1.303388932469904, + "grad_norm": 4.8891315884702635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268750 + }, + { + "epoch": 1.3034374306627399, + "grad_norm": 4.943773390664319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268760 + }, + { + "epoch": 1.303485928855576, + "grad_norm": 5.4255988146678646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268770 + }, + { + "epoch": 1.3035344270484122, + "grad_norm": 5.188903529074196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268780 + }, + { + "epoch": 1.303582925241248, + "grad_norm": 4.364456529515337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268790 + }, + { + "epoch": 1.3036314234340842, + "grad_norm": 5.066033637035616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268800 + }, + { + "epoch": 1.3036799216269204, + "grad_norm": 5.3685901946209924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268810 + }, + { + "epoch": 1.3037284198197565, + "grad_norm": 5.0175977150956896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268820 + }, + { + "epoch": 1.3037769180125927, + "grad_norm": 4.777421125368164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268830 + }, + { + "epoch": 1.3038254162054286, + "grad_norm": 4.337874415227816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268840 + }, + { + "epoch": 1.3038739143982647, + "grad_norm": 5.196890384695507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268850 + }, + { + "epoch": 1.3039224125911009, + "grad_norm": 5.106057088255511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268860 + }, + { + "epoch": 1.3039709107839368, + "grad_norm": 5.005565739679696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268870 + }, + { + "epoch": 1.304019408976773, + "grad_norm": 5.3414481726576923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268880 + }, + { + "epoch": 1.304067907169609, + "grad_norm": 4.058322389255409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268890 + }, + { + "epoch": 1.3041164053624452, + "grad_norm": 5.385316370620785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268900 + }, + { + "epoch": 1.3041649035552814, + "grad_norm": 4.871504444281527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268910 + }, + { + "epoch": 1.3042134017481173, + "grad_norm": 4.713313828119681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268920 + }, + { + "epoch": 1.3042618999409534, + "grad_norm": 4.3012523320840046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268930 + }, + { + "epoch": 1.3043103981337896, + "grad_norm": 4.2491869578498154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268940 + }, + { + "epoch": 1.3043588963266255, + "grad_norm": 4.978962309110102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268950 + }, + { + "epoch": 1.3044073945194616, + "grad_norm": 5.086334553539018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268960 + }, + { + "epoch": 1.3044558927122978, + "grad_norm": 5.373961542431971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268970 + }, + { + "epoch": 1.304504390905134, + "grad_norm": 4.379461771009119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268980 + }, + { + "epoch": 1.30455288909797, + "grad_norm": 4.1413048990079915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 268990 + }, + { + "epoch": 1.304601387290806, + "grad_norm": 4.870695491376864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269000 + }, + { + "epoch": 1.3046498854836421, + "grad_norm": 5.015408177655445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269010 + }, + { + "epoch": 1.3046983836764783, + "grad_norm": 5.090455701406427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269020 + }, + { + "epoch": 1.3047468818693144, + "grad_norm": 4.3703611396495035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269030 + }, + { + "epoch": 1.3047953800621506, + "grad_norm": 4.048986212978889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269040 + }, + { + "epoch": 1.3048438782549865, + "grad_norm": 4.534525288590885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269050 + }, + { + "epoch": 1.3048923764478226, + "grad_norm": 4.5149491256779584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269060 + }, + { + "epoch": 1.3049408746406588, + "grad_norm": 4.9612285835110015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269070 + }, + { + "epoch": 1.3049893728334947, + "grad_norm": 4.706859613179404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269080 + }, + { + "epoch": 1.3050378710263308, + "grad_norm": 3.663670256059959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269090 + }, + { + "epoch": 1.305086369219167, + "grad_norm": 4.7483943887982605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269100 + }, + { + "epoch": 1.3051348674120031, + "grad_norm": 4.921828633541736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269110 + }, + { + "epoch": 1.3051833656048393, + "grad_norm": 4.750928184193981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269120 + }, + { + "epoch": 1.3052318637976752, + "grad_norm": 5.123049717781214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269130 + }, + { + "epoch": 1.3052803619905113, + "grad_norm": 3.98636466059088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269140 + }, + { + "epoch": 1.3053288601833475, + "grad_norm": 4.3741074762237986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269150 + }, + { + "epoch": 1.3053773583761834, + "grad_norm": 4.4878593286057367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269160 + }, + { + "epoch": 1.3054258565690195, + "grad_norm": 4.928833163830859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269170 + }, + { + "epoch": 1.3054743547618557, + "grad_norm": 4.499261763157847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269180 + }, + { + "epoch": 1.3055228529546918, + "grad_norm": 4.019314303604915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269190 + }, + { + "epoch": 1.305571351147528, + "grad_norm": 4.3994578646788796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269200 + }, + { + "epoch": 1.305619849340364, + "grad_norm": 4.5811901827619295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269210 + }, + { + "epoch": 1.3056683475332, + "grad_norm": 4.920213925174721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269220 + }, + { + "epoch": 1.3057168457260362, + "grad_norm": 3.892107258707256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269230 + }, + { + "epoch": 1.3057653439188721, + "grad_norm": 3.948332150116585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269240 + }, + { + "epoch": 1.3058138421117083, + "grad_norm": 4.538042830404265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269250 + }, + { + "epoch": 1.3058623403045444, + "grad_norm": 4.505362127815715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269260 + }, + { + "epoch": 1.3059108384973805, + "grad_norm": 4.6176978685252834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269270 + }, + { + "epoch": 1.3059593366902167, + "grad_norm": 4.480572712850517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269280 + }, + { + "epoch": 1.3060078348830526, + "grad_norm": 4.570774336798422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269290 + }, + { + "epoch": 1.3060563330758888, + "grad_norm": 4.289987742822632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269300 + }, + { + "epoch": 1.306104831268725, + "grad_norm": 4.638998163386532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269310 + }, + { + "epoch": 1.3061533294615608, + "grad_norm": 4.853940183124905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269320 + }, + { + "epoch": 1.306201827654397, + "grad_norm": 5.8987847495473034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269330 + }, + { + "epoch": 1.306250325847233, + "grad_norm": 3.854666630331849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269340 + }, + { + "epoch": 1.3062988240400693, + "grad_norm": 4.3011045391949665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269350 + }, + { + "epoch": 1.3063473222329054, + "grad_norm": 4.286009769316479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269360 + }, + { + "epoch": 1.3063958204257413, + "grad_norm": 4.23768149460102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269370 + }, + { + "epoch": 1.3064443186185775, + "grad_norm": 4.0515612198532835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269380 + }, + { + "epoch": 1.3064928168114136, + "grad_norm": 3.692212402484074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269390 + }, + { + "epoch": 1.3065413150042495, + "grad_norm": 4.2995825566549684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269400 + }, + { + "epoch": 1.3065898131970857, + "grad_norm": 4.4193736670194994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269410 + }, + { + "epoch": 1.3066383113899218, + "grad_norm": 8.128014883368451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269420 + }, + { + "epoch": 1.306686809582758, + "grad_norm": 4.8970218102795116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269430 + }, + { + "epoch": 1.306735307775594, + "grad_norm": 3.715679497418023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269440 + }, + { + "epoch": 1.30678380596843, + "grad_norm": 4.453135815651876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269450 + }, + { + "epoch": 1.3068323041612662, + "grad_norm": 5.659833846038964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269460 + }, + { + "epoch": 1.3068808023541023, + "grad_norm": 4.8007983366460394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269470 + }, + { + "epoch": 1.3069293005469382, + "grad_norm": 3.986831131896906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269480 + }, + { + "epoch": 1.3069777987397744, + "grad_norm": 3.7329396462837394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269490 + }, + { + "epoch": 1.3070262969326105, + "grad_norm": 4.066689029968984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269500 + }, + { + "epoch": 1.3070747951254467, + "grad_norm": 4.549847076873448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269510 + }, + { + "epoch": 1.3071232933182828, + "grad_norm": 4.304803269405966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269520 + }, + { + "epoch": 1.3071717915111187, + "grad_norm": 4.00158128854855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269530 + }, + { + "epoch": 1.3072202897039549, + "grad_norm": 3.8508350286292625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269540 + }, + { + "epoch": 1.307268787896791, + "grad_norm": 4.202954784204849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269550 + }, + { + "epoch": 1.3073172860896272, + "grad_norm": 3.994204078594521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269560 + }, + { + "epoch": 1.3073657842824633, + "grad_norm": 3.9091268888569175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269570 + }, + { + "epoch": 1.3074142824752992, + "grad_norm": 3.4996450182234184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269580 + }, + { + "epoch": 1.3074627806681354, + "grad_norm": 3.523373592884127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269590 + }, + { + "epoch": 1.3075112788609715, + "grad_norm": 4.2230276164900715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269600 + }, + { + "epoch": 1.3075597770538074, + "grad_norm": 4.1181909438137154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269610 + }, + { + "epoch": 1.3076082752466436, + "grad_norm": 3.840256113107898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269620 + }, + { + "epoch": 1.3076567734394797, + "grad_norm": 4.384378726740579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269630 + }, + { + "epoch": 1.3077052716323159, + "grad_norm": 3.445904894761043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269640 + }, + { + "epoch": 1.307753769825152, + "grad_norm": 3.960619210374716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269650 + }, + { + "epoch": 1.307802268017988, + "grad_norm": 3.939943127306833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269660 + }, + { + "epoch": 1.307850766210824, + "grad_norm": 3.896559164218161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269670 + }, + { + "epoch": 1.3078992644036602, + "grad_norm": 4.1225568736535934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269680 + }, + { + "epoch": 1.3079477625964961, + "grad_norm": 3.4590620145991124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269690 + }, + { + "epoch": 1.3079962607893323, + "grad_norm": 4.406642517551518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269700 + }, + { + "epoch": 1.3080447589821684, + "grad_norm": 4.579553447570106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269710 + }, + { + "epoch": 1.3080932571750046, + "grad_norm": 3.9099234072637046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269720 + }, + { + "epoch": 1.3081417553678407, + "grad_norm": 4.1387341553900114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269730 + }, + { + "epoch": 1.3081902535606766, + "grad_norm": 3.59143257355754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269740 + }, + { + "epoch": 1.3082387517535128, + "grad_norm": 4.344393644828415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269750 + }, + { + "epoch": 1.308287249946349, + "grad_norm": 4.116406060461486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269760 + }, + { + "epoch": 1.3083357481391849, + "grad_norm": 4.68909959749908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269770 + }, + { + "epoch": 1.308384246332021, + "grad_norm": 3.858151842450752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269780 + }, + { + "epoch": 1.3084327445248571, + "grad_norm": 3.575234330810417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269790 + }, + { + "epoch": 1.3084812427176933, + "grad_norm": 3.8632041565733743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269800 + }, + { + "epoch": 1.3085297409105294, + "grad_norm": 3.964894546015785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269810 + }, + { + "epoch": 1.3085782391033653, + "grad_norm": 3.920667879242501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269820 + }, + { + "epoch": 1.3086267372962015, + "grad_norm": 3.676065674085294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269830 + }, + { + "epoch": 1.3086752354890376, + "grad_norm": 3.4233419654583486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269840 + }, + { + "epoch": 1.3087237336818736, + "grad_norm": 4.166401623706406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269850 + }, + { + "epoch": 1.3087722318747097, + "grad_norm": 3.964562367286817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269860 + }, + { + "epoch": 1.3088207300675458, + "grad_norm": 3.89259753319493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269870 + }, + { + "epoch": 1.308869228260382, + "grad_norm": 3.8838685156861175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269880 + }, + { + "epoch": 1.3089177264532181, + "grad_norm": 4.1426261532251374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269890 + }, + { + "epoch": 1.308966224646054, + "grad_norm": 3.84196354730193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269900 + }, + { + "epoch": 1.3090147228388902, + "grad_norm": 3.768061773712361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269910 + }, + { + "epoch": 1.3090632210317263, + "grad_norm": 4.1358429569982036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269920 + }, + { + "epoch": 1.3091117192245623, + "grad_norm": 3.7679100728382764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269930 + }, + { + "epoch": 1.3091602174173984, + "grad_norm": 3.492960587436755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269940 + }, + { + "epoch": 1.3092087156102346, + "grad_norm": 4.217353932745027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269950 + }, + { + "epoch": 1.3092572138030707, + "grad_norm": 4.046824741976707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269960 + }, + { + "epoch": 1.3093057119959068, + "grad_norm": 3.674880844073414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269970 + }, + { + "epoch": 1.3093542101887428, + "grad_norm": 3.596486664037002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269980 + }, + { + "epoch": 1.309402708381579, + "grad_norm": 3.307787821427155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 269990 + }, + { + "epoch": 1.309451206574415, + "grad_norm": 3.8384509792877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270000 + }, + { + "epoch": 1.309499704767251, + "grad_norm": 3.9175834132265663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270010 + }, + { + "epoch": 1.3095482029600873, + "grad_norm": 4.510226503384729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270020 + }, + { + "epoch": 1.3095967011529233, + "grad_norm": 3.4081484301395903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270030 + }, + { + "epoch": 1.3096451993457594, + "grad_norm": 3.4534554771425974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270040 + }, + { + "epoch": 1.3096936975385955, + "grad_norm": 3.621779498530486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270050 + }, + { + "epoch": 1.3097421957314315, + "grad_norm": 3.560902683830136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270060 + }, + { + "epoch": 1.3097906939242676, + "grad_norm": 3.4660750714010646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270070 + }, + { + "epoch": 1.3098391921171038, + "grad_norm": 3.9373023952293806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270080 + }, + { + "epoch": 1.30988769030994, + "grad_norm": 3.467602738282949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270090 + }, + { + "epoch": 1.309936188502776, + "grad_norm": 3.7328682367387955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270100 + }, + { + "epoch": 1.309984686695612, + "grad_norm": 3.813508797634313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270110 + }, + { + "epoch": 1.310033184888448, + "grad_norm": 3.417308391817642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270120 + }, + { + "epoch": 1.3100816830812843, + "grad_norm": 3.8508293442873764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270130 + }, + { + "epoch": 1.3101301812741202, + "grad_norm": 3.374393386934571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270140 + }, + { + "epoch": 1.3101786794669563, + "grad_norm": 3.8009901004443236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270150 + }, + { + "epoch": 1.3102271776597925, + "grad_norm": 3.6374768086488984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270160 + }, + { + "epoch": 1.3102756758526286, + "grad_norm": 3.9391633066543363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270170 + }, + { + "epoch": 1.3103241740454648, + "grad_norm": 3.466693243581176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270180 + }, + { + "epoch": 1.3103726722383007, + "grad_norm": 3.4500697410067005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270190 + }, + { + "epoch": 1.3104211704311368, + "grad_norm": 3.701747530726607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270200 + }, + { + "epoch": 1.310469668623973, + "grad_norm": 3.745729770798789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270210 + }, + { + "epoch": 1.3105181668168089, + "grad_norm": 3.760142419650947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270220 + }, + { + "epoch": 1.310566665009645, + "grad_norm": 3.484224819771953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270230 + }, + { + "epoch": 1.3106151632024812, + "grad_norm": 3.635289047565493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270240 + }, + { + "epoch": 1.3106636613953173, + "grad_norm": 3.555675220923149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270250 + }, + { + "epoch": 1.3107121595881535, + "grad_norm": 3.593751785047061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270260 + }, + { + "epoch": 1.3107606577809894, + "grad_norm": 3.615652133248659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270270 + }, + { + "epoch": 1.3108091559738255, + "grad_norm": 3.399764736400357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270280 + }, + { + "epoch": 1.3108576541666617, + "grad_norm": 3.386697144946993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270290 + }, + { + "epoch": 1.3109061523594976, + "grad_norm": 3.586601948768475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270300 + }, + { + "epoch": 1.3109546505523337, + "grad_norm": 3.4266243176261924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270310 + }, + { + "epoch": 1.3110031487451699, + "grad_norm": 3.387163971524387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270320 + }, + { + "epoch": 1.311051646938006, + "grad_norm": 3.373779478010874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270330 + }, + { + "epoch": 1.3111001451308422, + "grad_norm": 3.362217881885954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270340 + }, + { + "epoch": 1.311148643323678, + "grad_norm": 3.429782680086646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270350 + }, + { + "epoch": 1.3111971415165142, + "grad_norm": 3.7154642029690876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270360 + }, + { + "epoch": 1.3112456397093504, + "grad_norm": 3.771479839542735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270370 + }, + { + "epoch": 1.3112941379021863, + "grad_norm": 3.553889627028184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270380 + }, + { + "epoch": 1.3113426360950224, + "grad_norm": 3.874954401794639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270390 + }, + { + "epoch": 1.3113911342878586, + "grad_norm": 3.378794843911237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270400 + }, + { + "epoch": 1.3114396324806947, + "grad_norm": 3.462921682739761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270410 + }, + { + "epoch": 1.3114881306735309, + "grad_norm": 3.565302009178595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270420 + }, + { + "epoch": 1.3115366288663668, + "grad_norm": 3.5405562925916456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270430 + }, + { + "epoch": 1.311585127059203, + "grad_norm": 3.4162244588742396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270440 + }, + { + "epoch": 1.311633625252039, + "grad_norm": 3.51652218455456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270450 + }, + { + "epoch": 1.311682123444875, + "grad_norm": 3.3650618291858336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270460 + }, + { + "epoch": 1.3117306216377111, + "grad_norm": 3.531307513071624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270470 + }, + { + "epoch": 1.3117791198305473, + "grad_norm": 3.401092030230757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270480 + }, + { + "epoch": 1.3118276180233834, + "grad_norm": 3.4662683390251914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270490 + }, + { + "epoch": 1.3118761162162196, + "grad_norm": 3.475821941378854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270500 + }, + { + "epoch": 1.3119246144090555, + "grad_norm": 3.513162027957151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270510 + }, + { + "epoch": 1.3119731126018916, + "grad_norm": 3.4068058596403716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270520 + }, + { + "epoch": 1.3120216107947278, + "grad_norm": 3.278476867762947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270530 + }, + { + "epoch": 1.312070108987564, + "grad_norm": 3.6368749789517096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270540 + }, + { + "epoch": 1.3121186071804, + "grad_norm": 3.309445872901051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270550 + }, + { + "epoch": 1.312167105373236, + "grad_norm": 3.7600134561444065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270560 + }, + { + "epoch": 1.3122156035660721, + "grad_norm": 3.6860239305269715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270570 + }, + { + "epoch": 1.3122641017589083, + "grad_norm": 3.2968745955486156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270580 + }, + { + "epoch": 1.3123125999517442, + "grad_norm": 3.70827777373961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270590 + }, + { + "epoch": 1.3123610981445804, + "grad_norm": 3.34300658266784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270600 + }, + { + "epoch": 1.3124095963374165, + "grad_norm": 3.4993693276419435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270610 + }, + { + "epoch": 1.3124580945302526, + "grad_norm": 3.338377041472995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270620 + }, + { + "epoch": 1.3125065927230888, + "grad_norm": 3.464983677758937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270630 + }, + { + "epoch": 1.3125550909159247, + "grad_norm": 3.68978483322735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270640 + }, + { + "epoch": 1.3126035891087608, + "grad_norm": 3.439977547259332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270650 + }, + { + "epoch": 1.312652087301597, + "grad_norm": 3.325479269733478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270660 + }, + { + "epoch": 1.312700585494433, + "grad_norm": 3.189491337707295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270670 + }, + { + "epoch": 1.312749083687269, + "grad_norm": 3.237230572494809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270680 + }, + { + "epoch": 1.3127975818801052, + "grad_norm": 3.797961056761778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270690 + }, + { + "epoch": 1.3128460800729413, + "grad_norm": 3.4073131871537043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270700 + }, + { + "epoch": 1.3128945782657775, + "grad_norm": 3.4774728874253924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270710 + }, + { + "epoch": 1.3129430764586134, + "grad_norm": 3.5624797334321556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270720 + }, + { + "epoch": 1.3129915746514496, + "grad_norm": 3.415290450448083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270730 + }, + { + "epoch": 1.3130400728442857, + "grad_norm": 3.6822715543394224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270740 + }, + { + "epoch": 1.3130885710371216, + "grad_norm": 3.7768874250332374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270750 + }, + { + "epoch": 1.3131370692299578, + "grad_norm": 3.413966354059994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270760 + }, + { + "epoch": 1.313185567422794, + "grad_norm": 3.8519786471624684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270770 + }, + { + "epoch": 1.31323406561563, + "grad_norm": 3.356975142310148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270780 + }, + { + "epoch": 1.3132825638084662, + "grad_norm": 3.972357021098105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270790 + }, + { + "epoch": 1.3133310620013021, + "grad_norm": 3.341951781976604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270800 + }, + { + "epoch": 1.3133795601941383, + "grad_norm": 3.4147113581184385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270810 + }, + { + "epoch": 1.3134280583869744, + "grad_norm": 3.306483975507035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270820 + }, + { + "epoch": 1.3134765565798103, + "grad_norm": 3.1537030764638985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270830 + }, + { + "epoch": 1.3135250547726465, + "grad_norm": 3.949794802338147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270840 + }, + { + "epoch": 1.3135735529654826, + "grad_norm": 3.6047339335709694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270850 + }, + { + "epoch": 1.3136220511583188, + "grad_norm": 3.425798666967239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270860 + }, + { + "epoch": 1.313670549351155, + "grad_norm": 3.362311673527074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270870 + }, + { + "epoch": 1.3137190475439908, + "grad_norm": 3.088226563363605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270880 + }, + { + "epoch": 1.313767545736827, + "grad_norm": 4.0646849441827726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270890 + }, + { + "epoch": 1.3138160439296631, + "grad_norm": 3.464412756670754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270900 + }, + { + "epoch": 1.313864542122499, + "grad_norm": 3.266582027094955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270910 + }, + { + "epoch": 1.3139130403153352, + "grad_norm": 3.649731183941185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270920 + }, + { + "epoch": 1.3139615385081713, + "grad_norm": 3.3492458584305496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270930 + }, + { + "epoch": 1.3140100367010075, + "grad_norm": 4.2550215795245094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270940 + }, + { + "epoch": 1.3140585348938436, + "grad_norm": 3.240476686983129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270950 + }, + { + "epoch": 1.3141070330866795, + "grad_norm": 3.4413439209401986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270960 + }, + { + "epoch": 1.3141555312795157, + "grad_norm": 3.488965916176312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270970 + }, + { + "epoch": 1.3142040294723518, + "grad_norm": 3.387469860172132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270980 + }, + { + "epoch": 1.3142525276651877, + "grad_norm": 4.452529012155537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 270990 + }, + { + "epoch": 1.3143010258580239, + "grad_norm": 9.878161222331983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271000 + }, + { + "epoch": 1.31434952405086, + "grad_norm": 4.5487489330753306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271010 + }, + { + "epoch": 1.3143980222436962, + "grad_norm": 1.0419681757412036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271020 + }, + { + "epoch": 1.3144465204365323, + "grad_norm": 3.883778276758676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271030 + }, + { + "epoch": 1.3144950186293682, + "grad_norm": 4.455485935750403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271040 + }, + { + "epoch": 1.3145435168222044, + "grad_norm": 3.259465941596318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271050 + }, + { + "epoch": 1.3145920150150405, + "grad_norm": 3.287395955453576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271060 + }, + { + "epoch": 1.3146405132078767, + "grad_norm": 3.370795198520682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271070 + }, + { + "epoch": 1.3146890114007128, + "grad_norm": 3.654813340858709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271080 + }, + { + "epoch": 1.3147375095935487, + "grad_norm": 4.390500762951888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271090 + }, + { + "epoch": 1.3147860077863849, + "grad_norm": 3.320500496784007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271100 + }, + { + "epoch": 1.314834505979221, + "grad_norm": 3.237378365383847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271110 + }, + { + "epoch": 1.314883004172057, + "grad_norm": 3.432178274920261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271120 + }, + { + "epoch": 1.314931502364893, + "grad_norm": 3.2335268684846596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271130 + }, + { + "epoch": 1.3149800005577292, + "grad_norm": 4.582648926998445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271140 + }, + { + "epoch": 1.3150284987505654, + "grad_norm": 3.323841823998919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271150 + }, + { + "epoch": 1.3150769969434015, + "grad_norm": 3.256529623740789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271160 + }, + { + "epoch": 1.3151254951362374, + "grad_norm": 3.3395252785339835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271170 + }, + { + "epoch": 1.3151739933290736, + "grad_norm": 3.391545178033084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271180 + }, + { + "epoch": 1.3152224915219097, + "grad_norm": 3.762733769008264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271190 + }, + { + "epoch": 1.3152709897147457, + "grad_norm": 3.304563023220908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271200 + }, + { + "epoch": 1.3153194879075818, + "grad_norm": 3.3261557064179215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271210 + }, + { + "epoch": 1.315367986100418, + "grad_norm": 3.321584784998777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271220 + }, + { + "epoch": 1.315416484293254, + "grad_norm": 3.18319273162615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271230 + }, + { + "epoch": 1.3154649824860902, + "grad_norm": 3.578362850475969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271240 + }, + { + "epoch": 1.3155134806789262, + "grad_norm": 3.196964115659284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271250 + }, + { + "epoch": 1.3155619788717623, + "grad_norm": 3.326803010850199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271260 + }, + { + "epoch": 1.3156104770645984, + "grad_norm": 3.2675057326514434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271270 + }, + { + "epoch": 1.3156589752574344, + "grad_norm": 2.986332248156032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271280 + }, + { + "epoch": 1.3157074734502705, + "grad_norm": 3.263259884533909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271290 + }, + { + "epoch": 1.3157559716431066, + "grad_norm": 3.068122467198009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271300 + }, + { + "epoch": 1.3158044698359428, + "grad_norm": 2.9573818949302222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271310 + }, + { + "epoch": 1.315852968028779, + "grad_norm": 3.225487432700902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271320 + }, + { + "epoch": 1.3159014662216149, + "grad_norm": 3.166442397173341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271330 + }, + { + "epoch": 1.315949964414451, + "grad_norm": 3.045861873829381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271340 + }, + { + "epoch": 1.3159984626072871, + "grad_norm": 2.9580586868860337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271350 + }, + { + "epoch": 1.316046960800123, + "grad_norm": 3.026120865001758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271360 + }, + { + "epoch": 1.3160954589929592, + "grad_norm": 3.005121484989104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271370 + }, + { + "epoch": 1.3161439571857954, + "grad_norm": 3.214694999087442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271380 + }, + { + "epoch": 1.3161924553786315, + "grad_norm": 3.1501194541760924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271390 + }, + { + "epoch": 1.3162409535714676, + "grad_norm": 3.14760271180603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271400 + }, + { + "epoch": 1.3162894517643036, + "grad_norm": 2.796296882934257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271410 + }, + { + "epoch": 1.3163379499571397, + "grad_norm": 2.909394503092244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271420 + }, + { + "epoch": 1.3163864481499759, + "grad_norm": 2.9862672334957097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271430 + }, + { + "epoch": 1.3164349463428118, + "grad_norm": 3.038882567807377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271440 + }, + { + "epoch": 1.316483444535648, + "grad_norm": 2.9113030208804958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271450 + }, + { + "epoch": 1.316531942728484, + "grad_norm": 3.098415390923037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271460 + }, + { + "epoch": 1.3165804409213202, + "grad_norm": 2.9887729624533677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271470 + }, + { + "epoch": 1.3166289391141563, + "grad_norm": 2.806210375183582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271480 + }, + { + "epoch": 1.3166774373069923, + "grad_norm": 3.0083313617979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271490 + }, + { + "epoch": 1.3167259354998284, + "grad_norm": 2.967663270680987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271500 + }, + { + "epoch": 1.3167744336926646, + "grad_norm": 3.5266861431182406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271510 + }, + { + "epoch": 1.3168229318855005, + "grad_norm": 2.755311534485827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271520 + }, + { + "epoch": 1.3168714300783366, + "grad_norm": 5.2724786314684025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271530 + }, + { + "epoch": 1.3169199282711728, + "grad_norm": 2.91877242375449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271540 + }, + { + "epoch": 1.316968426464009, + "grad_norm": 2.702902079931846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271550 + }, + { + "epoch": 1.317016924656845, + "grad_norm": 2.8662688222880206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271560 + }, + { + "epoch": 1.317065422849681, + "grad_norm": 2.8114850891824972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271570 + }, + { + "epoch": 1.3171139210425171, + "grad_norm": 2.8460311440881014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271580 + }, + { + "epoch": 1.3171624192353533, + "grad_norm": 3.9111561989102483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271590 + }, + { + "epoch": 1.3172109174281894, + "grad_norm": 3.0340004286699696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271600 + }, + { + "epoch": 1.3172594156210256, + "grad_norm": 2.8437289856242387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271610 + }, + { + "epoch": 1.3173079138138615, + "grad_norm": 3.338303145028476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271620 + }, + { + "epoch": 1.3173564120066976, + "grad_norm": 2.830885570404007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271630 + }, + { + "epoch": 1.3174049101995338, + "grad_norm": 3.195835773794897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271640 + }, + { + "epoch": 1.3174534083923697, + "grad_norm": 2.5653106305867368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271650 + }, + { + "epoch": 1.3175019065852058, + "grad_norm": 3.336277032417456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271660 + }, + { + "epoch": 1.317550404778042, + "grad_norm": 2.7699652349610915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271670 + }, + { + "epoch": 1.3175989029708781, + "grad_norm": 2.6949154019462185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271680 + }, + { + "epoch": 1.3176474011637143, + "grad_norm": 2.9723626226996203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271690 + }, + { + "epoch": 1.3176958993565502, + "grad_norm": 3.0006173545871206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271700 + }, + { + "epoch": 1.3177443975493863, + "grad_norm": 2.842916124734529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271710 + }, + { + "epoch": 1.3177928957422225, + "grad_norm": 2.7421922510484364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271720 + }, + { + "epoch": 1.3178413939350584, + "grad_norm": 2.6830951682654813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271730 + }, + { + "epoch": 1.3178898921278945, + "grad_norm": 3.074772436661988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271740 + }, + { + "epoch": 1.3179383903207307, + "grad_norm": 2.794623554791542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271750 + }, + { + "epoch": 1.3179868885135668, + "grad_norm": 2.639272977944529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271760 + }, + { + "epoch": 1.318035386706403, + "grad_norm": 2.8312209465752858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271770 + }, + { + "epoch": 1.3180838848992389, + "grad_norm": 2.9810042434519346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271780 + }, + { + "epoch": 1.318132383092075, + "grad_norm": 3.0266580353099926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271790 + }, + { + "epoch": 1.3181808812849112, + "grad_norm": 2.7537753410911137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271800 + }, + { + "epoch": 1.318229379477747, + "grad_norm": 2.748232574845133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271810 + }, + { + "epoch": 1.3182778776705832, + "grad_norm": 2.6499005656432928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271820 + }, + { + "epoch": 1.3183263758634194, + "grad_norm": 2.6252790163994177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271830 + }, + { + "epoch": 1.3183748740562555, + "grad_norm": 2.9841501714145124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271840 + }, + { + "epoch": 1.3184233722490917, + "grad_norm": 2.6398986108233657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271850 + }, + { + "epoch": 1.3184718704419276, + "grad_norm": 2.6741211911485152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271860 + }, + { + "epoch": 1.3185203686347637, + "grad_norm": 2.750608274482147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271870 + }, + { + "epoch": 1.3185688668275999, + "grad_norm": 2.6848681500268867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271880 + }, + { + "epoch": 1.3186173650204358, + "grad_norm": 2.875922966438793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271890 + }, + { + "epoch": 1.318665863213272, + "grad_norm": 3.024571526566433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271900 + }, + { + "epoch": 1.318714361406108, + "grad_norm": 2.697025358600058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271910 + }, + { + "epoch": 1.3187628595989442, + "grad_norm": 2.738980420247117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271920 + }, + { + "epoch": 1.3188113577917804, + "grad_norm": 3.424735695034542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271930 + }, + { + "epoch": 1.3188598559846163, + "grad_norm": 2.981604652063652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271940 + }, + { + "epoch": 1.3189083541774524, + "grad_norm": 2.8403057683590305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271950 + }, + { + "epoch": 1.3189568523702886, + "grad_norm": 2.7080561793013658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271960 + }, + { + "epoch": 1.3190053505631245, + "grad_norm": 2.7691829274090196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271970 + }, + { + "epoch": 1.3190538487559607, + "grad_norm": 2.755921890695845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271980 + }, + { + "epoch": 1.3191023469487968, + "grad_norm": 2.7854943240868124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 271990 + }, + { + "epoch": 1.319150845141633, + "grad_norm": 4.441775658392544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272000 + }, + { + "epoch": 1.319199343334469, + "grad_norm": 2.5431758032823382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272010 + }, + { + "epoch": 1.319247841527305, + "grad_norm": 2.757639805395229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272020 + }, + { + "epoch": 1.3192963397201412, + "grad_norm": 2.6201465885833386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272030 + }, + { + "epoch": 1.3193448379129773, + "grad_norm": 3.1101905051400536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272040 + }, + { + "epoch": 1.3193933361058132, + "grad_norm": 2.8202444823932638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272050 + }, + { + "epoch": 1.3194418342986496, + "grad_norm": 2.9600288442566125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272060 + }, + { + "epoch": 1.3194903324914855, + "grad_norm": 2.5443915419032237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272070 + }, + { + "epoch": 1.3195388306843217, + "grad_norm": 2.6271491648799383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272080 + }, + { + "epoch": 1.3195873288771578, + "grad_norm": 2.9645303101233367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272090 + }, + { + "epoch": 1.3196358270699937, + "grad_norm": 2.947557575794235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272100 + }, + { + "epoch": 1.3196843252628299, + "grad_norm": 2.6107340289627246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272110 + }, + { + "epoch": 1.319732823455666, + "grad_norm": 2.555076861199268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272120 + }, + { + "epoch": 1.3197813216485021, + "grad_norm": 2.7806283497966433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272130 + }, + { + "epoch": 1.3198298198413383, + "grad_norm": 2.8087434600365668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272140 + }, + { + "epoch": 1.3198783180341742, + "grad_norm": 2.646834929009856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272150 + }, + { + "epoch": 1.3199268162270104, + "grad_norm": 2.573363744318158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272160 + }, + { + "epoch": 1.3199753144198465, + "grad_norm": 2.994364223241064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272170 + }, + { + "epoch": 1.3200238126126824, + "grad_norm": 2.4674978504890532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272180 + }, + { + "epoch": 1.3200723108055186, + "grad_norm": 2.6859503066134494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272190 + }, + { + "epoch": 1.3201208089983547, + "grad_norm": 2.7480005826419074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272200 + }, + { + "epoch": 1.3201693071911909, + "grad_norm": 2.5308480644525844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272210 + }, + { + "epoch": 1.320217805384027, + "grad_norm": 2.6821291498890787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272220 + }, + { + "epoch": 1.320266303576863, + "grad_norm": 2.5068221276569602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272230 + }, + { + "epoch": 1.320314801769699, + "grad_norm": 3.799910786028704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272240 + }, + { + "epoch": 1.3203632999625352, + "grad_norm": 2.5060190367298674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272250 + }, + { + "epoch": 1.3204117981553711, + "grad_norm": 2.7303418193014295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272260 + }, + { + "epoch": 1.3204602963482073, + "grad_norm": 2.6635428085342028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272270 + }, + { + "epoch": 1.3205087945410434, + "grad_norm": 2.8989058264983214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272280 + }, + { + "epoch": 1.3205572927338796, + "grad_norm": 2.6997911461990043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272290 + }, + { + "epoch": 1.3206057909267157, + "grad_norm": 2.8253007045009326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272300 + }, + { + "epoch": 1.3206542891195516, + "grad_norm": 2.8099774951328982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272310 + }, + { + "epoch": 1.3207027873123878, + "grad_norm": 2.3685780448090554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272320 + }, + { + "epoch": 1.320751285505224, + "grad_norm": 2.455396241884955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272330 + }, + { + "epoch": 1.3207997836980598, + "grad_norm": 2.7639552868663486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272340 + }, + { + "epoch": 1.320848281890896, + "grad_norm": 3.364124268045998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272350 + }, + { + "epoch": 1.3208967800837321, + "grad_norm": 2.501417561973085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272360 + }, + { + "epoch": 1.3209452782765683, + "grad_norm": 2.618353711625332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272370 + }, + { + "epoch": 1.3209937764694044, + "grad_norm": 3.867322106998472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272380 + }, + { + "epoch": 1.3210422746622403, + "grad_norm": 2.6257593432887916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272390 + }, + { + "epoch": 1.3210907728550765, + "grad_norm": 2.7264174917718265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272400 + }, + { + "epoch": 1.3211392710479126, + "grad_norm": 2.4167196244206934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272410 + }, + { + "epoch": 1.3211877692407485, + "grad_norm": 2.4288681288453517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272420 + }, + { + "epoch": 1.3212362674335847, + "grad_norm": 2.404206789208274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272430 + }, + { + "epoch": 1.3212847656264208, + "grad_norm": 2.8586484290826775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272440 + }, + { + "epoch": 1.321333263819257, + "grad_norm": 2.4625604666539402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272450 + }, + { + "epoch": 1.3213817620120931, + "grad_norm": 2.6546754128276007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272460 + }, + { + "epoch": 1.321430260204929, + "grad_norm": 2.52051801652442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272470 + }, + { + "epoch": 1.3214787583977652, + "grad_norm": 2.2486620565587145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272480 + }, + { + "epoch": 1.3215272565906013, + "grad_norm": 2.632887685649621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272490 + }, + { + "epoch": 1.3215757547834373, + "grad_norm": 2.404992294202657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272500 + }, + { + "epoch": 1.3216242529762734, + "grad_norm": 2.6082489057444036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272510 + }, + { + "epoch": 1.3216727511691095, + "grad_norm": 2.4155136557624246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272520 + }, + { + "epoch": 1.3217212493619457, + "grad_norm": 2.3901664647496546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272530 + }, + { + "epoch": 1.3217697475547818, + "grad_norm": 2.7643464406423846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272540 + }, + { + "epoch": 1.3218182457476177, + "grad_norm": 3.271087223311042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272550 + }, + { + "epoch": 1.321866743940454, + "grad_norm": 2.488855876947582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272560 + }, + { + "epoch": 1.32191524213329, + "grad_norm": 2.5157403271691692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272570 + }, + { + "epoch": 1.3219637403261262, + "grad_norm": 2.6370226891003767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272580 + }, + { + "epoch": 1.3220122385189623, + "grad_norm": 2.6318366153077477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272590 + }, + { + "epoch": 1.3220607367117982, + "grad_norm": 2.6525993845893936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272600 + }, + { + "epoch": 1.3221092349046344, + "grad_norm": 2.6117117357671304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272610 + }, + { + "epoch": 1.3221577330974705, + "grad_norm": 2.4138721244071348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272620 + }, + { + "epoch": 1.3222062312903065, + "grad_norm": 2.4469798631798767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272630 + }, + { + "epoch": 1.3222547294831426, + "grad_norm": 2.6412035225575892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272640 + }, + { + "epoch": 1.3223032276759787, + "grad_norm": 2.569006873898161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272650 + }, + { + "epoch": 1.3223517258688149, + "grad_norm": 2.313480251814326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272660 + }, + { + "epoch": 1.322400224061651, + "grad_norm": 2.5077433463138732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272670 + }, + { + "epoch": 1.322448722254487, + "grad_norm": 2.489202977074001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272680 + }, + { + "epoch": 1.322497220447323, + "grad_norm": 2.894052286706028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272690 + }, + { + "epoch": 1.3225457186401592, + "grad_norm": 2.2056212856114144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272700 + }, + { + "epoch": 1.3225942168329952, + "grad_norm": 2.2088133988518166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272710 + }, + { + "epoch": 1.3226427150258313, + "grad_norm": 2.4442588397732834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272720 + }, + { + "epoch": 1.3226912132186674, + "grad_norm": 3.0411921869699654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272730 + }, + { + "epoch": 1.3227397114115036, + "grad_norm": 2.7549926784331547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272740 + }, + { + "epoch": 1.3227882096043397, + "grad_norm": 2.3931439940838573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272750 + }, + { + "epoch": 1.3228367077971757, + "grad_norm": 2.465614379332237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272760 + }, + { + "epoch": 1.3228852059900118, + "grad_norm": 2.5395285874196816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272770 + }, + { + "epoch": 1.322933704182848, + "grad_norm": 2.6993795643193153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272780 + }, + { + "epoch": 1.3229822023756839, + "grad_norm": 2.6785059503708908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272790 + }, + { + "epoch": 1.32303070056852, + "grad_norm": 2.699919221527125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272800 + }, + { + "epoch": 1.3230791987613562, + "grad_norm": 2.696598677687234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272810 + }, + { + "epoch": 1.3231276969541923, + "grad_norm": 2.4267210463335687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272820 + }, + { + "epoch": 1.3231761951470284, + "grad_norm": 2.4192626568719788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272830 + }, + { + "epoch": 1.3232246933398644, + "grad_norm": 2.6937328811982297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272840 + }, + { + "epoch": 1.3232731915327005, + "grad_norm": 2.1168080621691843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272850 + }, + { + "epoch": 1.3233216897255367, + "grad_norm": 2.243877794683158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272860 + }, + { + "epoch": 1.3233701879183726, + "grad_norm": 2.8546143227003995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272870 + }, + { + "epoch": 1.3234186861112087, + "grad_norm": 2.156704503875062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272880 + }, + { + "epoch": 1.3234671843040449, + "grad_norm": 2.6299574074073462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272890 + }, + { + "epoch": 1.323515682496881, + "grad_norm": 2.1535484506784996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272900 + }, + { + "epoch": 1.3235641806897172, + "grad_norm": 2.3020161776798886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272910 + }, + { + "epoch": 1.323612678882553, + "grad_norm": 2.3075312327591746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272920 + }, + { + "epoch": 1.3236611770753892, + "grad_norm": 2.3638737189912717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272930 + }, + { + "epoch": 1.3237096752682254, + "grad_norm": 2.4724997160774365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272940 + }, + { + "epoch": 1.3237581734610613, + "grad_norm": 2.1708602915282427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272950 + }, + { + "epoch": 1.3238066716538974, + "grad_norm": 2.362279261092226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272960 + }, + { + "epoch": 1.3238551698467336, + "grad_norm": 2.0925551069694848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272970 + }, + { + "epoch": 1.3239036680395697, + "grad_norm": 3.864677466935973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272980 + }, + { + "epoch": 1.3239521662324059, + "grad_norm": 2.5978089013278804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 272990 + }, + { + "epoch": 1.3240006644252418, + "grad_norm": 2.3698097706414956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273000 + }, + { + "epoch": 1.324049162618078, + "grad_norm": 2.2938985821951974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273010 + }, + { + "epoch": 1.324097660810914, + "grad_norm": 2.2109498232225633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273020 + }, + { + "epoch": 1.32414615900375, + "grad_norm": 2.116083130943025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273030 + }, + { + "epoch": 1.3241946571965861, + "grad_norm": 2.38316051337506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273040 + }, + { + "epoch": 1.3242431553894223, + "grad_norm": 2.3503661239487883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273050 + }, + { + "epoch": 1.3242916535822584, + "grad_norm": 3.121598268762682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273060 + }, + { + "epoch": 1.3243401517750946, + "grad_norm": 2.4870697501455652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273070 + }, + { + "epoch": 1.3243886499679305, + "grad_norm": 2.7037476257874005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273080 + }, + { + "epoch": 1.3244371481607666, + "grad_norm": 2.6523453655613594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273090 + }, + { + "epoch": 1.3244856463536028, + "grad_norm": 2.5981131912544697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273100 + }, + { + "epoch": 1.324534144546439, + "grad_norm": 2.0772304765159788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273110 + }, + { + "epoch": 1.324582642739275, + "grad_norm": 2.2624368156698438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273120 + }, + { + "epoch": 1.324631140932111, + "grad_norm": 2.043073976665255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273130 + }, + { + "epoch": 1.3246796391249471, + "grad_norm": 2.3919641378711276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273140 + }, + { + "epoch": 1.3247281373177833, + "grad_norm": 2.365182361074858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273150 + }, + { + "epoch": 1.3247766355106192, + "grad_norm": 2.8365876758584818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273160 + }, + { + "epoch": 1.3248251337034553, + "grad_norm": 2.488939010447666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273170 + }, + { + "epoch": 1.3248736318962915, + "grad_norm": 2.1516969539447928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273180 + }, + { + "epoch": 1.3249221300891276, + "grad_norm": 2.398176413009878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273190 + }, + { + "epoch": 1.3249706282819638, + "grad_norm": 2.393476883355561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273200 + }, + { + "epoch": 1.3250191264747997, + "grad_norm": 2.238755847372431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273210 + }, + { + "epoch": 1.3250676246676358, + "grad_norm": 2.5633932310142882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273220 + }, + { + "epoch": 1.325116122860472, + "grad_norm": 2.2692354662012804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273230 + }, + { + "epoch": 1.325164621053308, + "grad_norm": 2.336603444064167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273240 + }, + { + "epoch": 1.325213119246144, + "grad_norm": 2.5220630917033304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273250 + }, + { + "epoch": 1.3252616174389802, + "grad_norm": 2.1144197503986106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273260 + }, + { + "epoch": 1.3253101156318163, + "grad_norm": 2.646381247473073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273270 + }, + { + "epoch": 1.3253586138246525, + "grad_norm": 2.4951873456302565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273280 + }, + { + "epoch": 1.3254071120174884, + "grad_norm": 2.8487418646250262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273290 + }, + { + "epoch": 1.3254556102103245, + "grad_norm": 2.142218313849753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273300 + }, + { + "epoch": 1.3255041084031607, + "grad_norm": 2.144114397140129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273310 + }, + { + "epoch": 1.3255526065959966, + "grad_norm": 2.3262924031541843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273320 + }, + { + "epoch": 1.3256011047888328, + "grad_norm": 2.7081490827640664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273330 + }, + { + "epoch": 1.325649602981669, + "grad_norm": 2.3256969683416173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273340 + }, + { + "epoch": 1.325698101174505, + "grad_norm": 2.0502968212099404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273350 + }, + { + "epoch": 1.3257465993673412, + "grad_norm": 2.0081111884451275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273360 + }, + { + "epoch": 1.325795097560177, + "grad_norm": 2.5271784664937513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273370 + }, + { + "epoch": 1.3258435957530132, + "grad_norm": 2.4774692519713426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273380 + }, + { + "epoch": 1.3258920939458494, + "grad_norm": 2.2492098850079856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273390 + }, + { + "epoch": 1.3259405921386853, + "grad_norm": 2.0400570122092176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273400 + }, + { + "epoch": 1.3259890903315215, + "grad_norm": 1.990652265249082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273410 + }, + { + "epoch": 1.3260375885243576, + "grad_norm": 2.3359776335496463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273420 + }, + { + "epoch": 1.3260860867171937, + "grad_norm": 2.233620755021093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273430 + }, + { + "epoch": 1.32613458491003, + "grad_norm": 2.3119334002785763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273440 + }, + { + "epoch": 1.3261830831028658, + "grad_norm": 2.1605675470937058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273450 + }, + { + "epoch": 1.326231581295702, + "grad_norm": 2.8419202990903614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273460 + }, + { + "epoch": 1.326280079488538, + "grad_norm": 2.1958916462949674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273470 + }, + { + "epoch": 1.326328577681374, + "grad_norm": 2.122664177761635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273480 + }, + { + "epoch": 1.3263770758742102, + "grad_norm": 2.3959307426935084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273490 + }, + { + "epoch": 1.3264255740670463, + "grad_norm": 1.9951022167674637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273500 + }, + { + "epoch": 1.3264740722598825, + "grad_norm": 2.095404028068515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273510 + }, + { + "epoch": 1.3265225704527186, + "grad_norm": 2.8225901615996918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273520 + }, + { + "epoch": 1.3265710686455545, + "grad_norm": 2.243648822286559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273530 + }, + { + "epoch": 1.3266195668383907, + "grad_norm": 2.4855905778053966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273540 + }, + { + "epoch": 1.3266680650312268, + "grad_norm": 3.448199237254812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273550 + }, + { + "epoch": 1.3267165632240627, + "grad_norm": 2.1159564767003758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273560 + }, + { + "epoch": 1.3267650614168989, + "grad_norm": 2.566346779531159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273570 + }, + { + "epoch": 1.326813559609735, + "grad_norm": 3.247929214467149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273580 + }, + { + "epoch": 1.3268620578025712, + "grad_norm": 2.024361123176277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273590 + }, + { + "epoch": 1.3269105559954073, + "grad_norm": 2.5376252210662642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273600 + }, + { + "epoch": 1.3269590541882432, + "grad_norm": 2.144966515515989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273610 + }, + { + "epoch": 1.3270075523810794, + "grad_norm": 2.31179200227416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273620 + }, + { + "epoch": 1.3270560505739155, + "grad_norm": 1.998616561138533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273630 + }, + { + "epoch": 1.3271045487667517, + "grad_norm": 2.4816154464701867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273640 + }, + { + "epoch": 1.3271530469595878, + "grad_norm": 2.0126121214048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273650 + }, + { + "epoch": 1.3272015451524237, + "grad_norm": 1.9049862132192175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273660 + }, + { + "epoch": 1.3272500433452599, + "grad_norm": 9.466622685749826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273670 + }, + { + "epoch": 1.327298541538096, + "grad_norm": 2.0169695247318487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273680 + }, + { + "epoch": 1.327347039730932, + "grad_norm": 2.245326058414321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273690 + }, + { + "epoch": 1.327395537923768, + "grad_norm": 2.0376885956352453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273700 + }, + { + "epoch": 1.3274440361166042, + "grad_norm": 2.1516880721605958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273710 + }, + { + "epoch": 1.3274925343094404, + "grad_norm": 2.3092443512950922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273720 + }, + { + "epoch": 1.3275410325022765, + "grad_norm": 2.0412620926890668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273730 + }, + { + "epoch": 1.3275895306951124, + "grad_norm": 2.118840214393458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273740 + }, + { + "epoch": 1.3276380288879486, + "grad_norm": 2.173424107354549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273750 + }, + { + "epoch": 1.3276865270807847, + "grad_norm": 2.4279913191094238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273760 + }, + { + "epoch": 1.3277350252736206, + "grad_norm": 2.0786677268347376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273770 + }, + { + "epoch": 1.3277835234664568, + "grad_norm": 1.855080711266055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273780 + }, + { + "epoch": 1.327832021659293, + "grad_norm": 2.2981513581044055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273790 + }, + { + "epoch": 1.327880519852129, + "grad_norm": 2.1260596838601487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273800 + }, + { + "epoch": 1.3279290180449652, + "grad_norm": 1.8689604530663928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273810 + }, + { + "epoch": 1.3279775162378011, + "grad_norm": 2.110393282350742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273820 + }, + { + "epoch": 1.3280260144306373, + "grad_norm": 1.8795121903281142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273830 + }, + { + "epoch": 1.3280745126234734, + "grad_norm": 2.7389516432663186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273840 + }, + { + "epoch": 1.3281230108163093, + "grad_norm": 2.2604922378377523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273850 + }, + { + "epoch": 1.3281715090091455, + "grad_norm": 1.8012118019328227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273860 + }, + { + "epoch": 1.3282200072019816, + "grad_norm": 2.0906897546524306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273870 + }, + { + "epoch": 1.3282685053948178, + "grad_norm": 2.0283556168010364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273880 + }, + { + "epoch": 1.328317003587654, + "grad_norm": 2.0731944161411775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273890 + }, + { + "epoch": 1.3283655017804898, + "grad_norm": 2.2823028800189604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273900 + }, + { + "epoch": 1.328413999973326, + "grad_norm": 2.407764476686225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273910 + }, + { + "epoch": 1.3284624981661621, + "grad_norm": 2.2853569703329413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273920 + }, + { + "epoch": 1.328510996358998, + "grad_norm": 1.7591307965858505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273930 + }, + { + "epoch": 1.3285594945518342, + "grad_norm": 2.0441362380552164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273940 + }, + { + "epoch": 1.3286079927446703, + "grad_norm": 1.9800991069018892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273950 + }, + { + "epoch": 1.3286564909375065, + "grad_norm": 2.163324808179823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273960 + }, + { + "epoch": 1.3287049891303426, + "grad_norm": 3.041768437128667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273970 + }, + { + "epoch": 1.3287534873231786, + "grad_norm": 2.042778035615811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273980 + }, + { + "epoch": 1.3288019855160147, + "grad_norm": 2.262409815045885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 273990 + }, + { + "epoch": 1.3288504837088508, + "grad_norm": 2.5164924366549712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274000 + }, + { + "epoch": 1.3288989819016868, + "grad_norm": 2.2608446670346893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274010 + }, + { + "epoch": 1.328947480094523, + "grad_norm": 2.4449612112675823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274020 + }, + { + "epoch": 1.328995978287359, + "grad_norm": 1.909054425652812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274030 + }, + { + "epoch": 1.3290444764801952, + "grad_norm": 2.361316653320955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274040 + }, + { + "epoch": 1.3290929746730313, + "grad_norm": 2.0300108261039895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274050 + }, + { + "epoch": 1.3291414728658673, + "grad_norm": 2.1762032176297907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274060 + }, + { + "epoch": 1.3291899710587034, + "grad_norm": 2.3314802533036527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274070 + }, + { + "epoch": 1.3292384692515395, + "grad_norm": 1.9608792811709463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274080 + }, + { + "epoch": 1.3292869674443755, + "grad_norm": 2.306277480101926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274090 + }, + { + "epoch": 1.3293354656372116, + "grad_norm": 2.142357402590278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274100 + }, + { + "epoch": 1.3293839638300478, + "grad_norm": 1.9120520278193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274110 + }, + { + "epoch": 1.329432462022884, + "grad_norm": 1.947613803565673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274120 + }, + { + "epoch": 1.32948096021572, + "grad_norm": 2.0794312050043118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274130 + }, + { + "epoch": 1.329529458408556, + "grad_norm": 2.8380158667573596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274140 + }, + { + "epoch": 1.329577956601392, + "grad_norm": 2.3889933586929146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274150 + }, + { + "epoch": 1.3296264547942283, + "grad_norm": 2.3116008662782406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274160 + }, + { + "epoch": 1.3296749529870644, + "grad_norm": 1.9723882971334206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274170 + }, + { + "epoch": 1.3297234511799005, + "grad_norm": 2.603041160398334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274180 + }, + { + "epoch": 1.3297719493727365, + "grad_norm": 2.3455699604824076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274190 + }, + { + "epoch": 1.3298204475655726, + "grad_norm": 1.9522213179357095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274200 + }, + { + "epoch": 1.3298689457584087, + "grad_norm": 2.3156527362289125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274210 + }, + { + "epoch": 1.3299174439512447, + "grad_norm": 2.0223362540150447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274220 + }, + { + "epoch": 1.3299659421440808, + "grad_norm": 1.9226069625233322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274230 + }, + { + "epoch": 1.330014440336917, + "grad_norm": 2.483655059393186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274240 + }, + { + "epoch": 1.330062938529753, + "grad_norm": 1.896184897987041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274250 + }, + { + "epoch": 1.3301114367225892, + "grad_norm": 2.1505080383121822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274260 + }, + { + "epoch": 1.3301599349154252, + "grad_norm": 3.990205854620399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274270 + }, + { + "epoch": 1.3302084331082613, + "grad_norm": 1.9807465889698506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274280 + }, + { + "epoch": 1.3302569313010975, + "grad_norm": 2.2803391175330034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274290 + }, + { + "epoch": 1.3303054294939334, + "grad_norm": 1.9204682288886943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274300 + }, + { + "epoch": 1.3303539276867695, + "grad_norm": 2.0540722900364017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274310 + }, + { + "epoch": 1.3304024258796057, + "grad_norm": 2.0501843778220064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274320 + }, + { + "epoch": 1.3304509240724418, + "grad_norm": 1.9483266555653245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274330 + }, + { + "epoch": 1.330499422265278, + "grad_norm": 1.8290350567440328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274340 + }, + { + "epoch": 1.3305479204581139, + "grad_norm": 1.7263074525430966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274350 + }, + { + "epoch": 1.33059641865095, + "grad_norm": 2.1766332736206095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274360 + }, + { + "epoch": 1.3306449168437862, + "grad_norm": 2.5508819945230243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274370 + }, + { + "epoch": 1.330693415036622, + "grad_norm": 1.6521074286401927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274380 + }, + { + "epoch": 1.3307419132294582, + "grad_norm": 2.2962943546644965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274390 + }, + { + "epoch": 1.3307904114222944, + "grad_norm": 2.3863551135150374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274400 + }, + { + "epoch": 1.3308389096151305, + "grad_norm": 1.9989320421132106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274410 + }, + { + "epoch": 1.3308874078079667, + "grad_norm": 2.149207922741425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274420 + }, + { + "epoch": 1.3309359060008026, + "grad_norm": 2.3785945657550656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274430 + }, + { + "epoch": 1.3309844041936387, + "grad_norm": 2.6747832393425597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274440 + }, + { + "epoch": 1.3310329023864749, + "grad_norm": 2.6664009666887978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274450 + }, + { + "epoch": 1.3310814005793108, + "grad_norm": 2.149942446294517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274460 + }, + { + "epoch": 1.331129898772147, + "grad_norm": 1.9649482041472766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274470 + }, + { + "epoch": 1.331178396964983, + "grad_norm": 1.908213320689356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274480 + }, + { + "epoch": 1.3312268951578192, + "grad_norm": 2.1154535900791416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274490 + }, + { + "epoch": 1.3312753933506554, + "grad_norm": 2.155072742482389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274500 + }, + { + "epoch": 1.3313238915434913, + "grad_norm": 1.7601857749127703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274510 + }, + { + "epoch": 1.3313723897363274, + "grad_norm": 1.754985490265426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274520 + }, + { + "epoch": 1.3314208879291636, + "grad_norm": 1.9120863115063003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274530 + }, + { + "epoch": 1.3314693861219995, + "grad_norm": 1.818253636542977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274540 + }, + { + "epoch": 1.3315178843148356, + "grad_norm": 2.120330044874663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274550 + }, + { + "epoch": 1.3315663825076718, + "grad_norm": 2.0059337302313907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274560 + }, + { + "epoch": 1.331614880700508, + "grad_norm": 2.0738641026696314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274570 + }, + { + "epoch": 1.331663378893344, + "grad_norm": 2.0537029854494904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274580 + }, + { + "epoch": 1.33171187708618, + "grad_norm": 1.9660797434539745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274590 + }, + { + "epoch": 1.3317603752790161, + "grad_norm": 1.7241742256146608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274600 + }, + { + "epoch": 1.3318088734718523, + "grad_norm": 1.8712277949362033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274610 + }, + { + "epoch": 1.3318573716646884, + "grad_norm": 2.205707794189493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274620 + }, + { + "epoch": 1.3319058698575246, + "grad_norm": 1.7402481233830258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274630 + }, + { + "epoch": 1.3319543680503605, + "grad_norm": 2.2199202476258506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274640 + }, + { + "epoch": 1.3320028662431966, + "grad_norm": 2.4701812151306513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274650 + }, + { + "epoch": 1.3320513644360328, + "grad_norm": 1.9633667136531585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274660 + }, + { + "epoch": 1.3320998626288687, + "grad_norm": 1.814209937833766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274670 + }, + { + "epoch": 1.3321483608217048, + "grad_norm": 2.1834495100847562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274680 + }, + { + "epoch": 1.332196859014541, + "grad_norm": 2.2064149618472584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274690 + }, + { + "epoch": 1.3322453572073771, + "grad_norm": 2.1428800067724296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274700 + }, + { + "epoch": 1.3322938554002133, + "grad_norm": 1.992616205370723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274710 + }, + { + "epoch": 1.3323423535930492, + "grad_norm": 1.6586893636372224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274720 + }, + { + "epoch": 1.3323908517858853, + "grad_norm": 1.9949956353570997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274730 + }, + { + "epoch": 1.3324393499787215, + "grad_norm": 2.233750606706053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274740 + }, + { + "epoch": 1.3324878481715574, + "grad_norm": 2.4573608925493318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274750 + }, + { + "epoch": 1.3325363463643936, + "grad_norm": 1.9084279045955554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274760 + }, + { + "epoch": 1.3325848445572297, + "grad_norm": 1.9063714162825818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274770 + }, + { + "epoch": 1.3326333427500658, + "grad_norm": 1.8979584126554983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274780 + }, + { + "epoch": 1.332681840942902, + "grad_norm": 2.1521216808650934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274790 + }, + { + "epoch": 1.332730339135738, + "grad_norm": 1.974241037316915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274800 + }, + { + "epoch": 1.332778837328574, + "grad_norm": 1.794339965499603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274810 + }, + { + "epoch": 1.3328273355214102, + "grad_norm": 1.864096788040115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274820 + }, + { + "epoch": 1.3328758337142461, + "grad_norm": 1.7874734581369012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274830 + }, + { + "epoch": 1.3329243319070823, + "grad_norm": 1.840911068029527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274840 + }, + { + "epoch": 1.3329728300999184, + "grad_norm": 2.383462494037758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274850 + }, + { + "epoch": 1.3330213282927545, + "grad_norm": 1.7333361412852355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274860 + }, + { + "epoch": 1.3330698264855907, + "grad_norm": 1.2901188028990873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274870 + }, + { + "epoch": 1.3331183246784266, + "grad_norm": 1.559167550624352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274880 + }, + { + "epoch": 1.3331668228712628, + "grad_norm": 1.8148996971945053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274890 + }, + { + "epoch": 1.333215321064099, + "grad_norm": 1.7067604218823362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274900 + }, + { + "epoch": 1.3332638192569348, + "grad_norm": 1.8216676167526202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274910 + }, + { + "epoch": 1.333312317449771, + "grad_norm": 2.0287053814627143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274920 + }, + { + "epoch": 1.3333608156426071, + "grad_norm": 2.1604027011790095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274930 + }, + { + "epoch": 1.3334093138354433, + "grad_norm": 1.9913596105425313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274940 + }, + { + "epoch": 1.3334578120282794, + "grad_norm": 1.7040381550259553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274950 + }, + { + "epoch": 1.3335063102211153, + "grad_norm": 1.8721634020835154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274960 + }, + { + "epoch": 1.3335548084139515, + "grad_norm": 2.0690661628464113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274970 + }, + { + "epoch": 1.3336033066067876, + "grad_norm": 1.894674461766499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274980 + }, + { + "epoch": 1.3336518047996235, + "grad_norm": 1.8035880344768884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 274990 + }, + { + "epoch": 1.3337003029924597, + "grad_norm": 1.900017743139415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275000 + }, + { + "epoch": 1.3337488011852958, + "grad_norm": 1.9981037269189983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275010 + }, + { + "epoch": 1.333797299378132, + "grad_norm": 1.959011441954317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275020 + }, + { + "epoch": 1.333845797570968, + "grad_norm": 1.7269416119347625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275030 + }, + { + "epoch": 1.333894295763804, + "grad_norm": 2.101200458071162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275040 + }, + { + "epoch": 1.3339427939566402, + "grad_norm": 2.2657882681187402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275050 + }, + { + "epoch": 1.3339912921494763, + "grad_norm": 1.7902623383747596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275060 + }, + { + "epoch": 1.3340397903423122, + "grad_norm": 1.8779408250679808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275070 + }, + { + "epoch": 1.3340882885351484, + "grad_norm": 2.2798481325025932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275080 + }, + { + "epoch": 1.3341367867279845, + "grad_norm": 2.3360060552590767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275090 + }, + { + "epoch": 1.3341852849208207, + "grad_norm": 1.7037175226164436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275100 + }, + { + "epoch": 1.3342337831136568, + "grad_norm": 2.0759546970339215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275110 + }, + { + "epoch": 1.3342822813064927, + "grad_norm": 1.0518555626504167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275120 + }, + { + "epoch": 1.3343307794993289, + "grad_norm": 1.821635287058143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275130 + }, + { + "epoch": 1.334379277692165, + "grad_norm": 1.8771840970543963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275140 + }, + { + "epoch": 1.3344277758850012, + "grad_norm": 1.8753759434275707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275150 + }, + { + "epoch": 1.3344762740778373, + "grad_norm": 1.9869700551566893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275160 + }, + { + "epoch": 1.3345247722706732, + "grad_norm": 1.7177926636691154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275170 + }, + { + "epoch": 1.3345732704635094, + "grad_norm": 1.8547778424249373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275180 + }, + { + "epoch": 1.3346217686563455, + "grad_norm": 1.999466903157554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275190 + }, + { + "epoch": 1.3346702668491814, + "grad_norm": 2.003521792914853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275200 + }, + { + "epoch": 1.3347187650420176, + "grad_norm": 1.9817028018564997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275210 + }, + { + "epoch": 1.3347672632348537, + "grad_norm": 2.1282451356796628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275220 + }, + { + "epoch": 1.3348157614276899, + "grad_norm": 1.851138620168058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275230 + }, + { + "epoch": 1.334864259620526, + "grad_norm": 1.8448274019533528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275240 + }, + { + "epoch": 1.334912757813362, + "grad_norm": 1.66098317322394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275250 + }, + { + "epoch": 1.334961256006198, + "grad_norm": 2.1228055757660513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275260 + }, + { + "epoch": 1.3350097541990342, + "grad_norm": 1.7807010976866877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275270 + }, + { + "epoch": 1.3350582523918701, + "grad_norm": 2.1636424207827076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275280 + }, + { + "epoch": 1.3351067505847063, + "grad_norm": 1.7245714190039507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275290 + }, + { + "epoch": 1.3351552487775424, + "grad_norm": 1.691699935690849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275300 + }, + { + "epoch": 1.3352037469703786, + "grad_norm": 1.9856159383380145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275310 + }, + { + "epoch": 1.3352522451632147, + "grad_norm": 1.9041605625602642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275320 + }, + { + "epoch": 1.3353007433560506, + "grad_norm": 2.0982506399036538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275330 + }, + { + "epoch": 1.3353492415488868, + "grad_norm": 1.9710565624109222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275340 + }, + { + "epoch": 1.335397739741723, + "grad_norm": 1.7468220647742783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275350 + }, + { + "epoch": 1.3354462379345589, + "grad_norm": 1.713342001607998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275360 + }, + { + "epoch": 1.335494736127395, + "grad_norm": 1.77952763635858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275370 + }, + { + "epoch": 1.3355432343202311, + "grad_norm": 1.695790885491988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275380 + }, + { + "epoch": 1.3355917325130673, + "grad_norm": 2.4358232764143395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275390 + }, + { + "epoch": 1.3356402307059034, + "grad_norm": 2.083455896695341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275400 + }, + { + "epoch": 1.3356887288987394, + "grad_norm": 2.37202950614801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275410 + }, + { + "epoch": 1.3357372270915755, + "grad_norm": 1.7972643817643075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275420 + }, + { + "epoch": 1.3357857252844116, + "grad_norm": 1.5446650181161203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275430 + }, + { + "epoch": 1.3358342234772476, + "grad_norm": 1.5849566992187647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275440 + }, + { + "epoch": 1.3358827216700837, + "grad_norm": 1.6286323401004665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275450 + }, + { + "epoch": 1.3359312198629198, + "grad_norm": 2.6756815429962444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275460 + }, + { + "epoch": 1.335979718055756, + "grad_norm": 1.5664809893678466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275470 + }, + { + "epoch": 1.3360282162485921, + "grad_norm": 2.365111306801282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275480 + }, + { + "epoch": 1.336076714441428, + "grad_norm": 1.6205994768370147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275490 + }, + { + "epoch": 1.3361252126342642, + "grad_norm": 1.7938997842747995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275500 + }, + { + "epoch": 1.3361737108271003, + "grad_norm": 1.9714576637852588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275510 + }, + { + "epoch": 1.3362222090199363, + "grad_norm": 1.824866657784696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275520 + }, + { + "epoch": 1.3362707072127724, + "grad_norm": 1.9815709961790162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275530 + }, + { + "epoch": 1.3363192054056086, + "grad_norm": 2.234918028420907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275540 + }, + { + "epoch": 1.3363677035984447, + "grad_norm": 1.6385447665356878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275550 + }, + { + "epoch": 1.3364162017912808, + "grad_norm": 2.0682181300912816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275560 + }, + { + "epoch": 1.3364646999841168, + "grad_norm": 1.9506165571669953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275570 + }, + { + "epoch": 1.336513198176953, + "grad_norm": 1.6977933725570438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275580 + }, + { + "epoch": 1.336561696369789, + "grad_norm": 2.240370200468078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275590 + }, + { + "epoch": 1.336610194562625, + "grad_norm": 2.152964384549705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275600 + }, + { + "epoch": 1.3366586927554611, + "grad_norm": 1.732789733921436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275610 + }, + { + "epoch": 1.3367071909482973, + "grad_norm": 2.0043918524947912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275620 + }, + { + "epoch": 1.3367556891411334, + "grad_norm": 2.2207402139429178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275630 + }, + { + "epoch": 1.3368041873339696, + "grad_norm": 1.622932721545567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275640 + }, + { + "epoch": 1.3368526855268055, + "grad_norm": 1.649589265184659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275650 + }, + { + "epoch": 1.3369011837196416, + "grad_norm": 2.7618701992082606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275660 + }, + { + "epoch": 1.3369496819124778, + "grad_norm": 1.5114187235099052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275670 + }, + { + "epoch": 1.336998180105314, + "grad_norm": 1.635158852764107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275680 + }, + { + "epoch": 1.33704667829815, + "grad_norm": 3.1082741713817086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275690 + }, + { + "epoch": 1.337095176490986, + "grad_norm": 1.995474185889634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275700 + }, + { + "epoch": 1.3371436746838221, + "grad_norm": 1.6697843108204324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275710 + }, + { + "epoch": 1.3371921728766583, + "grad_norm": 1.5811522757758212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275720 + }, + { + "epoch": 1.3372406710694942, + "grad_norm": 1.5745944992318073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275730 + }, + { + "epoch": 1.3372891692623303, + "grad_norm": 2.1122783522287136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275740 + }, + { + "epoch": 1.3373376674551665, + "grad_norm": 1.6827694793164483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275750 + }, + { + "epoch": 1.3373861656480026, + "grad_norm": 1.5187405111305452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275760 + }, + { + "epoch": 1.3374346638408388, + "grad_norm": 1.4702181694303817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275770 + }, + { + "epoch": 1.3374831620336747, + "grad_norm": 2.1521946891311927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275780 + }, + { + "epoch": 1.3375316602265108, + "grad_norm": 1.8122003453413527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275790 + }, + { + "epoch": 1.337580158419347, + "grad_norm": 1.8769311438404657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275800 + }, + { + "epoch": 1.3376286566121829, + "grad_norm": 1.6448224116061283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275810 + }, + { + "epoch": 1.337677154805019, + "grad_norm": 2.7595577378747294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275820 + }, + { + "epoch": 1.3377256529978552, + "grad_norm": 1.519268089111847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275830 + }, + { + "epoch": 1.3377741511906913, + "grad_norm": 1.4909989687339475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275840 + }, + { + "epoch": 1.3378226493835275, + "grad_norm": 1.6886916753833248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275850 + }, + { + "epoch": 1.3378711475763634, + "grad_norm": 1.909101143837688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275860 + }, + { + "epoch": 1.3379196457691995, + "grad_norm": 2.7716293260482416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275870 + }, + { + "epoch": 1.3379681439620357, + "grad_norm": 1.7754707926087576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275880 + }, + { + "epoch": 1.3380166421548716, + "grad_norm": 1.6932217405951633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275890 + }, + { + "epoch": 1.3380651403477077, + "grad_norm": 1.6561713778173726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275900 + }, + { + "epoch": 1.3381136385405439, + "grad_norm": 1.975121044495154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275910 + }, + { + "epoch": 1.33816213673338, + "grad_norm": 1.693267392965936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275920 + }, + { + "epoch": 1.3382106349262162, + "grad_norm": 1.8530482037704132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275930 + }, + { + "epoch": 1.338259133119052, + "grad_norm": 1.789909020999403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275940 + }, + { + "epoch": 1.3383076313118882, + "grad_norm": 2.3182613162475718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275950 + }, + { + "epoch": 1.3383561295047244, + "grad_norm": 1.8831958215059785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275960 + }, + { + "epoch": 1.3384046276975603, + "grad_norm": 1.5326238056445618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275970 + }, + { + "epoch": 1.3384531258903964, + "grad_norm": 1.610836086740619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275980 + }, + { + "epoch": 1.3385016240832326, + "grad_norm": 2.7684871284350265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 275990 + }, + { + "epoch": 1.3385501222760687, + "grad_norm": 1.9897564484949726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276000 + }, + { + "epoch": 1.3385986204689049, + "grad_norm": 1.656863446442003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276010 + }, + { + "epoch": 1.3386471186617408, + "grad_norm": 1.83590902480546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276020 + }, + { + "epoch": 1.338695616854577, + "grad_norm": 1.7894532078344128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276030 + }, + { + "epoch": 1.338744115047413, + "grad_norm": 1.823066497763648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276040 + }, + { + "epoch": 1.338792613240249, + "grad_norm": 1.7283037223592146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276050 + }, + { + "epoch": 1.3388411114330852, + "grad_norm": 1.836847474123715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276060 + }, + { + "epoch": 1.3388896096259213, + "grad_norm": 1.790063031137379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276070 + }, + { + "epoch": 1.3389381078187574, + "grad_norm": 1.8558999670403864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276080 + }, + { + "epoch": 1.3389866060115936, + "grad_norm": 2.0474390183267133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276090 + }, + { + "epoch": 1.3390351042044295, + "grad_norm": 1.422564377406843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276100 + }, + { + "epoch": 1.3390836023972656, + "grad_norm": 1.7599159463088654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276110 + }, + { + "epoch": 1.3391321005901018, + "grad_norm": 2.1236914449218602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276120 + }, + { + "epoch": 1.3391805987829377, + "grad_norm": 1.7884117298194724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276130 + }, + { + "epoch": 1.3392290969757739, + "grad_norm": 2.033572954474039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276140 + }, + { + "epoch": 1.33927759516861, + "grad_norm": 1.8692075443027534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276150 + }, + { + "epoch": 1.3393260933614461, + "grad_norm": 2.878511473625167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276160 + }, + { + "epoch": 1.3393745915542823, + "grad_norm": 1.7029361032427914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276170 + }, + { + "epoch": 1.3394230897471182, + "grad_norm": 2.001972276843844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276180 + }, + { + "epoch": 1.3394715879399544, + "grad_norm": 1.6131744828840056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276190 + }, + { + "epoch": 1.3395200861327905, + "grad_norm": 1.6360425902917086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276200 + }, + { + "epoch": 1.3395685843256266, + "grad_norm": 1.69452274434434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276210 + }, + { + "epoch": 1.3396170825184628, + "grad_norm": 1.701153884425821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276220 + }, + { + "epoch": 1.3396655807112987, + "grad_norm": 1.4680365367780723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276230 + }, + { + "epoch": 1.3397140789041349, + "grad_norm": 1.7535827012693517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276240 + }, + { + "epoch": 1.339762577096971, + "grad_norm": 1.6377510902998438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276250 + }, + { + "epoch": 1.339811075289807, + "grad_norm": 1.6219821930008038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276260 + }, + { + "epoch": 1.339859573482643, + "grad_norm": 1.6759742038630066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276270 + }, + { + "epoch": 1.3399080716754792, + "grad_norm": 2.348793870510235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276280 + }, + { + "epoch": 1.3399565698683154, + "grad_norm": 1.4739844012012782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276290 + }, + { + "epoch": 1.3400050680611515, + "grad_norm": 1.5643200512727162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276300 + }, + { + "epoch": 1.3400535662539874, + "grad_norm": 2.126678033675944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276310 + }, + { + "epoch": 1.3401020644468236, + "grad_norm": 1.4900522593563892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276320 + }, + { + "epoch": 1.3401505626396597, + "grad_norm": 1.7003088714773185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276330 + }, + { + "epoch": 1.3401990608324956, + "grad_norm": 2.063184822986841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276340 + }, + { + "epoch": 1.3402475590253318, + "grad_norm": 2.0363614794405294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276350 + }, + { + "epoch": 1.340296057218168, + "grad_norm": 1.4679847559762038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276360 + }, + { + "epoch": 1.340344555411004, + "grad_norm": 1.7361237780733063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276370 + }, + { + "epoch": 1.3403930536038402, + "grad_norm": 1.923273806880843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276380 + }, + { + "epoch": 1.3404415517966761, + "grad_norm": 2.438763324619231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276390 + }, + { + "epoch": 1.3404900499895123, + "grad_norm": 1.736992416567773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276400 + }, + { + "epoch": 1.3405385481823484, + "grad_norm": 1.604536947752422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276410 + }, + { + "epoch": 1.3405870463751843, + "grad_norm": 1.6692531801254518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276420 + }, + { + "epoch": 1.3406355445680205, + "grad_norm": 1.5765584393534482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276430 + }, + { + "epoch": 1.3406840427608566, + "grad_norm": 2.1493939073025103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276440 + }, + { + "epoch": 1.3407325409536928, + "grad_norm": 1.544609773418415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276450 + }, + { + "epoch": 1.340781039146529, + "grad_norm": 1.4231735789849154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276460 + }, + { + "epoch": 1.3408295373393648, + "grad_norm": 1.7519752759653784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276470 + }, + { + "epoch": 1.340878035532201, + "grad_norm": 1.6600065322336377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276480 + }, + { + "epoch": 1.3409265337250371, + "grad_norm": 1.7995411383253668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276490 + }, + { + "epoch": 1.340975031917873, + "grad_norm": 1.577912911443491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276500 + }, + { + "epoch": 1.3410235301107092, + "grad_norm": 1.3136968846083619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276510 + }, + { + "epoch": 1.3410720283035453, + "grad_norm": 1.9671068329785157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276520 + }, + { + "epoch": 1.3411205264963815, + "grad_norm": 1.7287923981257336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276530 + }, + { + "epoch": 1.3411690246892176, + "grad_norm": 1.883781131084561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276540 + }, + { + "epoch": 1.3412175228820535, + "grad_norm": 1.6399875235606487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276550 + }, + { + "epoch": 1.3412660210748897, + "grad_norm": 2.2422261380938835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276560 + }, + { + "epoch": 1.3413145192677258, + "grad_norm": 1.591723908234144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276570 + }, + { + "epoch": 1.3413630174605617, + "grad_norm": 2.0425865443485236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276580 + }, + { + "epoch": 1.341411515653398, + "grad_norm": 1.4453592989127628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276590 + }, + { + "epoch": 1.341460013846234, + "grad_norm": 2.1756019208396538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276600 + }, + { + "epoch": 1.3415085120390702, + "grad_norm": 2.1739325006819854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276610 + }, + { + "epoch": 1.3415570102319063, + "grad_norm": 1.7151148057337195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276620 + }, + { + "epoch": 1.3416055084247422, + "grad_norm": 1.2997607434783731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276630 + }, + { + "epoch": 1.3416540066175784, + "grad_norm": 1.7523541728792225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276640 + }, + { + "epoch": 1.3417025048104145, + "grad_norm": 2.389697151272685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276650 + }, + { + "epoch": 1.3417510030032505, + "grad_norm": 1.6616624520793266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276660 + }, + { + "epoch": 1.3417995011960868, + "grad_norm": 1.8188332617796732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276670 + }, + { + "epoch": 1.3418479993889227, + "grad_norm": 1.2567205054381247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276680 + }, + { + "epoch": 1.3418964975817589, + "grad_norm": 1.3663083997528247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276690 + }, + { + "epoch": 1.341944995774595, + "grad_norm": 2.5226228217434254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276700 + }, + { + "epoch": 1.341993493967431, + "grad_norm": 1.780498415371312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276710 + }, + { + "epoch": 1.342041992160267, + "grad_norm": 1.4618933619203744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276720 + }, + { + "epoch": 1.3420904903531032, + "grad_norm": 1.3501542106553188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276730 + }, + { + "epoch": 1.3421389885459394, + "grad_norm": 1.9777340654059117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276740 + }, + { + "epoch": 1.3421874867387755, + "grad_norm": 1.7975812838244565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276750 + }, + { + "epoch": 1.3422359849316114, + "grad_norm": 1.404411609229328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276760 + }, + { + "epoch": 1.3422844831244476, + "grad_norm": 1.7346311054211583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276770 + }, + { + "epoch": 1.3423329813172837, + "grad_norm": 1.9861099431750517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276780 + }, + { + "epoch": 1.3423814795101197, + "grad_norm": 2.094597917334795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276790 + }, + { + "epoch": 1.3424299777029558, + "grad_norm": 2.253429620679981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276800 + }, + { + "epoch": 1.342478475895792, + "grad_norm": 1.6076773690087975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276810 + }, + { + "epoch": 1.342526974088628, + "grad_norm": 1.8902634124629003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276820 + }, + { + "epoch": 1.3425754722814642, + "grad_norm": 1.5540024378424278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276830 + }, + { + "epoch": 1.3426239704743002, + "grad_norm": 1.654866998990201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276840 + }, + { + "epoch": 1.3426724686671363, + "grad_norm": 1.5409112208430997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276850 + }, + { + "epoch": 1.3427209668599724, + "grad_norm": 1.8103177623629563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276860 + }, + { + "epoch": 1.3427694650528084, + "grad_norm": 1.984892072925959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276870 + }, + { + "epoch": 1.3428179632456445, + "grad_norm": 1.9804041073712142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276880 + }, + { + "epoch": 1.3428664614384807, + "grad_norm": 1.6569112304409828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276890 + }, + { + "epoch": 1.3429149596313168, + "grad_norm": 1.8539545010298752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276900 + }, + { + "epoch": 1.342963457824153, + "grad_norm": 2.6310308598453958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276910 + }, + { + "epoch": 1.3430119560169889, + "grad_norm": 1.4708765760929055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276920 + }, + { + "epoch": 1.343060454209825, + "grad_norm": 1.665212501222868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276930 + }, + { + "epoch": 1.3431089524026611, + "grad_norm": 1.4888202670704231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276940 + }, + { + "epoch": 1.343157450595497, + "grad_norm": 1.562303353352945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276950 + }, + { + "epoch": 1.3432059487883332, + "grad_norm": 1.4779249823959617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276960 + }, + { + "epoch": 1.3432544469811694, + "grad_norm": 1.4076199761348107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276970 + }, + { + "epoch": 1.3433029451740055, + "grad_norm": 1.82069062049095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276980 + }, + { + "epoch": 1.3433514433668416, + "grad_norm": 1.948893846304145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 276990 + }, + { + "epoch": 1.3433999415596776, + "grad_norm": 1.3633449036376533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277000 + }, + { + "epoch": 1.3434484397525137, + "grad_norm": 3.077979116028473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277010 + }, + { + "epoch": 1.3434969379453499, + "grad_norm": 1.8234205256817404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277020 + }, + { + "epoch": 1.3435454361381858, + "grad_norm": 1.3635653495214228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277030 + }, + { + "epoch": 1.343593934331022, + "grad_norm": 1.4736047049268564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277040 + }, + { + "epoch": 1.343642432523858, + "grad_norm": 2.209531579921986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277050 + }, + { + "epoch": 1.3436909307166942, + "grad_norm": 1.625790169157426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277060 + }, + { + "epoch": 1.3437394289095304, + "grad_norm": 1.590459142164491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277070 + }, + { + "epoch": 1.3437879271023663, + "grad_norm": 1.5993308011275076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277080 + }, + { + "epoch": 1.3438364252952024, + "grad_norm": 1.8058889494909636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277090 + }, + { + "epoch": 1.3438849234880386, + "grad_norm": 1.7427108645051703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277100 + }, + { + "epoch": 1.3439334216808745, + "grad_norm": 1.70653606801352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277110 + }, + { + "epoch": 1.3439819198737106, + "grad_norm": 1.7058095380662053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277120 + }, + { + "epoch": 1.3440304180665468, + "grad_norm": 1.9314418508997733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277130 + }, + { + "epoch": 1.344078916259383, + "grad_norm": 1.5546582687875343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277140 + }, + { + "epoch": 1.344127414452219, + "grad_norm": 1.6159283688921278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277150 + }, + { + "epoch": 1.344175912645055, + "grad_norm": 1.5118528651214547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277160 + }, + { + "epoch": 1.3442244108378911, + "grad_norm": 1.5552508614291582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277170 + }, + { + "epoch": 1.3442729090307273, + "grad_norm": 1.8532372081381254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277180 + }, + { + "epoch": 1.3443214072235634, + "grad_norm": 2.0624341345865105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277190 + }, + { + "epoch": 1.3443699054163996, + "grad_norm": 1.9099177350767604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277200 + }, + { + "epoch": 1.3444184036092355, + "grad_norm": 1.6275956582489925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277210 + }, + { + "epoch": 1.3444669018020716, + "grad_norm": 1.8950013114249487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277220 + }, + { + "epoch": 1.3445153999949078, + "grad_norm": 2.3994850550934643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277230 + }, + { + "epoch": 1.3445638981877437, + "grad_norm": 2.2162483404031263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277240 + }, + { + "epoch": 1.3446123963805798, + "grad_norm": 1.619532596919271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277250 + }, + { + "epoch": 1.344660894573416, + "grad_norm": 1.6889023513044776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277260 + }, + { + "epoch": 1.3447093927662521, + "grad_norm": 1.5367749739425562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277270 + }, + { + "epoch": 1.3447578909590883, + "grad_norm": 2.0749808982145623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277280 + }, + { + "epoch": 1.3448063891519242, + "grad_norm": 1.7634704363445053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277290 + }, + { + "epoch": 1.3448548873447603, + "grad_norm": 1.4058639585812216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277300 + }, + { + "epoch": 1.3449033855375965, + "grad_norm": 1.553409312293752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277310 + }, + { + "epoch": 1.3449518837304324, + "grad_norm": 1.3904480233861705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277320 + }, + { + "epoch": 1.3450003819232685, + "grad_norm": 1.8023261105781785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277330 + }, + { + "epoch": 1.3450488801161047, + "grad_norm": 2.6554957344160357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277340 + }, + { + "epoch": 1.3450973783089408, + "grad_norm": 1.9754336832988884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277350 + }, + { + "epoch": 1.345145876501777, + "grad_norm": 1.655826409319161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277360 + }, + { + "epoch": 1.345194374694613, + "grad_norm": 1.4807538306627066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277370 + }, + { + "epoch": 1.345242872887449, + "grad_norm": 1.4201226861132454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277380 + }, + { + "epoch": 1.3452913710802852, + "grad_norm": 1.3418580024904259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277390 + }, + { + "epoch": 1.345339869273121, + "grad_norm": 1.4304612605542388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277400 + }, + { + "epoch": 1.3453883674659572, + "grad_norm": 2.4789111208178838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277410 + }, + { + "epoch": 1.3454368656587934, + "grad_norm": 1.8756306729983407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277420 + }, + { + "epoch": 1.3454853638516295, + "grad_norm": 1.667697269169821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277430 + }, + { + "epoch": 1.3455338620444657, + "grad_norm": 2.038619228983407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277440 + }, + { + "epoch": 1.3455823602373016, + "grad_norm": 1.521548576022269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277450 + }, + { + "epoch": 1.3456308584301377, + "grad_norm": 1.6067502883743146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277460 + }, + { + "epoch": 1.3456793566229739, + "grad_norm": 1.8302383608670425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277470 + }, + { + "epoch": 1.3457278548158098, + "grad_norm": 1.543963712435925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277480 + }, + { + "epoch": 1.345776353008646, + "grad_norm": 1.5885172288676586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277490 + }, + { + "epoch": 1.345824851201482, + "grad_norm": 1.5986319823468875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277500 + }, + { + "epoch": 1.3458733493943182, + "grad_norm": 1.487490663976132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277510 + }, + { + "epoch": 1.3459218475871544, + "grad_norm": 1.9203698187197915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277520 + }, + { + "epoch": 1.3459703457799903, + "grad_norm": 1.3575629509432474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277530 + }, + { + "epoch": 1.3460188439728265, + "grad_norm": 1.4760697553128921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277540 + }, + { + "epoch": 1.3460673421656626, + "grad_norm": 1.5522312324378618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277550 + }, + { + "epoch": 1.3461158403584985, + "grad_norm": 1.7084831327451866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277560 + }, + { + "epoch": 1.3461643385513347, + "grad_norm": 1.5492371829850526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277570 + }, + { + "epoch": 1.3462128367441708, + "grad_norm": 1.345790767715016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277580 + }, + { + "epoch": 1.346261334937007, + "grad_norm": 2.051070779884867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277590 + }, + { + "epoch": 1.346309833129843, + "grad_norm": 1.542924188413508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277600 + }, + { + "epoch": 1.346358331322679, + "grad_norm": 2.19908873333452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277610 + }, + { + "epoch": 1.3464068295155152, + "grad_norm": 1.9220962599320046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277620 + }, + { + "epoch": 1.3464553277083513, + "grad_norm": 2.3366258261603434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277630 + }, + { + "epoch": 1.3465038259011872, + "grad_norm": 1.6652636602998427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277640 + }, + { + "epoch": 1.3465523240940234, + "grad_norm": 1.4194379893694986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277650 + }, + { + "epoch": 1.3466008222868595, + "grad_norm": 1.9303328713249357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277660 + }, + { + "epoch": 1.3466493204796957, + "grad_norm": 1.7784072880999702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277670 + }, + { + "epoch": 1.3466978186725318, + "grad_norm": 1.4164776906966381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277680 + }, + { + "epoch": 1.3467463168653677, + "grad_norm": 1.3945874677290249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277690 + }, + { + "epoch": 1.3467948150582039, + "grad_norm": 1.3199809245634242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277700 + }, + { + "epoch": 1.34684331325104, + "grad_norm": 1.4049200913746063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277710 + }, + { + "epoch": 1.3468918114438762, + "grad_norm": 1.3245699648223308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277720 + }, + { + "epoch": 1.3469403096367123, + "grad_norm": 1.4638529499677588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277730 + }, + { + "epoch": 1.3469888078295482, + "grad_norm": 2.2005730571095228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277740 + }, + { + "epoch": 1.3470373060223844, + "grad_norm": 1.6524960955166534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277750 + }, + { + "epoch": 1.3470858042152205, + "grad_norm": 1.6262040603010064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277760 + }, + { + "epoch": 1.3471343024080564, + "grad_norm": 1.971184282467675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277770 + }, + { + "epoch": 1.3471828006008926, + "grad_norm": 1.2624462364385636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277780 + }, + { + "epoch": 1.3472312987937287, + "grad_norm": 1.4270617576528366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277790 + }, + { + "epoch": 1.3472797969865649, + "grad_norm": 1.275178096449281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277800 + }, + { + "epoch": 1.347328295179401, + "grad_norm": 1.9705785447854396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277810 + }, + { + "epoch": 1.347376793372237, + "grad_norm": 2.2225949081189356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277820 + }, + { + "epoch": 1.347425291565073, + "grad_norm": 1.7804255847408967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277830 + }, + { + "epoch": 1.3474737897579092, + "grad_norm": 1.4965523931209646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277840 + }, + { + "epoch": 1.3475222879507451, + "grad_norm": 1.4253948243947434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277850 + }, + { + "epoch": 1.3475707861435813, + "grad_norm": 2.496269857488187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277860 + }, + { + "epoch": 1.3476192843364174, + "grad_norm": 1.4527035574474212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277870 + }, + { + "epoch": 1.3476677825292536, + "grad_norm": 1.843423369507491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277880 + }, + { + "epoch": 1.3477162807220897, + "grad_norm": 2.000565402227039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277890 + }, + { + "epoch": 1.3477647789149256, + "grad_norm": 1.5282434873142847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277900 + }, + { + "epoch": 1.3478132771077618, + "grad_norm": 1.95737364094839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277910 + }, + { + "epoch": 1.347861775300598, + "grad_norm": 1.6671666713818922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277920 + }, + { + "epoch": 1.3479102734934338, + "grad_norm": 1.5358029514800364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277930 + }, + { + "epoch": 1.34795877168627, + "grad_norm": 1.3817487598544176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277940 + }, + { + "epoch": 1.3480072698791061, + "grad_norm": 1.7091430493110238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277950 + }, + { + "epoch": 1.3480557680719423, + "grad_norm": 1.555337370007237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277960 + }, + { + "epoch": 1.3481042662647784, + "grad_norm": 1.3602726056660686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277970 + }, + { + "epoch": 1.3481527644576143, + "grad_norm": 1.49362211487869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277980 + }, + { + "epoch": 1.3482012626504505, + "grad_norm": 1.604095167806463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 277990 + }, + { + "epoch": 1.3482497608432866, + "grad_norm": 1.8594983330899595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278000 + }, + { + "epoch": 1.3482982590361225, + "grad_norm": 1.553300066348129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278010 + }, + { + "epoch": 1.3483467572289587, + "grad_norm": 1.5362756400350008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278020 + }, + { + "epoch": 1.3483952554217948, + "grad_norm": 1.606162314260473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278030 + }, + { + "epoch": 1.348443753614631, + "grad_norm": 2.0325353844441452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278040 + }, + { + "epoch": 1.3484922518074671, + "grad_norm": 1.4640476386773571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278050 + }, + { + "epoch": 1.348540750000303, + "grad_norm": 2.2526956300339407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278060 + }, + { + "epoch": 1.3485892481931392, + "grad_norm": 1.3288815381429231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278070 + }, + { + "epoch": 1.3486377463859753, + "grad_norm": 1.557629580872799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278080 + }, + { + "epoch": 1.3486862445788113, + "grad_norm": 1.400871862955455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278090 + }, + { + "epoch": 1.3487347427716474, + "grad_norm": 1.3996175773911546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278100 + }, + { + "epoch": 1.3487832409644835, + "grad_norm": 1.932426307860169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278110 + }, + { + "epoch": 1.3488317391573197, + "grad_norm": 1.5790254437320073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278120 + }, + { + "epoch": 1.3488802373501558, + "grad_norm": 1.950244232773457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278130 + }, + { + "epoch": 1.3489287355429918, + "grad_norm": 2.097067763884297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278140 + }, + { + "epoch": 1.348977233735828, + "grad_norm": 1.864815857288704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278150 + }, + { + "epoch": 1.349025731928664, + "grad_norm": 1.733562449146575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278160 + }, + { + "epoch": 1.3490742301215, + "grad_norm": 1.18373728597021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278170 + }, + { + "epoch": 1.349122728314336, + "grad_norm": 3.3163161106131156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278180 + }, + { + "epoch": 1.3491712265071722, + "grad_norm": 1.535605598235179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278190 + }, + { + "epoch": 1.3492197247000084, + "grad_norm": 1.6837734762020773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278200 + }, + { + "epoch": 1.3492682228928445, + "grad_norm": 1.593728349291723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278210 + }, + { + "epoch": 1.3493167210856805, + "grad_norm": 1.7716448752480574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278220 + }, + { + "epoch": 1.3493652192785166, + "grad_norm": 1.3206228111073415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278230 + }, + { + "epoch": 1.3494137174713527, + "grad_norm": 2.1109073600200645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278240 + }, + { + "epoch": 1.349462215664189, + "grad_norm": 2.4374081419864524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278250 + }, + { + "epoch": 1.349510713857025, + "grad_norm": 1.4140323578715197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278260 + }, + { + "epoch": 1.349559212049861, + "grad_norm": 1.2057621567862498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278270 + }, + { + "epoch": 1.349607710242697, + "grad_norm": 1.704151308956625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278280 + }, + { + "epoch": 1.3496562084355332, + "grad_norm": 1.7568384080846045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278290 + }, + { + "epoch": 1.3497047066283692, + "grad_norm": 2.064635751253263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278300 + }, + { + "epoch": 1.3497532048212053, + "grad_norm": 1.5935174957348863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278310 + }, + { + "epoch": 1.3498017030140415, + "grad_norm": 1.914696490246115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278320 + }, + { + "epoch": 1.3498502012068776, + "grad_norm": 1.8823778091814347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278330 + }, + { + "epoch": 1.3498986993997137, + "grad_norm": 1.5899095373583805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278340 + }, + { + "epoch": 1.3499471975925497, + "grad_norm": 1.595395815456868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278350 + }, + { + "epoch": 1.3499956957853858, + "grad_norm": 1.4435487472042041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278360 + }, + { + "epoch": 1.350044193978222, + "grad_norm": 1.9397115025299172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278370 + }, + { + "epoch": 1.3500926921710579, + "grad_norm": 1.2979506358590243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278380 + }, + { + "epoch": 1.350141190363894, + "grad_norm": 1.771346802570406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278390 + }, + { + "epoch": 1.3501896885567302, + "grad_norm": 1.4141269488732178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278400 + }, + { + "epoch": 1.3502381867495663, + "grad_norm": 1.3320889280521442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278410 + }, + { + "epoch": 1.3502866849424024, + "grad_norm": 1.538650806764963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278420 + }, + { + "epoch": 1.3503351831352384, + "grad_norm": 1.3969692069792927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278430 + }, + { + "epoch": 1.3503836813280745, + "grad_norm": 1.40720155528129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278440 + }, + { + "epoch": 1.3504321795209107, + "grad_norm": 1.4251996915959353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278450 + }, + { + "epoch": 1.3504806777137466, + "grad_norm": 1.3481793459391156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278460 + }, + { + "epoch": 1.3505291759065827, + "grad_norm": 1.2646236946523004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278470 + }, + { + "epoch": 1.3505776740994189, + "grad_norm": 1.646360558993365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278480 + }, + { + "epoch": 1.350626172292255, + "grad_norm": 1.3359454875683241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278490 + }, + { + "epoch": 1.3506746704850912, + "grad_norm": 1.6600097296759486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278500 + }, + { + "epoch": 1.350723168677927, + "grad_norm": 1.6326465512861432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278510 + }, + { + "epoch": 1.3507716668707632, + "grad_norm": 1.4571749140657175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278520 + }, + { + "epoch": 1.3508201650635994, + "grad_norm": 1.3215360361584771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278530 + }, + { + "epoch": 1.3508686632564353, + "grad_norm": 1.6201646246827295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278540 + }, + { + "epoch": 1.3509171614492714, + "grad_norm": 1.4043516571859982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278550 + }, + { + "epoch": 1.3509656596421076, + "grad_norm": 1.4930284564229623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278560 + }, + { + "epoch": 1.3510141578349437, + "grad_norm": 1.8584060512694123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278570 + }, + { + "epoch": 1.3510626560277799, + "grad_norm": 1.3906931606300077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278580 + }, + { + "epoch": 1.3511111542206158, + "grad_norm": 1.817263850512063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278590 + }, + { + "epoch": 1.351159652413452, + "grad_norm": 2.0828698765740228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278600 + }, + { + "epoch": 1.351208150606288, + "grad_norm": 1.604219868056589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278610 + }, + { + "epoch": 1.351256648799124, + "grad_norm": 1.764211710053587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278620 + }, + { + "epoch": 1.3513051469919601, + "grad_norm": 1.6884042608467098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278630 + }, + { + "epoch": 1.3513536451847963, + "grad_norm": 1.2162240103918975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278640 + }, + { + "epoch": 1.3514021433776324, + "grad_norm": 1.7160946441663327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278650 + }, + { + "epoch": 1.3514506415704686, + "grad_norm": 1.3606899607054856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278660 + }, + { + "epoch": 1.3514991397633045, + "grad_norm": 2.034834700737065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278670 + }, + { + "epoch": 1.3515476379561406, + "grad_norm": 1.824209228118434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278680 + }, + { + "epoch": 1.3515961361489768, + "grad_norm": 1.2633470269918234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278690 + }, + { + "epoch": 1.3516446343418127, + "grad_norm": 1.55953401304032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278700 + }, + { + "epoch": 1.3516931325346488, + "grad_norm": 1.6722550455483542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278710 + }, + { + "epoch": 1.351741630727485, + "grad_norm": 2.1276322925700697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278720 + }, + { + "epoch": 1.3517901289203211, + "grad_norm": 1.4838289708052343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278730 + }, + { + "epoch": 1.3518386271131573, + "grad_norm": 1.1743983563405891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278740 + }, + { + "epoch": 1.3518871253059932, + "grad_norm": 1.6503671318446322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278750 + }, + { + "epoch": 1.3519356234988293, + "grad_norm": 1.4887826971232698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278760 + }, + { + "epoch": 1.3519841216916655, + "grad_norm": 1.5104465234117015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278770 + }, + { + "epoch": 1.3520326198845016, + "grad_norm": 1.5977906997477476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278780 + }, + { + "epoch": 1.3520811180773378, + "grad_norm": 1.539538807548979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278790 + }, + { + "epoch": 1.3521296162701737, + "grad_norm": 1.7047250722157514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278800 + }, + { + "epoch": 1.3521781144630098, + "grad_norm": 1.6220512932818565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278810 + }, + { + "epoch": 1.352226612655846, + "grad_norm": 1.62390669800061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278820 + }, + { + "epoch": 1.352275110848682, + "grad_norm": 1.4084619692766864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278830 + }, + { + "epoch": 1.352323609041518, + "grad_norm": 1.8193587081327678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278840 + }, + { + "epoch": 1.3523721072343542, + "grad_norm": 1.3986671376642335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278850 + }, + { + "epoch": 1.3524206054271903, + "grad_norm": 1.6161894933475196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278860 + }, + { + "epoch": 1.3524691036200265, + "grad_norm": 2.562467038558225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278870 + }, + { + "epoch": 1.3525176018128624, + "grad_norm": 1.4328127129203949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278880 + }, + { + "epoch": 1.3525661000056985, + "grad_norm": 2.0073064987968792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278890 + }, + { + "epoch": 1.3526145981985347, + "grad_norm": 1.3506193496937158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278900 + }, + { + "epoch": 1.3526630963913706, + "grad_norm": 1.6478939102171353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278910 + }, + { + "epoch": 1.3527115945842068, + "grad_norm": 2.39719906147684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278920 + }, + { + "epoch": 1.352760092777043, + "grad_norm": 1.2495941170698188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278930 + }, + { + "epoch": 1.352808590969879, + "grad_norm": 1.74448899770141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278940 + }, + { + "epoch": 1.3528570891627152, + "grad_norm": 1.2439976160294464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278950 + }, + { + "epoch": 1.352905587355551, + "grad_norm": 1.9139573481652405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278960 + }, + { + "epoch": 1.3529540855483873, + "grad_norm": 2.16383639894957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278970 + }, + { + "epoch": 1.3530025837412234, + "grad_norm": 1.6514109191234638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278980 + }, + { + "epoch": 1.3530510819340593, + "grad_norm": 1.3051720593182381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 278990 + }, + { + "epoch": 1.3530995801268955, + "grad_norm": 1.90952818002188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279000 + }, + { + "epoch": 1.3531480783197316, + "grad_norm": 2.3188635012161285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279010 + }, + { + "epoch": 1.3531965765125678, + "grad_norm": 1.748747635588188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279020 + }, + { + "epoch": 1.353245074705404, + "grad_norm": 1.2036547758498273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279030 + }, + { + "epoch": 1.3532935728982398, + "grad_norm": 1.2672310312211721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279040 + }, + { + "epoch": 1.353342071091076, + "grad_norm": 1.3665397702311566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279050 + }, + { + "epoch": 1.353390569283912, + "grad_norm": 1.58483430823253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279060 + }, + { + "epoch": 1.353439067476748, + "grad_norm": 1.6485779852359883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279070 + }, + { + "epoch": 1.3534875656695842, + "grad_norm": 1.826336237797932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279080 + }, + { + "epoch": 1.3535360638624203, + "grad_norm": 1.2528079018636618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279090 + }, + { + "epoch": 1.3535845620552565, + "grad_norm": 1.4928026814686746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279100 + }, + { + "epoch": 1.3536330602480926, + "grad_norm": 1.4645917367772654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279110 + }, + { + "epoch": 1.3536815584409285, + "grad_norm": 1.8743543606092317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279120 + }, + { + "epoch": 1.3537300566337647, + "grad_norm": 1.5386586227350563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279130 + }, + { + "epoch": 1.3537785548266008, + "grad_norm": 1.2793116788145653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279140 + }, + { + "epoch": 1.3538270530194367, + "grad_norm": 1.613089750662766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279150 + }, + { + "epoch": 1.3538755512122729, + "grad_norm": 1.412212835560922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279160 + }, + { + "epoch": 1.353924049405109, + "grad_norm": 1.482790334961237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279170 + }, + { + "epoch": 1.3539725475979452, + "grad_norm": 1.7840893207221598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279180 + }, + { + "epoch": 1.3540210457907813, + "grad_norm": 1.2977228180943712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279190 + }, + { + "epoch": 1.3540695439836172, + "grad_norm": 1.4070264953147671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279200 + }, + { + "epoch": 1.3541180421764534, + "grad_norm": 1.621335599111262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279210 + }, + { + "epoch": 1.3541665403692895, + "grad_norm": 1.4229933675835582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279220 + }, + { + "epoch": 1.3542150385621257, + "grad_norm": 1.2365699575411782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279230 + }, + { + "epoch": 1.3542635367549618, + "grad_norm": 1.1671073885111127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279240 + }, + { + "epoch": 1.3543120349477977, + "grad_norm": 1.3624473105267043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279250 + }, + { + "epoch": 1.3543605331406339, + "grad_norm": 1.3631941797598301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279260 + }, + { + "epoch": 1.35440903133347, + "grad_norm": 1.6868686003590483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279270 + }, + { + "epoch": 1.354457529526306, + "grad_norm": 2.3499664436599232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279280 + }, + { + "epoch": 1.354506027719142, + "grad_norm": 1.7107312899611316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279290 + }, + { + "epoch": 1.3545545259119782, + "grad_norm": 1.6609314812399134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279300 + }, + { + "epoch": 1.3546030241048144, + "grad_norm": 1.839883090326566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279310 + }, + { + "epoch": 1.3546515222976505, + "grad_norm": 1.509286740031257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279320 + }, + { + "epoch": 1.3547000204904864, + "grad_norm": 1.4057386366062019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279330 + }, + { + "epoch": 1.3547485186833226, + "grad_norm": 1.3204772386643526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279340 + }, + { + "epoch": 1.3547970168761587, + "grad_norm": 1.8351256514392844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279350 + }, + { + "epoch": 1.3548455150689946, + "grad_norm": 1.1256760856781511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279360 + }, + { + "epoch": 1.3548940132618308, + "grad_norm": 2.138986587851832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279370 + }, + { + "epoch": 1.354942511454667, + "grad_norm": 1.9384190252935696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279380 + }, + { + "epoch": 1.354991009647503, + "grad_norm": 1.523625492438896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279390 + }, + { + "epoch": 1.3550395078403392, + "grad_norm": 1.261855775425147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279400 + }, + { + "epoch": 1.3550880060331751, + "grad_norm": 1.2598618148729201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279410 + }, + { + "epoch": 1.3551365042260113, + "grad_norm": 2.3839003659986702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279420 + }, + { + "epoch": 1.3551850024188474, + "grad_norm": 1.305539676366152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279430 + }, + { + "epoch": 1.3552335006116834, + "grad_norm": 1.733829435579537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279440 + }, + { + "epoch": 1.3552819988045195, + "grad_norm": 1.4900373379589382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279450 + }, + { + "epoch": 1.3553304969973556, + "grad_norm": 1.7072780522653375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279460 + }, + { + "epoch": 1.3553789951901918, + "grad_norm": 1.850863462493635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279470 + }, + { + "epoch": 1.355427493383028, + "grad_norm": 1.603532595595425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279480 + }, + { + "epoch": 1.3554759915758638, + "grad_norm": 1.8310000626797773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279490 + }, + { + "epoch": 1.3555244897687, + "grad_norm": 1.8615153862810985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279500 + }, + { + "epoch": 1.3555729879615361, + "grad_norm": 1.9635761461245238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279510 + }, + { + "epoch": 1.355621486154372, + "grad_norm": 1.5498985206363614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279520 + }, + { + "epoch": 1.3556699843472082, + "grad_norm": 1.3557817979403808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279530 + }, + { + "epoch": 1.3557184825400443, + "grad_norm": 1.5098459371643003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279540 + }, + { + "epoch": 1.3557669807328805, + "grad_norm": 1.7693402298846195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279550 + }, + { + "epoch": 1.3558154789257166, + "grad_norm": 1.722838405271432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279560 + }, + { + "epoch": 1.3558639771185526, + "grad_norm": 1.3465851544935958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279570 + }, + { + "epoch": 1.3559124753113887, + "grad_norm": 1.5509174389194413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279580 + }, + { + "epoch": 1.3559609735042248, + "grad_norm": 1.3648348229367002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279590 + }, + { + "epoch": 1.3560094716970608, + "grad_norm": 1.802431093267387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279600 + }, + { + "epoch": 1.356057969889897, + "grad_norm": 1.754816914001367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279610 + }, + { + "epoch": 1.356106468082733, + "grad_norm": 1.556774442690312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279620 + }, + { + "epoch": 1.3561549662755692, + "grad_norm": 1.323171172629145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279630 + }, + { + "epoch": 1.3562034644684053, + "grad_norm": 2.0015708201981397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279640 + }, + { + "epoch": 1.3562519626612413, + "grad_norm": 1.4876636811322896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279650 + }, + { + "epoch": 1.3563004608540774, + "grad_norm": 1.8372791288356893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279660 + }, + { + "epoch": 1.3563489590469135, + "grad_norm": 1.3383412600376232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279670 + }, + { + "epoch": 1.3563974572397495, + "grad_norm": 1.4696994732332769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279680 + }, + { + "epoch": 1.3564459554325856, + "grad_norm": 1.70496079476834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279690 + }, + { + "epoch": 1.3564944536254218, + "grad_norm": 1.6906350097656286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279700 + }, + { + "epoch": 1.356542951818258, + "grad_norm": 1.4200311149181744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279710 + }, + { + "epoch": 1.356591450011094, + "grad_norm": 1.4114676538667936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279720 + }, + { + "epoch": 1.35663994820393, + "grad_norm": 1.5215443127658546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279730 + }, + { + "epoch": 1.3566884463967661, + "grad_norm": 1.6260559121406004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279740 + }, + { + "epoch": 1.3567369445896023, + "grad_norm": 1.413314887344086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279750 + }, + { + "epoch": 1.3567854427824384, + "grad_norm": 1.8153649250507442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279760 + }, + { + "epoch": 1.3568339409752745, + "grad_norm": 1.6506351840916977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279770 + }, + { + "epoch": 1.3568824391681105, + "grad_norm": 1.6239699363040927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279780 + }, + { + "epoch": 1.3569309373609466, + "grad_norm": 1.5797406049955498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279790 + }, + { + "epoch": 1.3569794355537828, + "grad_norm": 1.9413032958937038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279800 + }, + { + "epoch": 1.3570279337466187, + "grad_norm": 1.2988214947995402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279810 + }, + { + "epoch": 1.3570764319394548, + "grad_norm": 1.7319996103992708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279820 + }, + { + "epoch": 1.357124930132291, + "grad_norm": 1.3936553244775496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279830 + }, + { + "epoch": 1.357173428325127, + "grad_norm": 1.7878251767911024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279840 + }, + { + "epoch": 1.3572219265179633, + "grad_norm": 1.3071323579083582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279850 + }, + { + "epoch": 1.3572704247107992, + "grad_norm": 1.7089044845874923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279860 + }, + { + "epoch": 1.3573189229036353, + "grad_norm": 2.095871565188645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279870 + }, + { + "epoch": 1.3573674210964715, + "grad_norm": 1.2766784962536804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279880 + }, + { + "epoch": 1.3574159192893074, + "grad_norm": 1.3115070807145912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279890 + }, + { + "epoch": 1.3574644174821435, + "grad_norm": 2.0392020516624143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279900 + }, + { + "epoch": 1.3575129156749797, + "grad_norm": 1.1463165527914043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279910 + }, + { + "epoch": 1.3575614138678158, + "grad_norm": 1.2272824534420579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279920 + }, + { + "epoch": 1.357609912060652, + "grad_norm": 1.5188883040195833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279930 + }, + { + "epoch": 1.3576584102534879, + "grad_norm": 1.2845199570676868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279940 + }, + { + "epoch": 1.357706908446324, + "grad_norm": 1.5206580883386778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279950 + }, + { + "epoch": 1.3577554066391602, + "grad_norm": 1.6536340297079732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279960 + }, + { + "epoch": 1.357803904831996, + "grad_norm": 1.9392519590155644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279970 + }, + { + "epoch": 1.3578524030248322, + "grad_norm": 1.589321385608855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279980 + }, + { + "epoch": 1.3579009012176684, + "grad_norm": 1.4511962298513481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 279990 + }, + { + "epoch": 1.3579493994105045, + "grad_norm": 2.0181049720235933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280000 + }, + { + "epoch": 1.3579978976033407, + "grad_norm": 1.2699244322789127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280010 + }, + { + "epoch": 1.3580463957961766, + "grad_norm": 1.4019525984565462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280020 + }, + { + "epoch": 1.3580948939890127, + "grad_norm": 1.2517133995970653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280030 + }, + { + "epoch": 1.3581433921818489, + "grad_norm": 1.2271389238094343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280040 + }, + { + "epoch": 1.3581918903746848, + "grad_norm": 1.3799189346741514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280050 + }, + { + "epoch": 1.358240388567521, + "grad_norm": 1.641064706348061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280060 + }, + { + "epoch": 1.358288886760357, + "grad_norm": 2.1054342269621884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280070 + }, + { + "epoch": 1.3583373849531932, + "grad_norm": 1.2939425531044435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280080 + }, + { + "epoch": 1.3583858831460294, + "grad_norm": 1.5929719765495065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280090 + }, + { + "epoch": 1.3584343813388653, + "grad_norm": 1.5814569209737783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280100 + }, + { + "epoch": 1.3584828795317014, + "grad_norm": 1.8345295060839817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280110 + }, + { + "epoch": 1.3585313777245376, + "grad_norm": 1.214170275432025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280120 + }, + { + "epoch": 1.3585798759173735, + "grad_norm": 1.541182470532476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280130 + }, + { + "epoch": 1.3586283741102096, + "grad_norm": 1.5560884136789355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280140 + }, + { + "epoch": 1.3586768723030458, + "grad_norm": 1.9237528903204293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280150 + }, + { + "epoch": 1.358725370495882, + "grad_norm": 1.1114905440479106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280160 + }, + { + "epoch": 1.358773868688718, + "grad_norm": 1.5700090116865795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280170 + }, + { + "epoch": 1.358822366881554, + "grad_norm": 1.434707996850193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280180 + }, + { + "epoch": 1.3588708650743901, + "grad_norm": 1.6320509388378923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280190 + }, + { + "epoch": 1.3589193632672263, + "grad_norm": 1.5803848896212003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280200 + }, + { + "epoch": 1.3589678614600622, + "grad_norm": 1.8144229230188103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280210 + }, + { + "epoch": 1.3590163596528984, + "grad_norm": 1.7928780238207764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280220 + }, + { + "epoch": 1.3590648578457345, + "grad_norm": 1.5677693809834636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280230 + }, + { + "epoch": 1.3591133560385706, + "grad_norm": 1.1552501177902741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280240 + }, + { + "epoch": 1.3591618542314068, + "grad_norm": 1.1670618249581821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280250 + }, + { + "epoch": 1.3592103524242427, + "grad_norm": 1.800893656422886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280260 + }, + { + "epoch": 1.3592588506170789, + "grad_norm": 1.988715681022768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280270 + }, + { + "epoch": 1.359307348809915, + "grad_norm": 1.3531942677502684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280280 + }, + { + "epoch": 1.3593558470027511, + "grad_norm": 1.4840270345928275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280290 + }, + { + "epoch": 1.3594043451955873, + "grad_norm": 1.3663127518270812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280300 + }, + { + "epoch": 1.3594528433884232, + "grad_norm": 1.526639969995358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280310 + }, + { + "epoch": 1.3595013415812593, + "grad_norm": 1.3078390814769136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280320 + }, + { + "epoch": 1.3595498397740955, + "grad_norm": 1.6684955639334476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280330 + }, + { + "epoch": 1.3595983379669314, + "grad_norm": 3.2833746388405416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280340 + }, + { + "epoch": 1.3596468361597676, + "grad_norm": 1.449780828721714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280350 + }, + { + "epoch": 1.3596953343526037, + "grad_norm": 1.7503630544979387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280360 + }, + { + "epoch": 1.3597438325454398, + "grad_norm": 1.608638200423229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280370 + }, + { + "epoch": 1.359792330738276, + "grad_norm": 1.8343708774182232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280380 + }, + { + "epoch": 1.359840828931112, + "grad_norm": 1.6892222731712536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280390 + }, + { + "epoch": 1.359889327123948, + "grad_norm": 1.658243675706217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280400 + }, + { + "epoch": 1.3599378253167842, + "grad_norm": 1.4284928795404994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280410 + }, + { + "epoch": 1.3599863235096201, + "grad_norm": 1.426263995796262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280420 + }, + { + "epoch": 1.3600348217024563, + "grad_norm": 1.4330247211091773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280430 + }, + { + "epoch": 1.3600833198952924, + "grad_norm": 1.4280307603087294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280440 + }, + { + "epoch": 1.3601318180881286, + "grad_norm": 1.9803016115815808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280450 + }, + { + "epoch": 1.3601803162809647, + "grad_norm": 1.9511617210810073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280460 + }, + { + "epoch": 1.3602288144738006, + "grad_norm": 1.2948406791224443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280470 + }, + { + "epoch": 1.3602773126666368, + "grad_norm": 1.3084742178648412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280480 + }, + { + "epoch": 1.360325810859473, + "grad_norm": 1.2366731638735473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280490 + }, + { + "epoch": 1.3603743090523088, + "grad_norm": 1.4399434533629574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280500 + }, + { + "epoch": 1.360422807245145, + "grad_norm": 1.991082321239901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280510 + }, + { + "epoch": 1.3604713054379811, + "grad_norm": 1.5847138712388187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280520 + }, + { + "epoch": 1.3605198036308173, + "grad_norm": 2.3225627643341795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280530 + }, + { + "epoch": 1.3605683018236534, + "grad_norm": 1.189373488585943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280540 + }, + { + "epoch": 1.3606168000164893, + "grad_norm": 1.6891782195216365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280550 + }, + { + "epoch": 1.3606652982093255, + "grad_norm": 1.8349238573023285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280560 + }, + { + "epoch": 1.3607137964021616, + "grad_norm": 1.4152442773252005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280570 + }, + { + "epoch": 1.3607622945949975, + "grad_norm": 1.573327601533947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280580 + }, + { + "epoch": 1.3608107927878337, + "grad_norm": 1.1566903879156598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280590 + }, + { + "epoch": 1.3608592909806698, + "grad_norm": 1.5896317151486983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280600 + }, + { + "epoch": 1.360907789173506, + "grad_norm": 1.3078240712616207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280610 + }, + { + "epoch": 1.360956287366342, + "grad_norm": 1.5860955215885042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280620 + }, + { + "epoch": 1.361004785559178, + "grad_norm": 1.4991449859280692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280630 + }, + { + "epoch": 1.3610532837520142, + "grad_norm": 1.8086788955429256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280640 + }, + { + "epoch": 1.3611017819448503, + "grad_norm": 1.9348057378465455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280650 + }, + { + "epoch": 1.3611502801376862, + "grad_norm": 1.5224006943981294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280660 + }, + { + "epoch": 1.3611987783305224, + "grad_norm": 1.2122038484108089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280670 + }, + { + "epoch": 1.3612472765233585, + "grad_norm": 2.1332684951858027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280680 + }, + { + "epoch": 1.3612957747161947, + "grad_norm": 1.5174837386666695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280690 + }, + { + "epoch": 1.3613442729090308, + "grad_norm": 1.1868076299492714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280700 + }, + { + "epoch": 1.3613927711018667, + "grad_norm": 1.2699615581368562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280710 + }, + { + "epoch": 1.3614412692947029, + "grad_norm": 1.3183956149021014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280720 + }, + { + "epoch": 1.361489767487539, + "grad_norm": 1.4846732732110013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280730 + }, + { + "epoch": 1.361538265680375, + "grad_norm": 1.5845840195538585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280740 + }, + { + "epoch": 1.361586763873211, + "grad_norm": 1.3483635541433614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280750 + }, + { + "epoch": 1.3616352620660472, + "grad_norm": 1.4802418846215915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280760 + }, + { + "epoch": 1.3616837602588834, + "grad_norm": 1.58954343021378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280770 + }, + { + "epoch": 1.3617322584517195, + "grad_norm": 1.247099579160249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280780 + }, + { + "epoch": 1.3617807566445554, + "grad_norm": 1.6386874079898917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280790 + }, + { + "epoch": 1.3618292548373916, + "grad_norm": 1.529418547363548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280800 + }, + { + "epoch": 1.3618777530302277, + "grad_norm": 2.0609352446854245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280810 + }, + { + "epoch": 1.3619262512230639, + "grad_norm": 1.857592657472651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280820 + }, + { + "epoch": 1.3619747494159, + "grad_norm": 1.4328553454845405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280830 + }, + { + "epoch": 1.362023247608736, + "grad_norm": 1.5604504355337667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280840 + }, + { + "epoch": 1.362071745801572, + "grad_norm": 1.5946284293022472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280850 + }, + { + "epoch": 1.3621202439944082, + "grad_norm": 1.2879769251128437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280860 + }, + { + "epoch": 1.3621687421872442, + "grad_norm": 1.6478518105600415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280870 + }, + { + "epoch": 1.3622172403800803, + "grad_norm": 1.4224307065546782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280880 + }, + { + "epoch": 1.3622657385729164, + "grad_norm": 1.7440735078366743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280890 + }, + { + "epoch": 1.3623142367657526, + "grad_norm": 1.3430033085626292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280900 + }, + { + "epoch": 1.3623627349585887, + "grad_norm": 1.4086448452133027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280910 + }, + { + "epoch": 1.3624112331514246, + "grad_norm": 1.359566592640249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280920 + }, + { + "epoch": 1.3624597313442608, + "grad_norm": 2.195185189179938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280930 + }, + { + "epoch": 1.362508229537097, + "grad_norm": 1.3878128868327622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280940 + }, + { + "epoch": 1.3625567277299329, + "grad_norm": 1.484721323663507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280950 + }, + { + "epoch": 1.362605225922769, + "grad_norm": 1.3411736610180469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280960 + }, + { + "epoch": 1.3626537241156051, + "grad_norm": 1.619721956558351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280970 + }, + { + "epoch": 1.3627022223084413, + "grad_norm": 1.4488811039825578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280980 + }, + { + "epoch": 1.3627507205012774, + "grad_norm": 1.6526119139825823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 280990 + }, + { + "epoch": 1.3627992186941134, + "grad_norm": 1.140242655850443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281000 + }, + { + "epoch": 1.3628477168869495, + "grad_norm": 1.4827307381892751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281010 + }, + { + "epoch": 1.3628962150797856, + "grad_norm": 1.4479044629922555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281020 + }, + { + "epoch": 1.3629447132726216, + "grad_norm": 1.2852699349252816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281030 + }, + { + "epoch": 1.3629932114654577, + "grad_norm": 1.6422562865159307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281040 + }, + { + "epoch": 1.3630417096582939, + "grad_norm": 1.8114276301162135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281050 + }, + { + "epoch": 1.36309020785113, + "grad_norm": 2.7467276453307932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281060 + }, + { + "epoch": 1.3631387060439661, + "grad_norm": 1.4269163628455317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281070 + }, + { + "epoch": 1.363187204236802, + "grad_norm": 1.3639417595356917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281080 + }, + { + "epoch": 1.3632357024296382, + "grad_norm": 1.2405322102893024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281090 + }, + { + "epoch": 1.3632842006224744, + "grad_norm": 1.4342005805190183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281100 + }, + { + "epoch": 1.3633326988153103, + "grad_norm": 2.0340159778697853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281110 + }, + { + "epoch": 1.3633811970081464, + "grad_norm": 1.6534150049096752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281120 + }, + { + "epoch": 1.3634296952009826, + "grad_norm": 1.936513527311945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281130 + }, + { + "epoch": 1.3634781933938187, + "grad_norm": 1.1081997541850797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281140 + }, + { + "epoch": 1.3635266915866548, + "grad_norm": 1.8777491561650095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281150 + }, + { + "epoch": 1.3635751897794908, + "grad_norm": 1.8447863681103627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281160 + }, + { + "epoch": 1.363623687972327, + "grad_norm": 1.0254635363082798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281170 + }, + { + "epoch": 1.363672186165163, + "grad_norm": 1.4422128380431332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281180 + }, + { + "epoch": 1.363720684357999, + "grad_norm": 1.3123996112085479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281190 + }, + { + "epoch": 1.3637691825508351, + "grad_norm": 1.8270744917003867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281200 + }, + { + "epoch": 1.3638176807436713, + "grad_norm": 1.7109679006921397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281210 + }, + { + "epoch": 1.3638661789365074, + "grad_norm": 1.2831713469552142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281220 + }, + { + "epoch": 1.3639146771293436, + "grad_norm": 1.2962301454422231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281230 + }, + { + "epoch": 1.3639631753221795, + "grad_norm": 1.683602590674127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281240 + }, + { + "epoch": 1.3640116735150156, + "grad_norm": 1.667383209280615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281250 + }, + { + "epoch": 1.3640601717078518, + "grad_norm": 1.560431606151269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281260 + }, + { + "epoch": 1.3641086699006877, + "grad_norm": 1.7291249321260693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281270 + }, + { + "epoch": 1.364157168093524, + "grad_norm": 1.3386457275998964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281280 + }, + { + "epoch": 1.36420566628636, + "grad_norm": 1.4203248355215692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281290 + }, + { + "epoch": 1.3642541644791961, + "grad_norm": 1.4458994890276244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281300 + }, + { + "epoch": 1.3643026626720323, + "grad_norm": 1.859511833401939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281310 + }, + { + "epoch": 1.3643511608648682, + "grad_norm": 2.172992275006891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281320 + }, + { + "epoch": 1.3643996590577043, + "grad_norm": 1.3849573932134263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281330 + }, + { + "epoch": 1.3644481572505405, + "grad_norm": 1.5625220228798753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281340 + }, + { + "epoch": 1.3644966554433766, + "grad_norm": 1.4959894656385586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281350 + }, + { + "epoch": 1.3645451536362128, + "grad_norm": 1.79125017041315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281360 + }, + { + "epoch": 1.3645936518290487, + "grad_norm": 1.5922134721790826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281370 + }, + { + "epoch": 1.3646421500218848, + "grad_norm": 1.806521687797158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281380 + }, + { + "epoch": 1.364690648214721, + "grad_norm": 1.2437976870671719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281390 + }, + { + "epoch": 1.364739146407557, + "grad_norm": 2.1925169235714748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281400 + }, + { + "epoch": 1.364787644600393, + "grad_norm": 1.2627659806696556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281410 + }, + { + "epoch": 1.3648361427932292, + "grad_norm": 1.4091334321619797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281420 + }, + { + "epoch": 1.3648846409860653, + "grad_norm": 1.0529231708744646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281430 + }, + { + "epoch": 1.3649331391789015, + "grad_norm": 1.7424381937303224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281440 + }, + { + "epoch": 1.3649816373717374, + "grad_norm": 1.409011129993587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281450 + }, + { + "epoch": 1.3650301355645735, + "grad_norm": 1.289124984538148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281460 + }, + { + "epoch": 1.3650786337574097, + "grad_norm": 1.6478278297427096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281470 + }, + { + "epoch": 1.3651271319502456, + "grad_norm": 1.6282614367923998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281480 + }, + { + "epoch": 1.3651756301430817, + "grad_norm": 1.9133183926101083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281490 + }, + { + "epoch": 1.3652241283359179, + "grad_norm": 1.2939189275584795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281500 + }, + { + "epoch": 1.365272626528754, + "grad_norm": 1.2460206200159973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281510 + }, + { + "epoch": 1.3653211247215902, + "grad_norm": 1.3313042224183391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281520 + }, + { + "epoch": 1.365369622914426, + "grad_norm": 2.5401545755698862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281530 + }, + { + "epoch": 1.3654181211072622, + "grad_norm": 1.287981810094152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281540 + }, + { + "epoch": 1.3654666193000984, + "grad_norm": 1.3838079127026504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281550 + }, + { + "epoch": 1.3655151174929343, + "grad_norm": 1.2866309795356301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281560 + }, + { + "epoch": 1.3655636156857704, + "grad_norm": 1.875162247699791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281570 + }, + { + "epoch": 1.3656121138786066, + "grad_norm": 1.3986265479104532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281580 + }, + { + "epoch": 1.3656606120714427, + "grad_norm": 1.798527904384173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281590 + }, + { + "epoch": 1.3657091102642789, + "grad_norm": 1.3455440317500234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281600 + }, + { + "epoch": 1.3657576084571148, + "grad_norm": 1.4040188567321366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281610 + }, + { + "epoch": 1.365806106649951, + "grad_norm": 1.4276110071875792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281620 + }, + { + "epoch": 1.365854604842787, + "grad_norm": 1.1366092067532918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281630 + }, + { + "epoch": 1.365903103035623, + "grad_norm": 1.1949133238431386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281640 + }, + { + "epoch": 1.3659516012284592, + "grad_norm": 0.002776145003736019, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 281650 + }, + { + "epoch": 1.3660000994212953, + "grad_norm": 1.9480563423712738e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 281660 + }, + { + "epoch": 1.3660485976141314, + "grad_norm": 0.0006832360522821546, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 281670 + }, + { + "epoch": 1.3660970958069676, + "grad_norm": 0.0009451271616853774, + "learning_rate": 0.0002, + "loss": 0.0054, + "step": 281680 + }, + { + "epoch": 1.3661455939998035, + "grad_norm": 0.00011456113861640915, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 281690 + }, + { + "epoch": 1.3661940921926397, + "grad_norm": 0.0010260329581797123, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 281700 + }, + { + "epoch": 1.3662425903854758, + "grad_norm": 0.0004178086528554559, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 281710 + }, + { + "epoch": 1.3662910885783117, + "grad_norm": 0.0013346431078389287, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 281720 + }, + { + "epoch": 1.3663395867711479, + "grad_norm": 0.00022458001330960542, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 281730 + }, + { + "epoch": 1.366388084963984, + "grad_norm": 0.00016944122035056353, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 281740 + }, + { + "epoch": 1.3664365831568202, + "grad_norm": 0.004758358467370272, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281750 + }, + { + "epoch": 1.3664850813496563, + "grad_norm": 0.00012444821186363697, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 281760 + }, + { + "epoch": 1.3665335795424922, + "grad_norm": 3.034245310118422e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281770 + }, + { + "epoch": 1.3665820777353284, + "grad_norm": 5.9056976169813424e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281780 + }, + { + "epoch": 1.3666305759281645, + "grad_norm": 0.009261581115424633, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 281790 + }, + { + "epoch": 1.3666790741210006, + "grad_norm": 2.2578757125302218e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 281800 + }, + { + "epoch": 1.3667275723138368, + "grad_norm": 2.2087537217885256e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 281810 + }, + { + "epoch": 1.3667760705066727, + "grad_norm": 0.00010971317533403635, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 281820 + }, + { + "epoch": 1.3668245686995089, + "grad_norm": 0.002205220051109791, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 281830 + }, + { + "epoch": 1.366873066892345, + "grad_norm": 0.00016175853670574725, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 281840 + }, + { + "epoch": 1.366921565085181, + "grad_norm": 0.0028736393433064222, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 281850 + }, + { + "epoch": 1.366970063278017, + "grad_norm": 0.00038258449058048427, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 281860 + }, + { + "epoch": 1.3670185614708532, + "grad_norm": 0.0002002266701310873, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281870 + }, + { + "epoch": 1.3670670596636894, + "grad_norm": 3.50744703609962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281880 + }, + { + "epoch": 1.3671155578565255, + "grad_norm": 1.9853645426337607e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281890 + }, + { + "epoch": 1.3671640560493614, + "grad_norm": 1.5752477338537574e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281900 + }, + { + "epoch": 1.3672125542421976, + "grad_norm": 1.6722382497391663e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281910 + }, + { + "epoch": 1.3672610524350337, + "grad_norm": 1.4427412679651752e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281920 + }, + { + "epoch": 1.3673095506278696, + "grad_norm": 1.0335367733205203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281930 + }, + { + "epoch": 1.3673580488207058, + "grad_norm": 1.3136647794453893e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281940 + }, + { + "epoch": 1.367406547013542, + "grad_norm": 1.0690240742405877e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281950 + }, + { + "epoch": 1.367455045206378, + "grad_norm": 8.875905223248992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281960 + }, + { + "epoch": 1.3675035433992142, + "grad_norm": 8.520985829818528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281970 + }, + { + "epoch": 1.3675520415920501, + "grad_norm": 9.272832357964944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281980 + }, + { + "epoch": 1.3676005397848863, + "grad_norm": 9.345197213406209e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 281990 + }, + { + "epoch": 1.3676490379777224, + "grad_norm": 8.22438141767634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282000 + }, + { + "epoch": 1.3676975361705583, + "grad_norm": 7.905971870059147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282010 + }, + { + "epoch": 1.3677460343633945, + "grad_norm": 7.876342351664789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282020 + }, + { + "epoch": 1.3677945325562306, + "grad_norm": 7.090478902682662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282030 + }, + { + "epoch": 1.3678430307490668, + "grad_norm": 7.780258783895988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282040 + }, + { + "epoch": 1.367891528941903, + "grad_norm": 1.0669316907296889e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282050 + }, + { + "epoch": 1.3679400271347388, + "grad_norm": 6.004985152685549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282060 + }, + { + "epoch": 1.367988525327575, + "grad_norm": 7.169669061113382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282070 + }, + { + "epoch": 1.3680370235204111, + "grad_norm": 5.651270839734934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282080 + }, + { + "epoch": 1.368085521713247, + "grad_norm": 6.735295301041333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282090 + }, + { + "epoch": 1.3681340199060832, + "grad_norm": 5.273846909403801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282100 + }, + { + "epoch": 1.3681825180989193, + "grad_norm": 6.578366537723923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282110 + }, + { + "epoch": 1.3682310162917555, + "grad_norm": 5.031592536397511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282120 + }, + { + "epoch": 1.3682795144845916, + "grad_norm": 5.194068762648385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282130 + }, + { + "epoch": 1.3683280126774275, + "grad_norm": 5.4256224757409655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282140 + }, + { + "epoch": 1.3683765108702637, + "grad_norm": 4.795835138793336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282150 + }, + { + "epoch": 1.3684250090630998, + "grad_norm": 4.81889082948328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282160 + }, + { + "epoch": 1.3684735072559358, + "grad_norm": 4.407600499689579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282170 + }, + { + "epoch": 1.368522005448772, + "grad_norm": 4.419062406668672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282180 + }, + { + "epoch": 1.368570503641608, + "grad_norm": 4.824752977583557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282190 + }, + { + "epoch": 1.3686190018344442, + "grad_norm": 4.140489636483835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282200 + }, + { + "epoch": 1.3686675000272803, + "grad_norm": 8.349295057996642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282210 + }, + { + "epoch": 1.3687159982201162, + "grad_norm": 4.201770479994593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282220 + }, + { + "epoch": 1.3687644964129524, + "grad_norm": 4.2185592974419706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282230 + }, + { + "epoch": 1.3688129946057885, + "grad_norm": 4.598737177730072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282240 + }, + { + "epoch": 1.3688614927986245, + "grad_norm": 3.7316901853046147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282250 + }, + { + "epoch": 1.3689099909914606, + "grad_norm": 4.147716481384123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282260 + }, + { + "epoch": 1.3689584891842967, + "grad_norm": 3.559611968739773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282270 + }, + { + "epoch": 1.369006987377133, + "grad_norm": 3.4211975616926793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282280 + }, + { + "epoch": 1.369055485569969, + "grad_norm": 3.872699380735867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282290 + }, + { + "epoch": 1.369103983762805, + "grad_norm": 3.832486527244328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282300 + }, + { + "epoch": 1.369152481955641, + "grad_norm": 3.138667580060428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282310 + }, + { + "epoch": 1.3692009801484772, + "grad_norm": 3.165103407809511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282320 + }, + { + "epoch": 1.3692494783413134, + "grad_norm": 3.410897534195101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282330 + }, + { + "epoch": 1.3692979765341495, + "grad_norm": 3.7402098769234726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282340 + }, + { + "epoch": 1.3693464747269855, + "grad_norm": 3.3674302812869428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282350 + }, + { + "epoch": 1.3693949729198216, + "grad_norm": 3.0606911423092242e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282360 + }, + { + "epoch": 1.3694434711126577, + "grad_norm": 2.953558578155935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282370 + }, + { + "epoch": 1.3694919693054937, + "grad_norm": 2.9892059956182493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282380 + }, + { + "epoch": 1.3695404674983298, + "grad_norm": 9.22757408261532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282390 + }, + { + "epoch": 1.369588965691166, + "grad_norm": 2.9456214178935625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282400 + }, + { + "epoch": 1.369637463884002, + "grad_norm": 3.1036545351526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282410 + }, + { + "epoch": 1.3696859620768382, + "grad_norm": 2.8581462174770422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282420 + }, + { + "epoch": 1.3697344602696742, + "grad_norm": 2.9404268389043864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282430 + }, + { + "epoch": 1.3697829584625103, + "grad_norm": 3.0829903607809683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282440 + }, + { + "epoch": 1.3698314566553464, + "grad_norm": 2.732968823693227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282450 + }, + { + "epoch": 1.3698799548481824, + "grad_norm": 2.6821142000699183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282460 + }, + { + "epoch": 1.3699284530410185, + "grad_norm": 2.644923824846046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282470 + }, + { + "epoch": 1.3699769512338547, + "grad_norm": 2.6402979074191535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282480 + }, + { + "epoch": 1.3700254494266908, + "grad_norm": 3.049421593459556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282490 + }, + { + "epoch": 1.370073947619527, + "grad_norm": 2.5747649488039315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282500 + }, + { + "epoch": 1.3701224458123629, + "grad_norm": 2.609988086987869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282510 + }, + { + "epoch": 1.370170944005199, + "grad_norm": 2.5771514629013836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282520 + }, + { + "epoch": 1.3702194421980352, + "grad_norm": 2.6447648906469112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282530 + }, + { + "epoch": 1.370267940390871, + "grad_norm": 2.838858108589193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282540 + }, + { + "epoch": 1.3703164385837072, + "grad_norm": 2.8730312351399334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282550 + }, + { + "epoch": 1.3703649367765434, + "grad_norm": 2.452686430842732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282560 + }, + { + "epoch": 1.3704134349693795, + "grad_norm": 2.3908758066681912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282570 + }, + { + "epoch": 1.3704619331622157, + "grad_norm": 2.4361590931221144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282580 + }, + { + "epoch": 1.3705104313550516, + "grad_norm": 2.7000007776223356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282590 + }, + { + "epoch": 1.3705589295478877, + "grad_norm": 2.5215585992555134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282600 + }, + { + "epoch": 1.3706074277407239, + "grad_norm": 2.333241809537867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282610 + }, + { + "epoch": 1.3706559259335598, + "grad_norm": 2.234223984487471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282620 + }, + { + "epoch": 1.370704424126396, + "grad_norm": 2.3381128357868874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282630 + }, + { + "epoch": 1.370752922319232, + "grad_norm": 2.7305327421345282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282640 + }, + { + "epoch": 1.3708014205120682, + "grad_norm": 2.2428630472859368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282650 + }, + { + "epoch": 1.3708499187049044, + "grad_norm": 2.7413302632339764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282660 + }, + { + "epoch": 1.3708984168977403, + "grad_norm": 2.1167047634662595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282670 + }, + { + "epoch": 1.3709469150905764, + "grad_norm": 2.354463049414335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282680 + }, + { + "epoch": 1.3709954132834126, + "grad_norm": 2.3086599867383484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282690 + }, + { + "epoch": 1.3710439114762485, + "grad_norm": 2.0845529888902092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282700 + }, + { + "epoch": 1.3710924096690846, + "grad_norm": 1.5007288311608136e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282710 + }, + { + "epoch": 1.3711409078619208, + "grad_norm": 2.0064728687430033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282720 + }, + { + "epoch": 1.371189406054757, + "grad_norm": 1.8460630144545576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282730 + }, + { + "epoch": 1.371237904247593, + "grad_norm": 2.313527147634886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282740 + }, + { + "epoch": 1.371286402440429, + "grad_norm": 2.04194702746463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282750 + }, + { + "epoch": 1.3713349006332651, + "grad_norm": 2.3579923436045647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282760 + }, + { + "epoch": 1.3713833988261013, + "grad_norm": 2.0470213257794967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282770 + }, + { + "epoch": 1.3714318970189372, + "grad_norm": 1.7916048591359868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282780 + }, + { + "epoch": 1.3714803952117733, + "grad_norm": 1.9037516949538258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282790 + }, + { + "epoch": 1.3715288934046095, + "grad_norm": 1.7749184735293966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282800 + }, + { + "epoch": 1.3715773915974456, + "grad_norm": 1.7065538031602046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282810 + }, + { + "epoch": 1.3716258897902818, + "grad_norm": 1.7737118014338193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282820 + }, + { + "epoch": 1.3716743879831177, + "grad_norm": 1.6187435676329187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282830 + }, + { + "epoch": 1.3717228861759538, + "grad_norm": 1.8433881905366434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282840 + }, + { + "epoch": 1.37177138436879, + "grad_norm": 1.570970198372379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282850 + }, + { + "epoch": 1.3718198825616261, + "grad_norm": 1.5541306765953777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282860 + }, + { + "epoch": 1.3718683807544623, + "grad_norm": 1.5727949858046486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282870 + }, + { + "epoch": 1.3719168789472982, + "grad_norm": 1.6077785858215066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282880 + }, + { + "epoch": 1.3719653771401343, + "grad_norm": 1.7197456827489077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282890 + }, + { + "epoch": 1.3720138753329705, + "grad_norm": 1.5921311842248542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282900 + }, + { + "epoch": 1.3720623735258064, + "grad_norm": 1.6489115068907267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282910 + }, + { + "epoch": 1.3721108717186425, + "grad_norm": 1.4622620483351056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282920 + }, + { + "epoch": 1.3721593699114787, + "grad_norm": 1.5430000530614052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282930 + }, + { + "epoch": 1.3722078681043148, + "grad_norm": 1.5279305216608918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282940 + }, + { + "epoch": 1.372256366297151, + "grad_norm": 1.4161697663439554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282950 + }, + { + "epoch": 1.372304864489987, + "grad_norm": 1.4281231415225193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282960 + }, + { + "epoch": 1.372353362682823, + "grad_norm": 1.4443492091231747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282970 + }, + { + "epoch": 1.3724018608756592, + "grad_norm": 1.3840359542882652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282980 + }, + { + "epoch": 1.372450359068495, + "grad_norm": 2.9018039640504867e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 282990 + }, + { + "epoch": 1.3724988572613313, + "grad_norm": 1.3224491794971982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283000 + }, + { + "epoch": 1.3725473554541674, + "grad_norm": 1.2308996701904107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283010 + }, + { + "epoch": 1.3725958536470035, + "grad_norm": 1.5209874391075573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283020 + }, + { + "epoch": 1.3726443518398397, + "grad_norm": 1.253578602700145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283030 + }, + { + "epoch": 1.3726928500326756, + "grad_norm": 1.359678094559058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283040 + }, + { + "epoch": 1.3727413482255117, + "grad_norm": 1.2694300721705076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283050 + }, + { + "epoch": 1.372789846418348, + "grad_norm": 1.1546341056600795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283060 + }, + { + "epoch": 1.3728383446111838, + "grad_norm": 1.279597768188978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283070 + }, + { + "epoch": 1.37288684280402, + "grad_norm": 1.259531131836411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283080 + }, + { + "epoch": 1.372935340996856, + "grad_norm": 1.1738246712411637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283090 + }, + { + "epoch": 1.3729838391896922, + "grad_norm": 1.24442306059791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283100 + }, + { + "epoch": 1.3730323373825284, + "grad_norm": 1.2108474720662343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283110 + }, + { + "epoch": 1.3730808355753643, + "grad_norm": 1.1027847222067066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283120 + }, + { + "epoch": 1.3731293337682005, + "grad_norm": 2.471009793225676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283130 + }, + { + "epoch": 1.3731778319610366, + "grad_norm": 1.1416124152674456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283140 + }, + { + "epoch": 1.3732263301538725, + "grad_norm": 1.0047475598184974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283150 + }, + { + "epoch": 1.3732748283467087, + "grad_norm": 1.103959903048235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283160 + }, + { + "epoch": 1.3733233265395448, + "grad_norm": 9.991335900849663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283170 + }, + { + "epoch": 1.373371824732381, + "grad_norm": 1.0118366162714665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283180 + }, + { + "epoch": 1.373420322925217, + "grad_norm": 1.0854910215130076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283190 + }, + { + "epoch": 1.373468821118053, + "grad_norm": 2.4361472696909914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283200 + }, + { + "epoch": 1.3735173193108892, + "grad_norm": 9.736264701132313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283210 + }, + { + "epoch": 1.3735658175037253, + "grad_norm": 9.865957508736756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283220 + }, + { + "epoch": 1.3736143156965612, + "grad_norm": 1.0078957757286844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283230 + }, + { + "epoch": 1.3736628138893974, + "grad_norm": 1.091670924324717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283240 + }, + { + "epoch": 1.3737113120822335, + "grad_norm": 9.665155857874197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283250 + }, + { + "epoch": 1.3737598102750697, + "grad_norm": 9.415315389560419e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 283260 + }, + { + "epoch": 1.3738083084679058, + "grad_norm": 9.147823334387795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283270 + }, + { + "epoch": 1.3738568066607417, + "grad_norm": 9.344295222035726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283280 + }, + { + "epoch": 1.3739053048535779, + "grad_norm": 9.666483720138785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283290 + }, + { + "epoch": 1.373953803046414, + "grad_norm": 9.05204331047571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283300 + }, + { + "epoch": 1.37400230123925, + "grad_norm": 8.555981025892834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283310 + }, + { + "epoch": 1.3740507994320863, + "grad_norm": 9.194957897307177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283320 + }, + { + "epoch": 1.3740992976249222, + "grad_norm": 9.149196671387472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283330 + }, + { + "epoch": 1.3741477958177584, + "grad_norm": 9.879006483970443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283340 + }, + { + "epoch": 1.3741962940105945, + "grad_norm": 8.305351570925268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283350 + }, + { + "epoch": 1.3742447922034304, + "grad_norm": 8.080374414021207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283360 + }, + { + "epoch": 1.3742932903962666, + "grad_norm": 8.332926313414646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283370 + }, + { + "epoch": 1.3743417885891027, + "grad_norm": 1.0825536946867942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283380 + }, + { + "epoch": 1.3743902867819389, + "grad_norm": 8.615039064352459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283390 + }, + { + "epoch": 1.374438784974775, + "grad_norm": 7.747069616925728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283400 + }, + { + "epoch": 1.374487283167611, + "grad_norm": 7.811947284608323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283410 + }, + { + "epoch": 1.374535781360447, + "grad_norm": 7.538733939327358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283420 + }, + { + "epoch": 1.3745842795532832, + "grad_norm": 7.618613722115697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283430 + }, + { + "epoch": 1.3746327777461191, + "grad_norm": 8.629934882264934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283440 + }, + { + "epoch": 1.3746812759389553, + "grad_norm": 7.916696063148265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283450 + }, + { + "epoch": 1.3747297741317914, + "grad_norm": 7.352824695772142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283460 + }, + { + "epoch": 1.3747782723246276, + "grad_norm": 7.693748216297536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283470 + }, + { + "epoch": 1.3748267705174637, + "grad_norm": 7.65404763569677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283480 + }, + { + "epoch": 1.3748752687102996, + "grad_norm": 8.180783765965316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283490 + }, + { + "epoch": 1.3749237669031358, + "grad_norm": 6.860089456495189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283500 + }, + { + "epoch": 1.374972265095972, + "grad_norm": 7.084658477651828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283510 + }, + { + "epoch": 1.3750207632888078, + "grad_norm": 7.369044396909885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283520 + }, + { + "epoch": 1.375069261481644, + "grad_norm": 7.497663432332047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283530 + }, + { + "epoch": 1.3751177596744801, + "grad_norm": 7.424052341775678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283540 + }, + { + "epoch": 1.3751662578673163, + "grad_norm": 6.922325042069133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283550 + }, + { + "epoch": 1.3752147560601524, + "grad_norm": 7.048318479974114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283560 + }, + { + "epoch": 1.3752632542529883, + "grad_norm": 6.903462121954362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283570 + }, + { + "epoch": 1.3753117524458245, + "grad_norm": 6.977328439461417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283580 + }, + { + "epoch": 1.3753602506386606, + "grad_norm": 7.182693479990121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283590 + }, + { + "epoch": 1.3754087488314966, + "grad_norm": 6.68659197344823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283600 + }, + { + "epoch": 1.3754572470243327, + "grad_norm": 7.596584055136191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283610 + }, + { + "epoch": 1.3755057452171688, + "grad_norm": 6.590973953279899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283620 + }, + { + "epoch": 1.375554243410005, + "grad_norm": 6.609163278881169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283630 + }, + { + "epoch": 1.3756027416028411, + "grad_norm": 6.97368420787825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283640 + }, + { + "epoch": 1.375651239795677, + "grad_norm": 6.36761342320824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283650 + }, + { + "epoch": 1.3756997379885132, + "grad_norm": 6.058239137018973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283660 + }, + { + "epoch": 1.3757482361813493, + "grad_norm": 5.986195787954784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283670 + }, + { + "epoch": 1.3757967343741853, + "grad_norm": 6.056160373191233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283680 + }, + { + "epoch": 1.3758452325670214, + "grad_norm": 8.222834821935976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283690 + }, + { + "epoch": 1.3758937307598575, + "grad_norm": 6.339887477224693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283700 + }, + { + "epoch": 1.3759422289526937, + "grad_norm": 5.858833560523635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283710 + }, + { + "epoch": 1.3759907271455298, + "grad_norm": 5.929726398790081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283720 + }, + { + "epoch": 1.3760392253383658, + "grad_norm": 5.946647547716566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283730 + }, + { + "epoch": 1.376087723531202, + "grad_norm": 6.467080879701825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283740 + }, + { + "epoch": 1.376136221724038, + "grad_norm": 6.022174261488544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283750 + }, + { + "epoch": 1.376184719916874, + "grad_norm": 7.546879032815923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283760 + }, + { + "epoch": 1.37623321810971, + "grad_norm": 5.92244930430752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283770 + }, + { + "epoch": 1.3762817163025463, + "grad_norm": 5.847773536515888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283780 + }, + { + "epoch": 1.3763302144953824, + "grad_norm": 6.473850930888148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283790 + }, + { + "epoch": 1.3763787126882185, + "grad_norm": 5.927329880250909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283800 + }, + { + "epoch": 1.3764272108810545, + "grad_norm": 5.551394224312389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283810 + }, + { + "epoch": 1.3764757090738906, + "grad_norm": 5.804189413538552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283820 + }, + { + "epoch": 1.3765242072667268, + "grad_norm": 5.63084029181482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283830 + }, + { + "epoch": 1.376572705459563, + "grad_norm": 5.766970048171061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283840 + }, + { + "epoch": 1.376621203652399, + "grad_norm": 5.4702894658476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283850 + }, + { + "epoch": 1.376669701845235, + "grad_norm": 5.358637054087012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283860 + }, + { + "epoch": 1.376718200038071, + "grad_norm": 5.297635539136536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283870 + }, + { + "epoch": 1.3767666982309072, + "grad_norm": 5.5343684834952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283880 + }, + { + "epoch": 1.3768151964237432, + "grad_norm": 5.615964937533136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283890 + }, + { + "epoch": 1.3768636946165793, + "grad_norm": 5.271322152111679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283900 + }, + { + "epoch": 1.3769121928094155, + "grad_norm": 5.890806278330274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283910 + }, + { + "epoch": 1.3769606910022516, + "grad_norm": 5.51114794689056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283920 + }, + { + "epoch": 1.3770091891950877, + "grad_norm": 5.519971182366135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283930 + }, + { + "epoch": 1.3770576873879237, + "grad_norm": 5.851711648574565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283940 + }, + { + "epoch": 1.3771061855807598, + "grad_norm": 5.110449592393707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283950 + }, + { + "epoch": 1.377154683773596, + "grad_norm": 5.589413945017441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283960 + }, + { + "epoch": 1.3772031819664319, + "grad_norm": 4.861460638494464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283970 + }, + { + "epoch": 1.377251680159268, + "grad_norm": 4.887877480541647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283980 + }, + { + "epoch": 1.3773001783521042, + "grad_norm": 5.479172955347167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 283990 + }, + { + "epoch": 1.3773486765449403, + "grad_norm": 5.005162506677152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284000 + }, + { + "epoch": 1.3773971747377765, + "grad_norm": 5.147026058693882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284010 + }, + { + "epoch": 1.3774456729306124, + "grad_norm": 5.016254362999462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284020 + }, + { + "epoch": 1.3774941711234485, + "grad_norm": 4.705518108494289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284030 + }, + { + "epoch": 1.3775426693162847, + "grad_norm": 5.380168772717298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284040 + }, + { + "epoch": 1.3775911675091206, + "grad_norm": 5.047562012805429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284050 + }, + { + "epoch": 1.3776396657019567, + "grad_norm": 5.069323947282101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284060 + }, + { + "epoch": 1.3776881638947929, + "grad_norm": 5.076605020803981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284070 + }, + { + "epoch": 1.377736662087629, + "grad_norm": 5.035707317802007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284080 + }, + { + "epoch": 1.3777851602804652, + "grad_norm": 5.094264565741469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284090 + }, + { + "epoch": 1.377833658473301, + "grad_norm": 5.03858132105961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284100 + }, + { + "epoch": 1.3778821566661372, + "grad_norm": 4.5497452560994134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284110 + }, + { + "epoch": 1.3779306548589734, + "grad_norm": 4.686510806095612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284120 + }, + { + "epoch": 1.3779791530518093, + "grad_norm": 4.785115947925078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284130 + }, + { + "epoch": 1.3780276512446454, + "grad_norm": 5.356287147151306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284140 + }, + { + "epoch": 1.3780761494374816, + "grad_norm": 4.966082087776158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284150 + }, + { + "epoch": 1.3781246476303177, + "grad_norm": 4.726760778339667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284160 + }, + { + "epoch": 1.3781731458231539, + "grad_norm": 4.420643904268218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284170 + }, + { + "epoch": 1.3782216440159898, + "grad_norm": 4.6801312691968633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284180 + }, + { + "epoch": 1.378270142208826, + "grad_norm": 4.7304629902100714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284190 + }, + { + "epoch": 1.378318640401662, + "grad_norm": 4.158679871579807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284200 + }, + { + "epoch": 1.378367138594498, + "grad_norm": 4.8894565907176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284210 + }, + { + "epoch": 1.3784156367873341, + "grad_norm": 4.3337240640539676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284220 + }, + { + "epoch": 1.3784641349801703, + "grad_norm": 4.2694190938163956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284230 + }, + { + "epoch": 1.3785126331730064, + "grad_norm": 4.399855981773726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284240 + }, + { + "epoch": 1.3785611313658426, + "grad_norm": 4.482115798509767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284250 + }, + { + "epoch": 1.3786096295586785, + "grad_norm": 4.347461413090059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284260 + }, + { + "epoch": 1.3786581277515146, + "grad_norm": 4.875790864389273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284270 + }, + { + "epoch": 1.3787066259443508, + "grad_norm": 4.362547940672812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284280 + }, + { + "epoch": 1.3787551241371867, + "grad_norm": 4.4912155772180995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284290 + }, + { + "epoch": 1.3788036223300228, + "grad_norm": 4.582321651014354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284300 + }, + { + "epoch": 1.378852120522859, + "grad_norm": 4.511373958848708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284310 + }, + { + "epoch": 1.3789006187156951, + "grad_norm": 4.148699019879132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284320 + }, + { + "epoch": 1.3789491169085313, + "grad_norm": 4.1691720298331347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284330 + }, + { + "epoch": 1.3789976151013672, + "grad_norm": 4.355850080628443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284340 + }, + { + "epoch": 1.3790461132942033, + "grad_norm": 4.45342180910302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284350 + }, + { + "epoch": 1.3790946114870395, + "grad_norm": 4.5592270225824905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284360 + }, + { + "epoch": 1.3791431096798756, + "grad_norm": 4.2986840753655997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284370 + }, + { + "epoch": 1.3791916078727118, + "grad_norm": 3.86760461879021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284380 + }, + { + "epoch": 1.3792401060655477, + "grad_norm": 4.0475259766026284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284390 + }, + { + "epoch": 1.3792886042583838, + "grad_norm": 3.983584235811577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284400 + }, + { + "epoch": 1.37933710245122, + "grad_norm": 6.61547232994053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284410 + }, + { + "epoch": 1.379385600644056, + "grad_norm": 3.933089942620427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284420 + }, + { + "epoch": 1.379434098836892, + "grad_norm": 3.914487365364039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284430 + }, + { + "epoch": 1.3794825970297282, + "grad_norm": 4.538627536021522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284440 + }, + { + "epoch": 1.3795310952225643, + "grad_norm": 3.896432758665469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284450 + }, + { + "epoch": 1.3795795934154005, + "grad_norm": 4.589456068515574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284460 + }, + { + "epoch": 1.3796280916082364, + "grad_norm": 4.09784604471497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284470 + }, + { + "epoch": 1.3796765898010726, + "grad_norm": 3.8903831978132075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284480 + }, + { + "epoch": 1.3797250879939087, + "grad_norm": 4.0240047383122146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284490 + }, + { + "epoch": 1.3797735861867446, + "grad_norm": 3.980856604357541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284500 + }, + { + "epoch": 1.3798220843795808, + "grad_norm": 3.9801597040423076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284510 + }, + { + "epoch": 1.379870582572417, + "grad_norm": 4.2753427464958804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284520 + }, + { + "epoch": 1.379919080765253, + "grad_norm": 3.589853179164493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284530 + }, + { + "epoch": 1.3799675789580892, + "grad_norm": 3.8880440911270853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284540 + }, + { + "epoch": 1.3800160771509251, + "grad_norm": 3.7020009813204524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284550 + }, + { + "epoch": 1.3800645753437613, + "grad_norm": 4.1290400076832157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284560 + }, + { + "epoch": 1.3801130735365974, + "grad_norm": 3.6390102309269423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284570 + }, + { + "epoch": 1.3801615717294333, + "grad_norm": 3.84786147833438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284580 + }, + { + "epoch": 1.3802100699222695, + "grad_norm": 3.7625585491696256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284590 + }, + { + "epoch": 1.3802585681151056, + "grad_norm": 3.5879347137779405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284600 + }, + { + "epoch": 1.3803070663079418, + "grad_norm": 3.604332334816718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284610 + }, + { + "epoch": 1.380355564500778, + "grad_norm": 3.4716836694315134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284620 + }, + { + "epoch": 1.3804040626936138, + "grad_norm": 3.665366534733039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284630 + }, + { + "epoch": 1.38045256088645, + "grad_norm": 3.857308570331952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284640 + }, + { + "epoch": 1.380501059079286, + "grad_norm": 3.5746913340517494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284650 + }, + { + "epoch": 1.380549557272122, + "grad_norm": 3.9344328683910135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284660 + }, + { + "epoch": 1.3805980554649582, + "grad_norm": 3.5822148447550717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284670 + }, + { + "epoch": 1.3806465536577943, + "grad_norm": 3.4801615811375086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284680 + }, + { + "epoch": 1.3806950518506305, + "grad_norm": 3.478855887806276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284690 + }, + { + "epoch": 1.3807435500434666, + "grad_norm": 3.473013521215762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284700 + }, + { + "epoch": 1.3807920482363025, + "grad_norm": 3.404094570669258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284710 + }, + { + "epoch": 1.3808405464291387, + "grad_norm": 3.5568581324696424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284720 + }, + { + "epoch": 1.3808890446219748, + "grad_norm": 3.483869193132705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284730 + }, + { + "epoch": 1.3809375428148107, + "grad_norm": 3.271119908276887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284740 + }, + { + "epoch": 1.3809860410076469, + "grad_norm": 3.3627148354753444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284750 + }, + { + "epoch": 1.381034539200483, + "grad_norm": 3.463478321918956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284760 + }, + { + "epoch": 1.3810830373933192, + "grad_norm": 3.246553887947812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284770 + }, + { + "epoch": 1.3811315355861553, + "grad_norm": 3.409628561712452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284780 + }, + { + "epoch": 1.3811800337789912, + "grad_norm": 3.411733473512868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284790 + }, + { + "epoch": 1.3812285319718274, + "grad_norm": 3.2180906828216393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284800 + }, + { + "epoch": 1.3812770301646635, + "grad_norm": 3.3520245779072866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284810 + }, + { + "epoch": 1.3813255283574994, + "grad_norm": 3.163764574765082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284820 + }, + { + "epoch": 1.3813740265503356, + "grad_norm": 3.456187300798774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284830 + }, + { + "epoch": 1.3814225247431717, + "grad_norm": 3.22002620123385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284840 + }, + { + "epoch": 1.3814710229360079, + "grad_norm": 3.280285341134004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284850 + }, + { + "epoch": 1.381519521128844, + "grad_norm": 3.222114628442796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284860 + }, + { + "epoch": 1.38156801932168, + "grad_norm": 3.1941897304932354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284870 + }, + { + "epoch": 1.381616517514516, + "grad_norm": 3.1607771688868525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284880 + }, + { + "epoch": 1.3816650157073522, + "grad_norm": 3.0997645694696985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284890 + }, + { + "epoch": 1.3817135139001884, + "grad_norm": 3.534361781021289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284900 + }, + { + "epoch": 1.3817620120930245, + "grad_norm": 3.5293069799990917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284910 + }, + { + "epoch": 1.3818105102858604, + "grad_norm": 3.424780743443989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284920 + }, + { + "epoch": 1.3818590084786966, + "grad_norm": 2.9241871857266233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284930 + }, + { + "epoch": 1.3819075066715327, + "grad_norm": 3.1364874075734406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284940 + }, + { + "epoch": 1.3819560048643686, + "grad_norm": 3.1418903745361604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284950 + }, + { + "epoch": 1.3820045030572048, + "grad_norm": 3.129499361875787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284960 + }, + { + "epoch": 1.382053001250041, + "grad_norm": 3.1478637652071484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284970 + }, + { + "epoch": 1.382101499442877, + "grad_norm": 3.018854215497413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284980 + }, + { + "epoch": 1.3821499976357132, + "grad_norm": 3.076232530929701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 284990 + }, + { + "epoch": 1.3821984958285491, + "grad_norm": 3.120422320534999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285000 + }, + { + "epoch": 1.3822469940213853, + "grad_norm": 2.9377960686360893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285010 + }, + { + "epoch": 1.3822954922142214, + "grad_norm": 3.062767461869953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285020 + }, + { + "epoch": 1.3823439904070574, + "grad_norm": 2.889134691486106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285030 + }, + { + "epoch": 1.3823924885998935, + "grad_norm": 3.080811836753128e-07, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 285040 + }, + { + "epoch": 1.3824409867927296, + "grad_norm": 4.403966613608645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285050 + }, + { + "epoch": 1.3824894849855658, + "grad_norm": 1.9819834051304497e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285060 + }, + { + "epoch": 1.382537983178402, + "grad_norm": 1.8507951608626172e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285070 + }, + { + "epoch": 1.3825864813712379, + "grad_norm": 4.538318989943946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285080 + }, + { + "epoch": 1.382634979564074, + "grad_norm": 2.1785410808661254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285090 + }, + { + "epoch": 1.3826834777569101, + "grad_norm": 7.149189400479372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285100 + }, + { + "epoch": 1.382731975949746, + "grad_norm": 7.466807119271834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285110 + }, + { + "epoch": 1.3827804741425822, + "grad_norm": 6.91890420512209e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285120 + }, + { + "epoch": 1.3828289723354183, + "grad_norm": 7.997400643944275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285130 + }, + { + "epoch": 1.3828774705282545, + "grad_norm": 4.3603236008493695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285140 + }, + { + "epoch": 1.3829259687210906, + "grad_norm": 5.815968506794889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285150 + }, + { + "epoch": 1.3829744669139266, + "grad_norm": 6.201090627655503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285160 + }, + { + "epoch": 1.3830229651067627, + "grad_norm": 6.338834737107391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285170 + }, + { + "epoch": 1.3830714632995988, + "grad_norm": 5.163313971934258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285180 + }, + { + "epoch": 1.3831199614924348, + "grad_norm": 5.798884217256273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285190 + }, + { + "epoch": 1.383168459685271, + "grad_norm": 5.784550012322143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285200 + }, + { + "epoch": 1.383216957878107, + "grad_norm": 4.961481181453564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285210 + }, + { + "epoch": 1.3832654560709432, + "grad_norm": 1.1283841558906715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285220 + }, + { + "epoch": 1.3833139542637793, + "grad_norm": 5.001102749702113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285230 + }, + { + "epoch": 1.3833624524566153, + "grad_norm": 4.4661605897999834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285240 + }, + { + "epoch": 1.3834109506494514, + "grad_norm": 4.818590468858019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285250 + }, + { + "epoch": 1.3834594488422876, + "grad_norm": 4.5944941007292073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285260 + }, + { + "epoch": 1.3835079470351235, + "grad_norm": 1.1695804005285027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285270 + }, + { + "epoch": 1.3835564452279596, + "grad_norm": 5.613170515061938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285280 + }, + { + "epoch": 1.3836049434207958, + "grad_norm": 4.827686552744126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285290 + }, + { + "epoch": 1.383653441613632, + "grad_norm": 5.755546226282604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285300 + }, + { + "epoch": 1.383701939806468, + "grad_norm": 4.3202842903156125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285310 + }, + { + "epoch": 1.383750437999304, + "grad_norm": 5.341628366295481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285320 + }, + { + "epoch": 1.3837989361921401, + "grad_norm": 4.893609002465382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285330 + }, + { + "epoch": 1.3838474343849763, + "grad_norm": 4.5469869291991927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285340 + }, + { + "epoch": 1.3838959325778122, + "grad_norm": 4.0334347772841284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285350 + }, + { + "epoch": 1.3839444307706483, + "grad_norm": 4.7366859234898584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285360 + }, + { + "epoch": 1.3839929289634845, + "grad_norm": 4.106603057607572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285370 + }, + { + "epoch": 1.3840414271563206, + "grad_norm": 6.644734185101697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285380 + }, + { + "epoch": 1.3840899253491568, + "grad_norm": 4.244613478476822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285390 + }, + { + "epoch": 1.3841384235419927, + "grad_norm": 4.029761839774437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285400 + }, + { + "epoch": 1.3841869217348288, + "grad_norm": 4.5534002879321633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285410 + }, + { + "epoch": 1.384235419927665, + "grad_norm": 5.457497991301352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285420 + }, + { + "epoch": 1.3842839181205011, + "grad_norm": 3.7974487554492953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285430 + }, + { + "epoch": 1.3843324163133373, + "grad_norm": 3.7946790598653024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285440 + }, + { + "epoch": 1.3843809145061732, + "grad_norm": 3.961721120049333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285450 + }, + { + "epoch": 1.3844294126990093, + "grad_norm": 5.127354256728722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285460 + }, + { + "epoch": 1.3844779108918455, + "grad_norm": 3.8484432707264205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285470 + }, + { + "epoch": 1.3845264090846814, + "grad_norm": 4.622663141162775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285480 + }, + { + "epoch": 1.3845749072775175, + "grad_norm": 5.214378688833676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285490 + }, + { + "epoch": 1.3846234054703537, + "grad_norm": 4.264474569026788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285500 + }, + { + "epoch": 1.3846719036631898, + "grad_norm": 4.1920350213331403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285510 + }, + { + "epoch": 1.384720401856026, + "grad_norm": 4.408900338148669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285520 + }, + { + "epoch": 1.3847689000488619, + "grad_norm": 3.9531278162030503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285530 + }, + { + "epoch": 1.384817398241698, + "grad_norm": 4.5314158114706515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285540 + }, + { + "epoch": 1.3848658964345342, + "grad_norm": 4.110501095055952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285550 + }, + { + "epoch": 1.38491439462737, + "grad_norm": 4.1144681972582475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285560 + }, + { + "epoch": 1.3849628928202062, + "grad_norm": 3.790101743561536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285570 + }, + { + "epoch": 1.3850113910130424, + "grad_norm": 3.7632958083122503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285580 + }, + { + "epoch": 1.3850598892058785, + "grad_norm": 4.2183310711152444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285590 + }, + { + "epoch": 1.3851083873987147, + "grad_norm": 3.597808415634063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285600 + }, + { + "epoch": 1.3851568855915506, + "grad_norm": 3.482986130620702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285610 + }, + { + "epoch": 1.3852053837843867, + "grad_norm": 3.4937627901854285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285620 + }, + { + "epoch": 1.3852538819772229, + "grad_norm": 3.507190911022917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285630 + }, + { + "epoch": 1.3853023801700588, + "grad_norm": 3.433203232816595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285640 + }, + { + "epoch": 1.385350878362895, + "grad_norm": 3.5233867379247386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285650 + }, + { + "epoch": 1.385399376555731, + "grad_norm": 3.3002606869558804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285660 + }, + { + "epoch": 1.3854478747485672, + "grad_norm": 3.6108048107053037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285670 + }, + { + "epoch": 1.3854963729414034, + "grad_norm": 3.00531155517092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285680 + }, + { + "epoch": 1.3855448711342393, + "grad_norm": 3.17380767000941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285690 + }, + { + "epoch": 1.3855933693270754, + "grad_norm": 3.229767173706932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285700 + }, + { + "epoch": 1.3856418675199116, + "grad_norm": 3.184104855336045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285710 + }, + { + "epoch": 1.3856903657127475, + "grad_norm": 3.233668905977538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285720 + }, + { + "epoch": 1.3857388639055837, + "grad_norm": 3.0869102829456097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285730 + }, + { + "epoch": 1.3857873620984198, + "grad_norm": 3.0488524771499215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285740 + }, + { + "epoch": 1.385835860291256, + "grad_norm": 3.4589388064887316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285750 + }, + { + "epoch": 1.385884358484092, + "grad_norm": 2.920948816154123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285760 + }, + { + "epoch": 1.385932856676928, + "grad_norm": 3.406036910291732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285770 + }, + { + "epoch": 1.3859813548697641, + "grad_norm": 3.141758213587309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285780 + }, + { + "epoch": 1.3860298530626003, + "grad_norm": 2.939717944627773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285790 + }, + { + "epoch": 1.3860783512554362, + "grad_norm": 4.230572358210338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285800 + }, + { + "epoch": 1.3861268494482724, + "grad_norm": 2.8603514579117473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285810 + }, + { + "epoch": 1.3861753476411085, + "grad_norm": 2.905397309405089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285820 + }, + { + "epoch": 1.3862238458339446, + "grad_norm": 2.811060824114975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285830 + }, + { + "epoch": 1.3862723440267808, + "grad_norm": 2.6459429136593826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285840 + }, + { + "epoch": 1.3863208422196167, + "grad_norm": 2.9670835033357434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285850 + }, + { + "epoch": 1.3863693404124529, + "grad_norm": 2.948248720713309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285860 + }, + { + "epoch": 1.386417838605289, + "grad_norm": 2.6639796146810113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285870 + }, + { + "epoch": 1.3864663367981251, + "grad_norm": 2.667075307272171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285880 + }, + { + "epoch": 1.3865148349909613, + "grad_norm": 2.5543474180267367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285890 + }, + { + "epoch": 1.3865633331837972, + "grad_norm": 2.781474393032113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285900 + }, + { + "epoch": 1.3866118313766334, + "grad_norm": 2.925408750797942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285910 + }, + { + "epoch": 1.3866603295694695, + "grad_norm": 2.553699118834629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285920 + }, + { + "epoch": 1.3867088277623054, + "grad_norm": 2.683366915334773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285930 + }, + { + "epoch": 1.3867573259551416, + "grad_norm": 2.4658814368194726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285940 + }, + { + "epoch": 1.3868058241479777, + "grad_norm": 2.6437675160195795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285950 + }, + { + "epoch": 1.3868543223408138, + "grad_norm": 2.47106697770505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285960 + }, + { + "epoch": 1.38690282053365, + "grad_norm": 2.7667491053762205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285970 + }, + { + "epoch": 1.386951318726486, + "grad_norm": 1.029897475746111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285980 + }, + { + "epoch": 1.386999816919322, + "grad_norm": 2.4689913402653474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 285990 + }, + { + "epoch": 1.3870483151121582, + "grad_norm": 2.489257440174697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286000 + }, + { + "epoch": 1.3870968133049941, + "grad_norm": 2.464950910052721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286010 + }, + { + "epoch": 1.3871453114978303, + "grad_norm": 0.0001188807946164161, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286020 + }, + { + "epoch": 1.3871938096906664, + "grad_norm": 2.4303113832502277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286030 + }, + { + "epoch": 1.3872423078835026, + "grad_norm": 2.2496932672311232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286040 + }, + { + "epoch": 1.3872908060763387, + "grad_norm": 2.4022520506150613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286050 + }, + { + "epoch": 1.3873393042691746, + "grad_norm": 2.3595576692514442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286060 + }, + { + "epoch": 1.3873878024620108, + "grad_norm": 2.3668974336032989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286070 + }, + { + "epoch": 1.387436300654847, + "grad_norm": 2.245206047746251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286080 + }, + { + "epoch": 1.3874847988476828, + "grad_norm": 2.2256979548274103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286090 + }, + { + "epoch": 1.387533297040519, + "grad_norm": 2.454158334330714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286100 + }, + { + "epoch": 1.3875817952333551, + "grad_norm": 2.281385604874231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286110 + }, + { + "epoch": 1.3876302934261913, + "grad_norm": 2.312315530161868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286120 + }, + { + "epoch": 1.3876787916190274, + "grad_norm": 2.2129870558273979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286130 + }, + { + "epoch": 1.3877272898118633, + "grad_norm": 2.2006555866482813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286140 + }, + { + "epoch": 1.3877757880046995, + "grad_norm": 2.2452395853633789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286150 + }, + { + "epoch": 1.3878242861975356, + "grad_norm": 2.1575027631115518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286160 + }, + { + "epoch": 1.3878727843903715, + "grad_norm": 2.358955413228614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286170 + }, + { + "epoch": 1.3879212825832077, + "grad_norm": 2.2585236081340554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286180 + }, + { + "epoch": 1.3879697807760438, + "grad_norm": 2.0953352475316933e-07, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 286190 + }, + { + "epoch": 1.38801827896888, + "grad_norm": 7.50796573356638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286200 + }, + { + "epoch": 1.3880667771617161, + "grad_norm": 2.8440257665351965e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286210 + }, + { + "epoch": 1.388115275354552, + "grad_norm": 2.302737357240403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286220 + }, + { + "epoch": 1.3881637735473882, + "grad_norm": 1.6211656657105777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286230 + }, + { + "epoch": 1.3882122717402243, + "grad_norm": 2.6679260827222606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286240 + }, + { + "epoch": 1.3882607699330602, + "grad_norm": 1.284970835513377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286250 + }, + { + "epoch": 1.3883092681258964, + "grad_norm": 9.62708554652636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286260 + }, + { + "epoch": 1.3883577663187325, + "grad_norm": 3.596283931983635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286270 + }, + { + "epoch": 1.3884062645115687, + "grad_norm": 0.00037159016937948763, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286280 + }, + { + "epoch": 1.3884547627044048, + "grad_norm": 0.00038452306762337685, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 286290 + }, + { + "epoch": 1.3885032608972407, + "grad_norm": 0.00045191662502475083, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286300 + }, + { + "epoch": 1.3885517590900769, + "grad_norm": 1.7533111531520262e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 286310 + }, + { + "epoch": 1.388600257282913, + "grad_norm": 2.372263406869024e-05, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 286320 + }, + { + "epoch": 1.388648755475749, + "grad_norm": 0.00012517724826466292, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286330 + }, + { + "epoch": 1.388697253668585, + "grad_norm": 5.7355911849299446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286340 + }, + { + "epoch": 1.3887457518614212, + "grad_norm": 9.134390711551532e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286350 + }, + { + "epoch": 1.3887942500542574, + "grad_norm": 1.8516617274144664e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286360 + }, + { + "epoch": 1.3888427482470935, + "grad_norm": 2.5474600988673046e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286370 + }, + { + "epoch": 1.3888912464399294, + "grad_norm": 9.344988939119503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286380 + }, + { + "epoch": 1.3889397446327656, + "grad_norm": 5.812190011056373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286390 + }, + { + "epoch": 1.3889882428256017, + "grad_norm": 7.185159574873978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286400 + }, + { + "epoch": 1.3890367410184379, + "grad_norm": 6.662609848717693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286410 + }, + { + "epoch": 1.389085239211274, + "grad_norm": 5.397920176619664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286420 + }, + { + "epoch": 1.38913373740411, + "grad_norm": 4.933926447847625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286430 + }, + { + "epoch": 1.389182235596946, + "grad_norm": 3.886646481987555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286440 + }, + { + "epoch": 1.3892307337897822, + "grad_norm": 4.235128926666221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286450 + }, + { + "epoch": 1.3892792319826182, + "grad_norm": 4.16367947764229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286460 + }, + { + "epoch": 1.3893277301754543, + "grad_norm": 4.090724360139575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286470 + }, + { + "epoch": 1.3893762283682904, + "grad_norm": 4.121794518141542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286480 + }, + { + "epoch": 1.3894247265611266, + "grad_norm": 3.163950850648689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286490 + }, + { + "epoch": 1.3894732247539627, + "grad_norm": 3.695652821988915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286500 + }, + { + "epoch": 1.3895217229467987, + "grad_norm": 3.299705440440448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286510 + }, + { + "epoch": 1.3895702211396348, + "grad_norm": 3.1857409794611158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286520 + }, + { + "epoch": 1.389618719332471, + "grad_norm": 2.5994952466135146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286530 + }, + { + "epoch": 1.3896672175253069, + "grad_norm": 2.24040240937029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286540 + }, + { + "epoch": 1.389715715718143, + "grad_norm": 2.7323349058860913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286550 + }, + { + "epoch": 1.3897642139109792, + "grad_norm": 2.5635442852944834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286560 + }, + { + "epoch": 1.3898127121038153, + "grad_norm": 2.868691126423073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286570 + }, + { + "epoch": 1.3898612102966514, + "grad_norm": 3.4129707273677923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286580 + }, + { + "epoch": 1.3899097084894874, + "grad_norm": 2.331928044441156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286590 + }, + { + "epoch": 1.3899582066823235, + "grad_norm": 2.0576380848069675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286600 + }, + { + "epoch": 1.3900067048751596, + "grad_norm": 2.0594864054146456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286610 + }, + { + "epoch": 1.3900552030679956, + "grad_norm": 1.8187341765951714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286620 + }, + { + "epoch": 1.3901037012608317, + "grad_norm": 2.0832937934756046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286630 + }, + { + "epoch": 1.3901521994536679, + "grad_norm": 1.5390243106594426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286640 + }, + { + "epoch": 1.390200697646504, + "grad_norm": 1.768348283803789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286650 + }, + { + "epoch": 1.3902491958393401, + "grad_norm": 1.8047153389488813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286660 + }, + { + "epoch": 1.390297694032176, + "grad_norm": 1.7351308088109363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286670 + }, + { + "epoch": 1.3903461922250122, + "grad_norm": 1.6182887065951945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286680 + }, + { + "epoch": 1.3903946904178484, + "grad_norm": 2.0168333776382497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286690 + }, + { + "epoch": 1.3904431886106843, + "grad_norm": 2.117267285939306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286700 + }, + { + "epoch": 1.3904916868035204, + "grad_norm": 1.5476723547180882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286710 + }, + { + "epoch": 1.3905401849963566, + "grad_norm": 1.4761062629986554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286720 + }, + { + "epoch": 1.3905886831891927, + "grad_norm": 1.6295284694933798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286730 + }, + { + "epoch": 1.3906371813820289, + "grad_norm": 3.0893925213604234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286740 + }, + { + "epoch": 1.3906856795748648, + "grad_norm": 1.7592492440599017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286750 + }, + { + "epoch": 1.390734177767701, + "grad_norm": 3.338041096867528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286760 + }, + { + "epoch": 1.390782675960537, + "grad_norm": 0.8891568779945374, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 286770 + }, + { + "epoch": 1.390831174153373, + "grad_norm": 1.1858118341478985e-05, + "learning_rate": 0.0002, + "loss": 0.0025, + "step": 286780 + }, + { + "epoch": 1.3908796723462091, + "grad_norm": 0.007687169127166271, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286790 + }, + { + "epoch": 1.3909281705390453, + "grad_norm": 0.0008470526081509888, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286800 + }, + { + "epoch": 1.3909766687318814, + "grad_norm": 5.924140350543894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286810 + }, + { + "epoch": 1.3910251669247176, + "grad_norm": 2.8036056392011233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286820 + }, + { + "epoch": 1.3910736651175535, + "grad_norm": 3.189979906892404e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286830 + }, + { + "epoch": 1.3911221633103896, + "grad_norm": 3.329033006593818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286840 + }, + { + "epoch": 1.3911706615032258, + "grad_norm": 1.060744580172468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286850 + }, + { + "epoch": 1.3912191596960617, + "grad_norm": 0.0003018751449417323, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286860 + }, + { + "epoch": 1.3912676578888978, + "grad_norm": 9.26127268030541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286870 + }, + { + "epoch": 1.391316156081734, + "grad_norm": 7.235924385895487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286880 + }, + { + "epoch": 1.3913646542745701, + "grad_norm": 0.0002907185989897698, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286890 + }, + { + "epoch": 1.3914131524674063, + "grad_norm": 0.00014062138507142663, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286900 + }, + { + "epoch": 1.3914616506602422, + "grad_norm": 5.563686499954201e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286910 + }, + { + "epoch": 1.3915101488530783, + "grad_norm": 3.975228173658252e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286920 + }, + { + "epoch": 1.3915586470459145, + "grad_norm": 2.580958607723005e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286930 + }, + { + "epoch": 1.3916071452387506, + "grad_norm": 1.9979510398115963e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286940 + }, + { + "epoch": 1.3916556434315868, + "grad_norm": 1.2511702152551152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286950 + }, + { + "epoch": 1.3917041416244227, + "grad_norm": 8.695078577147797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286960 + }, + { + "epoch": 1.3917526398172588, + "grad_norm": 9.028911335917655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286970 + }, + { + "epoch": 1.391801138010095, + "grad_norm": 1.435579815733945e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286980 + }, + { + "epoch": 1.391849636202931, + "grad_norm": 8.085302397375926e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 286990 + }, + { + "epoch": 1.391898134395767, + "grad_norm": 6.979865702305688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287000 + }, + { + "epoch": 1.3919466325886032, + "grad_norm": 5.827251243317733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287010 + }, + { + "epoch": 1.3919951307814393, + "grad_norm": 5.474482804856962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287020 + }, + { + "epoch": 1.3920436289742755, + "grad_norm": 5.664355740009341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287030 + }, + { + "epoch": 1.3920921271671114, + "grad_norm": 6.738061529176775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287040 + }, + { + "epoch": 1.3921406253599475, + "grad_norm": 4.2868100535997655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287050 + }, + { + "epoch": 1.3921891235527837, + "grad_norm": 4.858827651332831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287060 + }, + { + "epoch": 1.3922376217456196, + "grad_norm": 4.365769655123586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287070 + }, + { + "epoch": 1.3922861199384557, + "grad_norm": 3.81996551368502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287080 + }, + { + "epoch": 1.392334618131292, + "grad_norm": 4.594947768055135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287090 + }, + { + "epoch": 1.392383116324128, + "grad_norm": 4.1493235585221555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287100 + }, + { + "epoch": 1.3924316145169642, + "grad_norm": 7.608534815517487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287110 + }, + { + "epoch": 1.3924801127098, + "grad_norm": 3.236037628084887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287120 + }, + { + "epoch": 1.3925286109026362, + "grad_norm": 3.521589633237454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287130 + }, + { + "epoch": 1.3925771090954724, + "grad_norm": 7.561957318102941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287140 + }, + { + "epoch": 1.3926256072883083, + "grad_norm": 2.7618571039056405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287150 + }, + { + "epoch": 1.3926741054811445, + "grad_norm": 2.7194116682949243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287160 + }, + { + "epoch": 1.3927226036739806, + "grad_norm": 2.680348870853777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287170 + }, + { + "epoch": 1.3927711018668167, + "grad_norm": 2.5568201635906007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287180 + }, + { + "epoch": 1.3928196000596529, + "grad_norm": 4.964188974554418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287190 + }, + { + "epoch": 1.3928680982524888, + "grad_norm": 2.7058586056227796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287200 + }, + { + "epoch": 1.392916596445325, + "grad_norm": 3.294878297310788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287210 + }, + { + "epoch": 1.392965094638161, + "grad_norm": 2.315387746421038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287220 + }, + { + "epoch": 1.393013592830997, + "grad_norm": 2.4117741759255296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287230 + }, + { + "epoch": 1.3930620910238332, + "grad_norm": 3.033486564163468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287240 + }, + { + "epoch": 1.3931105892166693, + "grad_norm": 2.0818815755774267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287250 + }, + { + "epoch": 1.3931590874095054, + "grad_norm": 2.2508604615723016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287260 + }, + { + "epoch": 1.3932075856023416, + "grad_norm": 2.6027316835097736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287270 + }, + { + "epoch": 1.3932560837951775, + "grad_norm": 1.638672870285518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287280 + }, + { + "epoch": 1.3933045819880137, + "grad_norm": 2.4817286430334207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287290 + }, + { + "epoch": 1.3933530801808498, + "grad_norm": 1.5014973541838117e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287300 + }, + { + "epoch": 1.3934015783736857, + "grad_norm": 2.159387804567814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287310 + }, + { + "epoch": 1.3934500765665219, + "grad_norm": 2.1856885723536834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287320 + }, + { + "epoch": 1.393498574759358, + "grad_norm": 1.9071794667979702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287330 + }, + { + "epoch": 1.3935470729521942, + "grad_norm": 1.782907020242419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287340 + }, + { + "epoch": 1.3935955711450303, + "grad_norm": 2.6235898076265585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287350 + }, + { + "epoch": 1.3936440693378662, + "grad_norm": 1.831458689594001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287360 + }, + { + "epoch": 1.3936925675307024, + "grad_norm": 1.6018916539906058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287370 + }, + { + "epoch": 1.3937410657235385, + "grad_norm": 1.418739088876464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287380 + }, + { + "epoch": 1.3937895639163744, + "grad_norm": 1.483417690906208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287390 + }, + { + "epoch": 1.3938380621092106, + "grad_norm": 1.3918465811002534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287400 + }, + { + "epoch": 1.3938865603020467, + "grad_norm": 1.5218199678201927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287410 + }, + { + "epoch": 1.3939350584948829, + "grad_norm": 1.3985799114379915e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287420 + }, + { + "epoch": 1.393983556687719, + "grad_norm": 1.3250239589979174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287430 + }, + { + "epoch": 1.394032054880555, + "grad_norm": 1.8517100670578657e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287440 + }, + { + "epoch": 1.394080553073391, + "grad_norm": 1.2979810435354011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287450 + }, + { + "epoch": 1.3941290512662272, + "grad_norm": 1.371380676573608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287460 + }, + { + "epoch": 1.3941775494590634, + "grad_norm": 1.3539747669710778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287470 + }, + { + "epoch": 1.3942260476518995, + "grad_norm": 1.2591127642735955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287480 + }, + { + "epoch": 1.3942745458447354, + "grad_norm": 1.577917601025547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287490 + }, + { + "epoch": 1.3943230440375716, + "grad_norm": 1.1431875464040786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287500 + }, + { + "epoch": 1.3943715422304077, + "grad_norm": 1.191911792375322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287510 + }, + { + "epoch": 1.3944200404232436, + "grad_norm": 1.1176654197697644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287520 + }, + { + "epoch": 1.3944685386160798, + "grad_norm": 1.2331960306255496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287530 + }, + { + "epoch": 1.394517036808916, + "grad_norm": 1.7573686363903107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287540 + }, + { + "epoch": 1.394565535001752, + "grad_norm": 1.7799110310079413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287550 + }, + { + "epoch": 1.3946140331945882, + "grad_norm": 1.088004864868708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287560 + }, + { + "epoch": 1.3946625313874241, + "grad_norm": 1.0779375543279457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287570 + }, + { + "epoch": 1.3947110295802603, + "grad_norm": 1.0549053968134103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287580 + }, + { + "epoch": 1.3947595277730964, + "grad_norm": 1.0598217841106816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287590 + }, + { + "epoch": 1.3948080259659323, + "grad_norm": 1.1676172562147258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287600 + }, + { + "epoch": 1.3948565241587685, + "grad_norm": 1.3085838190818322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287610 + }, + { + "epoch": 1.3949050223516046, + "grad_norm": 9.370126008434454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287620 + }, + { + "epoch": 1.3949535205444408, + "grad_norm": 1.1716421113305842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287630 + }, + { + "epoch": 1.395002018737277, + "grad_norm": 1.5382844367195503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287640 + }, + { + "epoch": 1.3950505169301128, + "grad_norm": 1.1536203601281159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287650 + }, + { + "epoch": 1.395099015122949, + "grad_norm": 1.0591826367090107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287660 + }, + { + "epoch": 1.3951475133157851, + "grad_norm": 9.505488378636073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287670 + }, + { + "epoch": 1.395196011508621, + "grad_norm": 8.26599489300861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287680 + }, + { + "epoch": 1.3952445097014572, + "grad_norm": 9.112547445511154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287690 + }, + { + "epoch": 1.3952930078942933, + "grad_norm": 8.698169153831259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287700 + }, + { + "epoch": 1.3953415060871295, + "grad_norm": 9.735713319969364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287710 + }, + { + "epoch": 1.3953900042799656, + "grad_norm": 1.0451769867358962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287720 + }, + { + "epoch": 1.3954385024728015, + "grad_norm": 1.0217753469987656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287730 + }, + { + "epoch": 1.3954870006656377, + "grad_norm": 8.731847174203722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287740 + }, + { + "epoch": 1.3955354988584738, + "grad_norm": 1.110950165639224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287750 + }, + { + "epoch": 1.3955839970513098, + "grad_norm": 9.904855460263207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287760 + }, + { + "epoch": 1.395632495244146, + "grad_norm": 7.599468858643377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287770 + }, + { + "epoch": 1.395680993436982, + "grad_norm": 8.283250849672186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287780 + }, + { + "epoch": 1.3957294916298182, + "grad_norm": 1.224835500579502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287790 + }, + { + "epoch": 1.3957779898226543, + "grad_norm": 8.869105272424349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287800 + }, + { + "epoch": 1.3958264880154903, + "grad_norm": 9.365028290631017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287810 + }, + { + "epoch": 1.3958749862083264, + "grad_norm": 7.935052508400986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287820 + }, + { + "epoch": 1.3959234844011625, + "grad_norm": 8.199306762435299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287830 + }, + { + "epoch": 1.3959719825939985, + "grad_norm": 6.998217259024386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287840 + }, + { + "epoch": 1.3960204807868346, + "grad_norm": 7.73594024394697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287850 + }, + { + "epoch": 1.3960689789796707, + "grad_norm": 8.796316706138896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287860 + }, + { + "epoch": 1.396117477172507, + "grad_norm": 7.444619427587895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287870 + }, + { + "epoch": 1.396165975365343, + "grad_norm": 7.082966817506531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287880 + }, + { + "epoch": 1.396214473558179, + "grad_norm": 1.4070436691326904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287890 + }, + { + "epoch": 1.396262971751015, + "grad_norm": 7.27555175217276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287900 + }, + { + "epoch": 1.3963114699438512, + "grad_norm": 7.328774813686323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287910 + }, + { + "epoch": 1.3963599681366872, + "grad_norm": 8.199342005354993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287920 + }, + { + "epoch": 1.3964084663295235, + "grad_norm": 9.998383347920026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287930 + }, + { + "epoch": 1.3964569645223595, + "grad_norm": 1.051691583597858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287940 + }, + { + "epoch": 1.3965054627151956, + "grad_norm": 7.317816539398336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287950 + }, + { + "epoch": 1.3965539609080317, + "grad_norm": 6.998690764703497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287960 + }, + { + "epoch": 1.3966024591008677, + "grad_norm": 6.790767201891867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287970 + }, + { + "epoch": 1.3966509572937038, + "grad_norm": 7.236674832711287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287980 + }, + { + "epoch": 1.39669945548654, + "grad_norm": 6.889020482958585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 287990 + }, + { + "epoch": 1.396747953679376, + "grad_norm": 6.556026050930086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288000 + }, + { + "epoch": 1.3967964518722122, + "grad_norm": 7.272250286405324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288010 + }, + { + "epoch": 1.3968449500650482, + "grad_norm": 5.659538828695077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288020 + }, + { + "epoch": 1.3968934482578843, + "grad_norm": 5.915111387366778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288030 + }, + { + "epoch": 1.3969419464507205, + "grad_norm": 6.574192070729623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288040 + }, + { + "epoch": 1.3969904446435564, + "grad_norm": 6.006083594911615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288050 + }, + { + "epoch": 1.3970389428363925, + "grad_norm": 6.049019702913938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288060 + }, + { + "epoch": 1.3970874410292287, + "grad_norm": 5.973931251901377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288070 + }, + { + "epoch": 1.3971359392220648, + "grad_norm": 5.650935577250493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288080 + }, + { + "epoch": 1.397184437414901, + "grad_norm": 6.568495791725582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288090 + }, + { + "epoch": 1.3972329356077369, + "grad_norm": 5.095072879157669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288100 + }, + { + "epoch": 1.397281433800573, + "grad_norm": 7.062532176860259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288110 + }, + { + "epoch": 1.3973299319934092, + "grad_norm": 5.476662749970274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288120 + }, + { + "epoch": 1.397378430186245, + "grad_norm": 6.349565637719934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288130 + }, + { + "epoch": 1.3974269283790812, + "grad_norm": 4.5295686845747696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288140 + }, + { + "epoch": 1.3974754265719174, + "grad_norm": 5.143050429978757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288150 + }, + { + "epoch": 1.3975239247647535, + "grad_norm": 7.575430913675518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288160 + }, + { + "epoch": 1.3975724229575897, + "grad_norm": 5.473886517393112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288170 + }, + { + "epoch": 1.3976209211504256, + "grad_norm": 5.115421117807273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288180 + }, + { + "epoch": 1.3976694193432617, + "grad_norm": 6.053007268747024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288190 + }, + { + "epoch": 1.3977179175360979, + "grad_norm": 5.301429268911306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288200 + }, + { + "epoch": 1.3977664157289338, + "grad_norm": 5.90752279094886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288210 + }, + { + "epoch": 1.39781491392177, + "grad_norm": 5.15820431701286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288220 + }, + { + "epoch": 1.397863412114606, + "grad_norm": 5.441939947559149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288230 + }, + { + "epoch": 1.3979119103074422, + "grad_norm": 5.385343229136197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288240 + }, + { + "epoch": 1.3979604085002784, + "grad_norm": 5.060692274128087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288250 + }, + { + "epoch": 1.3980089066931143, + "grad_norm": 4.855923521063232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288260 + }, + { + "epoch": 1.3980574048859504, + "grad_norm": 7.599720106554742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288270 + }, + { + "epoch": 1.3981059030787866, + "grad_norm": 5.426375082606683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288280 + }, + { + "epoch": 1.3981544012716225, + "grad_norm": 4.1309598941552395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288290 + }, + { + "epoch": 1.3982028994644586, + "grad_norm": 4.854856001657026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288300 + }, + { + "epoch": 1.3982513976572948, + "grad_norm": 2.319668965355959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288310 + }, + { + "epoch": 1.398299895850131, + "grad_norm": 4.7213544007718156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288320 + }, + { + "epoch": 1.398348394042967, + "grad_norm": 7.671721959923161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288330 + }, + { + "epoch": 1.398396892235803, + "grad_norm": 3.730063440343656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288340 + }, + { + "epoch": 1.3984453904286391, + "grad_norm": 4.48892222948416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288350 + }, + { + "epoch": 1.3984938886214753, + "grad_norm": 4.363440666566021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288360 + }, + { + "epoch": 1.3985423868143112, + "grad_norm": 4.935190531796252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288370 + }, + { + "epoch": 1.3985908850071473, + "grad_norm": 4.34144567407202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288380 + }, + { + "epoch": 1.3986393831999835, + "grad_norm": 4.0067175177682657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288390 + }, + { + "epoch": 1.3986878813928196, + "grad_norm": 5.017491844228061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288400 + }, + { + "epoch": 1.3987363795856558, + "grad_norm": 4.416293450049125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288410 + }, + { + "epoch": 1.3987848777784917, + "grad_norm": 5.109777703182772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288420 + }, + { + "epoch": 1.3988333759713278, + "grad_norm": 4.150776078404306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288430 + }, + { + "epoch": 1.398881874164164, + "grad_norm": 4.6527912900273805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288440 + }, + { + "epoch": 1.3989303723570001, + "grad_norm": 4.429746240930399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288450 + }, + { + "epoch": 1.3989788705498363, + "grad_norm": 4.1846271869872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288460 + }, + { + "epoch": 1.3990273687426722, + "grad_norm": 4.13599707371759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288470 + }, + { + "epoch": 1.3990758669355083, + "grad_norm": 6.113024255682831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288480 + }, + { + "epoch": 1.3991243651283445, + "grad_norm": 3.966603401295288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288490 + }, + { + "epoch": 1.3991728633211804, + "grad_norm": 4.2379917886137264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288500 + }, + { + "epoch": 1.3992213615140165, + "grad_norm": 7.272565198945813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288510 + }, + { + "epoch": 1.3992698597068527, + "grad_norm": 3.939799171348568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288520 + }, + { + "epoch": 1.3993183578996888, + "grad_norm": 4.934917114951531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288530 + }, + { + "epoch": 1.399366856092525, + "grad_norm": 4.6926567165428423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288540 + }, + { + "epoch": 1.399415354285361, + "grad_norm": 4.3980438135804434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288550 + }, + { + "epoch": 1.399463852478197, + "grad_norm": 4.448811523616314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288560 + }, + { + "epoch": 1.3995123506710332, + "grad_norm": 4.367227575130528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288570 + }, + { + "epoch": 1.3995608488638691, + "grad_norm": 3.4117238101316616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288580 + }, + { + "epoch": 1.3996093470567053, + "grad_norm": 4.43091181523414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288590 + }, + { + "epoch": 1.3996578452495414, + "grad_norm": 4.152545898250537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288600 + }, + { + "epoch": 1.3997063434423775, + "grad_norm": 3.70014589634593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288610 + }, + { + "epoch": 1.3997548416352137, + "grad_norm": 4.647450282391219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288620 + }, + { + "epoch": 1.3998033398280496, + "grad_norm": 3.0966722874836705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288630 + }, + { + "epoch": 1.3998518380208858, + "grad_norm": 3.4345131894042424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288640 + }, + { + "epoch": 1.399900336213722, + "grad_norm": 3.590470498693321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288650 + }, + { + "epoch": 1.3999488344065578, + "grad_norm": 3.980596545716253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288660 + }, + { + "epoch": 1.399997332599394, + "grad_norm": 5.377548859542003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288670 + }, + { + "epoch": 1.40004583079223, + "grad_norm": 3.275393680723937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288680 + }, + { + "epoch": 1.4000943289850662, + "grad_norm": 3.8121496004350774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288690 + }, + { + "epoch": 1.4001428271779024, + "grad_norm": 3.5107109397358727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288700 + }, + { + "epoch": 1.4001913253707383, + "grad_norm": 3.4298727769055404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288710 + }, + { + "epoch": 1.4002398235635745, + "grad_norm": 3.4035124940601236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288720 + }, + { + "epoch": 1.4002883217564106, + "grad_norm": 3.3320216630272625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288730 + }, + { + "epoch": 1.4003368199492465, + "grad_norm": 3.2737807487137616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288740 + }, + { + "epoch": 1.4003853181420827, + "grad_norm": 3.1411445888807066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288750 + }, + { + "epoch": 1.4004338163349188, + "grad_norm": 3.356492186412652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288760 + }, + { + "epoch": 1.400482314527755, + "grad_norm": 3.075606969105138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288770 + }, + { + "epoch": 1.400530812720591, + "grad_norm": 3.202963227977307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288780 + }, + { + "epoch": 1.400579310913427, + "grad_norm": 3.0114773608147516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288790 + }, + { + "epoch": 1.4006278091062632, + "grad_norm": 3.0510642545777955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288800 + }, + { + "epoch": 1.4006763072990993, + "grad_norm": 4.834755031879467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288810 + }, + { + "epoch": 1.4007248054919352, + "grad_norm": 3.373865524736175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288820 + }, + { + "epoch": 1.4007733036847714, + "grad_norm": 3.61657470193677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288830 + }, + { + "epoch": 1.4008218018776075, + "grad_norm": 2.8689620990007825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288840 + }, + { + "epoch": 1.4008703000704437, + "grad_norm": 5.957348889751302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288850 + }, + { + "epoch": 1.4009187982632798, + "grad_norm": 3.288093921582913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288860 + }, + { + "epoch": 1.4009672964561157, + "grad_norm": 3.081820523220813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288870 + }, + { + "epoch": 1.4010157946489519, + "grad_norm": 2.952526472199679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288880 + }, + { + "epoch": 1.401064292841788, + "grad_norm": 3.2122017046276596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288890 + }, + { + "epoch": 1.401112791034624, + "grad_norm": 3.32861077367852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288900 + }, + { + "epoch": 1.40116128922746, + "grad_norm": 2.856112928384391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288910 + }, + { + "epoch": 1.4012097874202962, + "grad_norm": 3.0952250540394743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288920 + }, + { + "epoch": 1.4012582856131324, + "grad_norm": 2.958761626814521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288930 + }, + { + "epoch": 1.4013067838059685, + "grad_norm": 3.6244338730284653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288940 + }, + { + "epoch": 1.4013552819988044, + "grad_norm": 2.795264606447745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288950 + }, + { + "epoch": 1.4014037801916406, + "grad_norm": 9.679863978817593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288960 + }, + { + "epoch": 1.4014522783844767, + "grad_norm": 2.658767073171475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288970 + }, + { + "epoch": 1.4015007765773129, + "grad_norm": 2.757539050435298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288980 + }, + { + "epoch": 1.401549274770149, + "grad_norm": 3.5672894682647893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 288990 + }, + { + "epoch": 1.401597772962985, + "grad_norm": 3.360376581440505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289000 + }, + { + "epoch": 1.401646271155821, + "grad_norm": 2.84585894405609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289010 + }, + { + "epoch": 1.4016947693486572, + "grad_norm": 3.625627300607448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289020 + }, + { + "epoch": 1.4017432675414931, + "grad_norm": 2.728949937136349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289030 + }, + { + "epoch": 1.4017917657343293, + "grad_norm": 2.4783423668850446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289040 + }, + { + "epoch": 1.4018402639271654, + "grad_norm": 3.3253797937504714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289050 + }, + { + "epoch": 1.4018887621200016, + "grad_norm": 2.886505114929605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289060 + }, + { + "epoch": 1.4019372603128377, + "grad_norm": 4.0901278453020495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289070 + }, + { + "epoch": 1.4019857585056736, + "grad_norm": 2.857300103187299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289080 + }, + { + "epoch": 1.4020342566985098, + "grad_norm": 3.2386930115535506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289090 + }, + { + "epoch": 1.402082754891346, + "grad_norm": 2.693221006211388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289100 + }, + { + "epoch": 1.4021312530841818, + "grad_norm": 2.299001948813384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289110 + }, + { + "epoch": 1.402179751277018, + "grad_norm": 2.713424862577085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289120 + }, + { + "epoch": 1.4022282494698541, + "grad_norm": 2.3884197730694723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289130 + }, + { + "epoch": 1.4022767476626903, + "grad_norm": 3.082783450736315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289140 + }, + { + "epoch": 1.4023252458555264, + "grad_norm": 3.1075029482963146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289150 + }, + { + "epoch": 1.4023737440483623, + "grad_norm": 2.525041509215953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289160 + }, + { + "epoch": 1.4024222422411985, + "grad_norm": 3.4735273857222637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289170 + }, + { + "epoch": 1.4024707404340346, + "grad_norm": 2.7047903472521284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289180 + }, + { + "epoch": 1.4025192386268706, + "grad_norm": 3.0893741609361314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289190 + }, + { + "epoch": 1.4025677368197067, + "grad_norm": 2.474737925695081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289200 + }, + { + "epoch": 1.4026162350125428, + "grad_norm": 2.538914145588933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289210 + }, + { + "epoch": 1.402664733205379, + "grad_norm": 3.040313458768651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289220 + }, + { + "epoch": 1.4027132313982151, + "grad_norm": 2.465895931891282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289230 + }, + { + "epoch": 1.402761729591051, + "grad_norm": 4.173052445821668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289240 + }, + { + "epoch": 1.4028102277838872, + "grad_norm": 2.844839173121727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289250 + }, + { + "epoch": 1.4028587259767233, + "grad_norm": 2.3729296572128078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289260 + }, + { + "epoch": 1.4029072241695593, + "grad_norm": 2.3618279954007448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289270 + }, + { + "epoch": 1.4029557223623954, + "grad_norm": 2.507721887923253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289280 + }, + { + "epoch": 1.4030042205552316, + "grad_norm": 2.5664584768492205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289290 + }, + { + "epoch": 1.4030527187480677, + "grad_norm": 2.404351562290685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289300 + }, + { + "epoch": 1.4031012169409038, + "grad_norm": 1.2218038136779796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289310 + }, + { + "epoch": 1.4031497151337398, + "grad_norm": 2.485547270225652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289320 + }, + { + "epoch": 1.403198213326576, + "grad_norm": 2.477288489899365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289330 + }, + { + "epoch": 1.403246711519412, + "grad_norm": 2.6158878085880133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289340 + }, + { + "epoch": 1.403295209712248, + "grad_norm": 2.2137427890811523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289350 + }, + { + "epoch": 1.4033437079050841, + "grad_norm": 2.2585807357700105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289360 + }, + { + "epoch": 1.4033922060979203, + "grad_norm": 7.124819489945367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289370 + }, + { + "epoch": 1.4034407042907564, + "grad_norm": 2.3476142985145998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289380 + }, + { + "epoch": 1.4034892024835925, + "grad_norm": 2.3609881338870764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289390 + }, + { + "epoch": 1.4035377006764285, + "grad_norm": 2.0082808305232902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289400 + }, + { + "epoch": 1.4035861988692646, + "grad_norm": 3.3847547342702455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289410 + }, + { + "epoch": 1.4036346970621008, + "grad_norm": 2.2081624706515868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289420 + }, + { + "epoch": 1.4036831952549367, + "grad_norm": 2.0569177650031634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289430 + }, + { + "epoch": 1.4037316934477728, + "grad_norm": 1.9058153100104391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289440 + }, + { + "epoch": 1.403780191640609, + "grad_norm": 2.0562067959417618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289450 + }, + { + "epoch": 1.403828689833445, + "grad_norm": 3.0433270126195566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289460 + }, + { + "epoch": 1.4038771880262813, + "grad_norm": 2.1982300779654906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289470 + }, + { + "epoch": 1.4039256862191172, + "grad_norm": 2.022798071266152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289480 + }, + { + "epoch": 1.4039741844119533, + "grad_norm": 2.10629650609917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289490 + }, + { + "epoch": 1.4040226826047895, + "grad_norm": 2.1821232110141864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289500 + }, + { + "epoch": 1.4040711807976256, + "grad_norm": 2.2442864633376303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289510 + }, + { + "epoch": 1.4041196789904618, + "grad_norm": 2.7512368205862003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289520 + }, + { + "epoch": 1.4041681771832977, + "grad_norm": 1.8422295511300035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289530 + }, + { + "epoch": 1.4042166753761338, + "grad_norm": 1.995858127656902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289540 + }, + { + "epoch": 1.40426517356897, + "grad_norm": 1.8813950930507417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289550 + }, + { + "epoch": 1.4043136717618059, + "grad_norm": 2.3589646502841788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289560 + }, + { + "epoch": 1.404362169954642, + "grad_norm": 2.006183876801515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289570 + }, + { + "epoch": 1.4044106681474782, + "grad_norm": 1.9769720438489458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289580 + }, + { + "epoch": 1.4044591663403143, + "grad_norm": 2.3737230492315575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289590 + }, + { + "epoch": 1.4045076645331505, + "grad_norm": 1.8892723119279253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289600 + }, + { + "epoch": 1.4045561627259864, + "grad_norm": 1.915091161208693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289610 + }, + { + "epoch": 1.4046046609188225, + "grad_norm": 1.9316580335271283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289620 + }, + { + "epoch": 1.4046531591116587, + "grad_norm": 2.0591353688814706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289630 + }, + { + "epoch": 1.4047016573044946, + "grad_norm": 1.8588286820886424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289640 + }, + { + "epoch": 1.4047501554973307, + "grad_norm": 1.8616054830999929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289650 + }, + { + "epoch": 1.4047986536901669, + "grad_norm": 1.8013790281656838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289660 + }, + { + "epoch": 1.404847151883003, + "grad_norm": 1.910058244902757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289670 + }, + { + "epoch": 1.4048956500758392, + "grad_norm": 2.629312234603276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289680 + }, + { + "epoch": 1.404944148268675, + "grad_norm": 1.8535462231739075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289690 + }, + { + "epoch": 1.4049926464615112, + "grad_norm": 2.126706846183879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289700 + }, + { + "epoch": 1.4050411446543474, + "grad_norm": 1.951778472175647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289710 + }, + { + "epoch": 1.4050896428471833, + "grad_norm": 1.5598074298850406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289720 + }, + { + "epoch": 1.4051381410400194, + "grad_norm": 1.9567733033909462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289730 + }, + { + "epoch": 1.4051866392328556, + "grad_norm": 1.6073342123945622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289740 + }, + { + "epoch": 1.4052351374256917, + "grad_norm": 3.529354160036746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289750 + }, + { + "epoch": 1.4052836356185279, + "grad_norm": 1.7743367664024845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289760 + }, + { + "epoch": 1.4053321338113638, + "grad_norm": 2.0182953619496402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289770 + }, + { + "epoch": 1.4053806320042, + "grad_norm": 1.7076062874821218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289780 + }, + { + "epoch": 1.405429130197036, + "grad_norm": 1.8519186539833754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289790 + }, + { + "epoch": 1.405477628389872, + "grad_norm": 1.6375989275729808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289800 + }, + { + "epoch": 1.4055261265827081, + "grad_norm": 1.4927164215805533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289810 + }, + { + "epoch": 1.4055746247755443, + "grad_norm": 1.719427018542774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289820 + }, + { + "epoch": 1.4056231229683804, + "grad_norm": 2.426475305128406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289830 + }, + { + "epoch": 1.4056716211612166, + "grad_norm": 1.4717238627781626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289840 + }, + { + "epoch": 1.4057201193540525, + "grad_norm": 1.6574966821281123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289850 + }, + { + "epoch": 1.4057686175468886, + "grad_norm": 1.6842054151311459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289860 + }, + { + "epoch": 1.4058171157397248, + "grad_norm": 1.5978444878328446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289870 + }, + { + "epoch": 1.4058656139325607, + "grad_norm": 4.112358737984323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289880 + }, + { + "epoch": 1.4059141121253969, + "grad_norm": 2.84617328816239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289890 + }, + { + "epoch": 1.405962610318233, + "grad_norm": 1.526169057797233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289900 + }, + { + "epoch": 1.4060111085110691, + "grad_norm": 1.5022074251191952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289910 + }, + { + "epoch": 1.4060596067039053, + "grad_norm": 2.1086651713631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289920 + }, + { + "epoch": 1.4061081048967412, + "grad_norm": 1.6065084196270618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289930 + }, + { + "epoch": 1.4061566030895774, + "grad_norm": 1.6547660663945862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289940 + }, + { + "epoch": 1.4062051012824135, + "grad_norm": 1.800020896780552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289950 + }, + { + "epoch": 1.4062535994752494, + "grad_norm": 1.5078227022513602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289960 + }, + { + "epoch": 1.4063020976680856, + "grad_norm": 1.5557829158296954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289970 + }, + { + "epoch": 1.4063505958609217, + "grad_norm": 7.780798796375166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289980 + }, + { + "epoch": 1.4063990940537578, + "grad_norm": 1.6071474817636044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 289990 + }, + { + "epoch": 1.406447592246594, + "grad_norm": 2.42376415826584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290000 + }, + { + "epoch": 1.40649609043943, + "grad_norm": 1.444112029957978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290010 + }, + { + "epoch": 1.406544588632266, + "grad_norm": 1.3748645244504587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290020 + }, + { + "epoch": 1.4065930868251022, + "grad_norm": 1.867140611011564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290030 + }, + { + "epoch": 1.4066415850179383, + "grad_norm": 1.4611079279802652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290040 + }, + { + "epoch": 1.4066900832107745, + "grad_norm": 1.5571770006772567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290050 + }, + { + "epoch": 1.4067385814036104, + "grad_norm": 1.4666709091670782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290060 + }, + { + "epoch": 1.4067870795964466, + "grad_norm": 1.3539332712753094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290070 + }, + { + "epoch": 1.4068355777892827, + "grad_norm": 1.445223460905254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290080 + }, + { + "epoch": 1.4068840759821186, + "grad_norm": 1.489535890186744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290090 + }, + { + "epoch": 1.4069325741749548, + "grad_norm": 1.375432105987784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290100 + }, + { + "epoch": 1.406981072367791, + "grad_norm": 1.4490049693449691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290110 + }, + { + "epoch": 1.407029570560627, + "grad_norm": 1.3380417840380687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290120 + }, + { + "epoch": 1.4070780687534632, + "grad_norm": 4.192112328382791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290130 + }, + { + "epoch": 1.4071265669462991, + "grad_norm": 1.7787085937470692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290140 + }, + { + "epoch": 1.4071750651391353, + "grad_norm": 1.7555528586399305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290150 + }, + { + "epoch": 1.4072235633319714, + "grad_norm": 1.324867895391435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290160 + }, + { + "epoch": 1.4072720615248073, + "grad_norm": 1.3806173626562668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290170 + }, + { + "epoch": 1.4073205597176435, + "grad_norm": 1.4013545523994253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290180 + }, + { + "epoch": 1.4073690579104796, + "grad_norm": 1.4164275796701986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290190 + }, + { + "epoch": 1.4074175561033158, + "grad_norm": 1.4187371277785132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290200 + }, + { + "epoch": 1.407466054296152, + "grad_norm": 4.451922563930566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290210 + }, + { + "epoch": 1.4075145524889878, + "grad_norm": 1.472195378937613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290220 + }, + { + "epoch": 1.407563050681824, + "grad_norm": 1.2753230294038076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290230 + }, + { + "epoch": 1.4076115488746601, + "grad_norm": 1.6443921424524888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290240 + }, + { + "epoch": 1.407660047067496, + "grad_norm": 1.4695952188503725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290250 + }, + { + "epoch": 1.4077085452603322, + "grad_norm": 1.2905128699003399e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290260 + }, + { + "epoch": 1.4077570434531683, + "grad_norm": 1.2754405531723023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290270 + }, + { + "epoch": 1.4078055416460045, + "grad_norm": 1.1505009211987272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290280 + }, + { + "epoch": 1.4078540398388406, + "grad_norm": 1.59979023806045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290290 + }, + { + "epoch": 1.4079025380316765, + "grad_norm": 1.3345950833354436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290300 + }, + { + "epoch": 1.4079510362245127, + "grad_norm": 1.2428039042333694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290310 + }, + { + "epoch": 1.4079995344173488, + "grad_norm": 1.300632277434488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290320 + }, + { + "epoch": 1.4080480326101847, + "grad_norm": 1.2385410741444502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290330 + }, + { + "epoch": 1.4080965308030209, + "grad_norm": 1.538296032776998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290340 + }, + { + "epoch": 1.408145028995857, + "grad_norm": 1.401563594072286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290350 + }, + { + "epoch": 1.4081935271886932, + "grad_norm": 1.2461873666325118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290360 + }, + { + "epoch": 1.4082420253815293, + "grad_norm": 1.7579247924004449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290370 + }, + { + "epoch": 1.4082905235743652, + "grad_norm": 1.3421417577319517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290380 + }, + { + "epoch": 1.4083390217672014, + "grad_norm": 1.8154585745833174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290390 + }, + { + "epoch": 1.4083875199600375, + "grad_norm": 1.3505444940165034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290400 + }, + { + "epoch": 1.4084360181528734, + "grad_norm": 1.2915475622321537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290410 + }, + { + "epoch": 1.4084845163457096, + "grad_norm": 1.2990399511636497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290420 + }, + { + "epoch": 1.4085330145385457, + "grad_norm": 1.1954919898471417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290430 + }, + { + "epoch": 1.4085815127313819, + "grad_norm": 1.1372468122772261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290440 + }, + { + "epoch": 1.408630010924218, + "grad_norm": 1.2399893023484765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290450 + }, + { + "epoch": 1.408678509117054, + "grad_norm": 1.2118066194943822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290460 + }, + { + "epoch": 1.40872700730989, + "grad_norm": 1.2739981514187093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290470 + }, + { + "epoch": 1.4087755055027262, + "grad_norm": 1.1769576957476602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290480 + }, + { + "epoch": 1.4088240036955624, + "grad_norm": 1.3957223643501493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290490 + }, + { + "epoch": 1.4088725018883985, + "grad_norm": 1.1546705991349882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290500 + }, + { + "epoch": 1.4089210000812344, + "grad_norm": 3.028458479548135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290510 + }, + { + "epoch": 1.4089694982740706, + "grad_norm": 1.3129702836067736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290520 + }, + { + "epoch": 1.4090179964669067, + "grad_norm": 1.323939642361438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290530 + }, + { + "epoch": 1.4090664946597427, + "grad_norm": 1.1028169666360554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290540 + }, + { + "epoch": 1.4091149928525788, + "grad_norm": 1.2108120017728652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290550 + }, + { + "epoch": 1.409163491045415, + "grad_norm": 1.1171950831112554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290560 + }, + { + "epoch": 1.409211989238251, + "grad_norm": 1.222053640503873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290570 + }, + { + "epoch": 1.4092604874310872, + "grad_norm": 1.0954838813859169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290580 + }, + { + "epoch": 1.4093089856239231, + "grad_norm": 1.106763320990467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290590 + }, + { + "epoch": 1.4093574838167593, + "grad_norm": 1.0436349384690402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290600 + }, + { + "epoch": 1.4094059820095954, + "grad_norm": 1.1961752477418486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290610 + }, + { + "epoch": 1.4094544802024314, + "grad_norm": 1.1990330506250757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290620 + }, + { + "epoch": 1.4095029783952675, + "grad_norm": 1.0774229508569988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290630 + }, + { + "epoch": 1.4095514765881036, + "grad_norm": 1.1570858760023839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290640 + }, + { + "epoch": 1.4095999747809398, + "grad_norm": 1.2735256404994288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290650 + }, + { + "epoch": 1.409648472973776, + "grad_norm": 1.1368804564426682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290660 + }, + { + "epoch": 1.4096969711666119, + "grad_norm": 1.1310727643376595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290670 + }, + { + "epoch": 1.409745469359448, + "grad_norm": 1.1208608441393153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290680 + }, + { + "epoch": 1.4097939675522841, + "grad_norm": 1.2808617100290576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290690 + }, + { + "epoch": 1.40984246574512, + "grad_norm": 1.19131236431258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290700 + }, + { + "epoch": 1.4098909639379562, + "grad_norm": 1.1263092858371238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290710 + }, + { + "epoch": 1.4099394621307924, + "grad_norm": 1.6079853537576128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290720 + }, + { + "epoch": 1.4099879603236285, + "grad_norm": 1.0453906895691034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290730 + }, + { + "epoch": 1.4100364585164646, + "grad_norm": 1.398520907969214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290740 + }, + { + "epoch": 1.4100849567093006, + "grad_norm": 1.1728162263580089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290750 + }, + { + "epoch": 1.4101334549021367, + "grad_norm": 1.201988197863102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290760 + }, + { + "epoch": 1.4101819530949729, + "grad_norm": 1.1246211073512313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290770 + }, + { + "epoch": 1.4102304512878088, + "grad_norm": 1.0807559647219023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290780 + }, + { + "epoch": 1.410278949480645, + "grad_norm": 1.1278532952019305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290790 + }, + { + "epoch": 1.410327447673481, + "grad_norm": 1.0822665785781282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290800 + }, + { + "epoch": 1.4103759458663172, + "grad_norm": 1.1795987830964805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290810 + }, + { + "epoch": 1.4104244440591533, + "grad_norm": 1.1766167062887689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290820 + }, + { + "epoch": 1.4104729422519893, + "grad_norm": 1.1003250932617448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290830 + }, + { + "epoch": 1.4105214404448254, + "grad_norm": 1.248975110001993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290840 + }, + { + "epoch": 1.4105699386376616, + "grad_norm": 1.1404046773577647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290850 + }, + { + "epoch": 1.4106184368304975, + "grad_norm": 3.4712314800344757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290860 + }, + { + "epoch": 1.4106669350233336, + "grad_norm": 1.4873602083298465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290870 + }, + { + "epoch": 1.4107154332161698, + "grad_norm": 9.567025216483671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290880 + }, + { + "epoch": 1.410763931409006, + "grad_norm": 9.194355499175799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290890 + }, + { + "epoch": 1.410812429601842, + "grad_norm": 1.074499067499346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290900 + }, + { + "epoch": 1.410860927794678, + "grad_norm": 1.017737929487339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290910 + }, + { + "epoch": 1.4109094259875141, + "grad_norm": 1.0854677867655482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290920 + }, + { + "epoch": 1.4109579241803503, + "grad_norm": 1.1824469936527748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290930 + }, + { + "epoch": 1.4110064223731862, + "grad_norm": 9.993093641469386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290940 + }, + { + "epoch": 1.4110549205660223, + "grad_norm": 9.507834874966647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290950 + }, + { + "epoch": 1.4111034187588585, + "grad_norm": 1.1637913388540255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290960 + }, + { + "epoch": 1.4111519169516946, + "grad_norm": 1.0587918808369068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290970 + }, + { + "epoch": 1.4112004151445308, + "grad_norm": 9.260701006041927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290980 + }, + { + "epoch": 1.4112489133373667, + "grad_norm": 1.0734814281931904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 290990 + }, + { + "epoch": 1.4112974115302028, + "grad_norm": 9.888506014021914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291000 + }, + { + "epoch": 1.411345909723039, + "grad_norm": 9.981537374414984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291010 + }, + { + "epoch": 1.4113944079158751, + "grad_norm": 1.9517511873345939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291020 + }, + { + "epoch": 1.4114429061087113, + "grad_norm": 9.735820327705369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291030 + }, + { + "epoch": 1.4114914043015472, + "grad_norm": 9.423981595091391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291040 + }, + { + "epoch": 1.4115399024943833, + "grad_norm": 9.922391797090313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291050 + }, + { + "epoch": 1.4115884006872195, + "grad_norm": 9.972720249606937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291060 + }, + { + "epoch": 1.4116368988800554, + "grad_norm": 9.846138482316746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291070 + }, + { + "epoch": 1.4116853970728915, + "grad_norm": 8.526014028120699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291080 + }, + { + "epoch": 1.4117338952657277, + "grad_norm": 9.671696687973963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291090 + }, + { + "epoch": 1.4117823934585638, + "grad_norm": 1.08368908513512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291100 + }, + { + "epoch": 1.4118308916514, + "grad_norm": 9.737757977745787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291110 + }, + { + "epoch": 1.4118793898442359, + "grad_norm": 9.650484145140581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291120 + }, + { + "epoch": 1.411927888037072, + "grad_norm": 1.0356147583934217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291130 + }, + { + "epoch": 1.4119763862299082, + "grad_norm": 1.0125391725068766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291140 + }, + { + "epoch": 1.412024884422744, + "grad_norm": 9.908625031584961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291150 + }, + { + "epoch": 1.4120733826155802, + "grad_norm": 9.699856917677607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291160 + }, + { + "epoch": 1.4121218808084164, + "grad_norm": 9.637199127610074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291170 + }, + { + "epoch": 1.4121703790012525, + "grad_norm": 9.624272223618391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291180 + }, + { + "epoch": 1.4122188771940887, + "grad_norm": 8.677694296466143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291190 + }, + { + "epoch": 1.4122673753869246, + "grad_norm": 9.82754926326379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291200 + }, + { + "epoch": 1.4123158735797607, + "grad_norm": 9.973566506005227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291210 + }, + { + "epoch": 1.4123643717725969, + "grad_norm": 9.130376099619752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291220 + }, + { + "epoch": 1.4124128699654328, + "grad_norm": 9.165957237655675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291230 + }, + { + "epoch": 1.412461368158269, + "grad_norm": 9.14194160372972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291240 + }, + { + "epoch": 1.412509866351105, + "grad_norm": 9.649846077763868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291250 + }, + { + "epoch": 1.4125583645439412, + "grad_norm": 9.631347097638354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291260 + }, + { + "epoch": 1.4126068627367774, + "grad_norm": 9.81912151587494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291270 + }, + { + "epoch": 1.4126553609296133, + "grad_norm": 9.121078647922332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291280 + }, + { + "epoch": 1.4127038591224494, + "grad_norm": 1.0524566107505962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291290 + }, + { + "epoch": 1.4127523573152856, + "grad_norm": 9.351735030804775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291300 + }, + { + "epoch": 1.4128008555081215, + "grad_norm": 9.669658851407803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291310 + }, + { + "epoch": 1.4128493537009577, + "grad_norm": 1.027270144504655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291320 + }, + { + "epoch": 1.4128978518937938, + "grad_norm": 8.932081385637503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291330 + }, + { + "epoch": 1.41294635008663, + "grad_norm": 9.081563234758505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291340 + }, + { + "epoch": 1.412994848279466, + "grad_norm": 9.709703618909771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291350 + }, + { + "epoch": 1.413043346472302, + "grad_norm": 9.712062620792494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291360 + }, + { + "epoch": 1.4130918446651382, + "grad_norm": 9.059645833531249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291370 + }, + { + "epoch": 1.4131403428579743, + "grad_norm": 9.974151282676758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291380 + }, + { + "epoch": 1.4131888410508102, + "grad_norm": 8.881283264372541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291390 + }, + { + "epoch": 1.4132373392436464, + "grad_norm": 9.575759207791634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291400 + }, + { + "epoch": 1.4132858374364825, + "grad_norm": 9.599342831734248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291410 + }, + { + "epoch": 1.4133343356293186, + "grad_norm": 9.101788833731916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291420 + }, + { + "epoch": 1.4133828338221548, + "grad_norm": 9.857185290229609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291430 + }, + { + "epoch": 1.4134313320149907, + "grad_norm": 8.962868491835252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291440 + }, + { + "epoch": 1.4134798302078269, + "grad_norm": 9.158991787217019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291450 + }, + { + "epoch": 1.413528328400663, + "grad_norm": 9.096347497461466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291460 + }, + { + "epoch": 1.413576826593499, + "grad_norm": 9.671234835195719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291470 + }, + { + "epoch": 1.413625324786335, + "grad_norm": 9.096902431338094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291480 + }, + { + "epoch": 1.4136738229791712, + "grad_norm": 8.714776811302727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291490 + }, + { + "epoch": 1.4137223211720074, + "grad_norm": 9.266533140817046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291500 + }, + { + "epoch": 1.4137708193648435, + "grad_norm": 9.274985757201648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291510 + }, + { + "epoch": 1.4138193175576794, + "grad_norm": 9.880719886723455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291520 + }, + { + "epoch": 1.4138678157505156, + "grad_norm": 9.068134687595375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291530 + }, + { + "epoch": 1.4139163139433517, + "grad_norm": 9.764357855601702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291540 + }, + { + "epoch": 1.4139648121361879, + "grad_norm": 5.769408630840189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291550 + }, + { + "epoch": 1.414013310329024, + "grad_norm": 9.535332878840563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291560 + }, + { + "epoch": 1.41406180852186, + "grad_norm": 9.186310734321523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291570 + }, + { + "epoch": 1.414110306714696, + "grad_norm": 1.1908545616279298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291580 + }, + { + "epoch": 1.4141588049075322, + "grad_norm": 8.393617889623783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291590 + }, + { + "epoch": 1.4142073031003681, + "grad_norm": 9.252221389033366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291600 + }, + { + "epoch": 1.4142558012932043, + "grad_norm": 9.755200380823226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291610 + }, + { + "epoch": 1.4143042994860404, + "grad_norm": 9.063998618330515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291620 + }, + { + "epoch": 1.4143527976788766, + "grad_norm": 8.710131993439063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291630 + }, + { + "epoch": 1.4144012958717127, + "grad_norm": 8.535406692544711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291640 + }, + { + "epoch": 1.4144497940645486, + "grad_norm": 9.415913382326835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291650 + }, + { + "epoch": 1.4144982922573848, + "grad_norm": 9.356315189279485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291660 + }, + { + "epoch": 1.414546790450221, + "grad_norm": 8.960975605987187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291670 + }, + { + "epoch": 1.4145952886430568, + "grad_norm": 1.0947732675958832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291680 + }, + { + "epoch": 1.414643786835893, + "grad_norm": 8.604341417139949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291690 + }, + { + "epoch": 1.4146922850287291, + "grad_norm": 9.35248678501921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291700 + }, + { + "epoch": 1.4147407832215653, + "grad_norm": 9.166318193365441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291710 + }, + { + "epoch": 1.4147892814144014, + "grad_norm": 9.460433858521355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291720 + }, + { + "epoch": 1.4148377796072373, + "grad_norm": 9.248167742725855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291730 + }, + { + "epoch": 1.4148862778000735, + "grad_norm": 8.549436358862295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291740 + }, + { + "epoch": 1.4149347759929096, + "grad_norm": 8.767254655595025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291750 + }, + { + "epoch": 1.4149832741857455, + "grad_norm": 8.947079521703927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291760 + }, + { + "epoch": 1.4150317723785817, + "grad_norm": 2.7076021069660783e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291770 + }, + { + "epoch": 1.4150802705714178, + "grad_norm": 9.140199352941636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291780 + }, + { + "epoch": 1.415128768764254, + "grad_norm": 8.274405871588897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291790 + }, + { + "epoch": 1.4151772669570901, + "grad_norm": 9.366595321580462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291800 + }, + { + "epoch": 1.415225765149926, + "grad_norm": 8.972290999054167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291810 + }, + { + "epoch": 1.4152742633427622, + "grad_norm": 9.805756917558028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291820 + }, + { + "epoch": 1.4153227615355983, + "grad_norm": 8.264972706228946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291830 + }, + { + "epoch": 1.4153712597284343, + "grad_norm": 8.570263076990159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291840 + }, + { + "epoch": 1.4154197579212704, + "grad_norm": 8.786251726178307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291850 + }, + { + "epoch": 1.4154682561141065, + "grad_norm": 8.883238677981353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291860 + }, + { + "epoch": 1.4155167543069427, + "grad_norm": 8.632711967493378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291870 + }, + { + "epoch": 1.4155652524997788, + "grad_norm": 2.6020745735877426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291880 + }, + { + "epoch": 1.4156137506926147, + "grad_norm": 1.5789078133821022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291890 + }, + { + "epoch": 1.415662248885451, + "grad_norm": 1.1147493239604955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291900 + }, + { + "epoch": 1.415710747078287, + "grad_norm": 8.841045229246447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291910 + }, + { + "epoch": 1.415759245271123, + "grad_norm": 1.1943591005092458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291920 + }, + { + "epoch": 1.415807743463959, + "grad_norm": 8.326497891175677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291930 + }, + { + "epoch": 1.4158562416567952, + "grad_norm": 9.28200734051643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291940 + }, + { + "epoch": 1.4159047398496314, + "grad_norm": 9.161527714240947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291950 + }, + { + "epoch": 1.4159532380424675, + "grad_norm": 8.830899389522529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291960 + }, + { + "epoch": 1.4160017362353035, + "grad_norm": 9.002585699136034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291970 + }, + { + "epoch": 1.4160502344281396, + "grad_norm": 8.476322932438052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291980 + }, + { + "epoch": 1.4160987326209757, + "grad_norm": 8.714746968507825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 291990 + }, + { + "epoch": 1.4161472308138117, + "grad_norm": 8.820649810559189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292000 + }, + { + "epoch": 1.4161957290066478, + "grad_norm": 9.065461625823446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292010 + }, + { + "epoch": 1.416244227199484, + "grad_norm": 8.890459213262147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292020 + }, + { + "epoch": 1.41629272539232, + "grad_norm": 8.516809657521662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292030 + }, + { + "epoch": 1.4163412235851562, + "grad_norm": 8.122393069243117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292040 + }, + { + "epoch": 1.4163897217779922, + "grad_norm": 9.183299454207372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292050 + }, + { + "epoch": 1.4164382199708283, + "grad_norm": 8.856846989147016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292060 + }, + { + "epoch": 1.4164867181636644, + "grad_norm": 8.687648289651406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292070 + }, + { + "epoch": 1.4165352163565006, + "grad_norm": 8.257325845306696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292080 + }, + { + "epoch": 1.4165837145493367, + "grad_norm": 8.23617085643491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292090 + }, + { + "epoch": 1.4166322127421727, + "grad_norm": 8.602292922432753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292100 + }, + { + "epoch": 1.4166807109350088, + "grad_norm": 8.75488268547997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292110 + }, + { + "epoch": 1.416729209127845, + "grad_norm": 9.201619377563475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292120 + }, + { + "epoch": 1.4167777073206809, + "grad_norm": 8.08903806159833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292130 + }, + { + "epoch": 1.416826205513517, + "grad_norm": 8.517610439184864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292140 + }, + { + "epoch": 1.4168747037063532, + "grad_norm": 8.835506548621197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292150 + }, + { + "epoch": 1.4169232018991893, + "grad_norm": 1.0868904354310871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292160 + }, + { + "epoch": 1.4169717000920254, + "grad_norm": 9.638668529987626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292170 + }, + { + "epoch": 1.4170201982848614, + "grad_norm": 8.521833905206222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292180 + }, + { + "epoch": 1.4170686964776975, + "grad_norm": 8.418098929041662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292190 + }, + { + "epoch": 1.4171171946705337, + "grad_norm": 8.444335009016868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292200 + }, + { + "epoch": 1.4171656928633696, + "grad_norm": 8.40641192212388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292210 + }, + { + "epoch": 1.4172141910562057, + "grad_norm": 8.46313099600593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292220 + }, + { + "epoch": 1.4172626892490419, + "grad_norm": 8.211050328554848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292230 + }, + { + "epoch": 1.417311187441878, + "grad_norm": 8.138712814798055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292240 + }, + { + "epoch": 1.4173596856347142, + "grad_norm": 8.210081858806006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292250 + }, + { + "epoch": 1.41740818382755, + "grad_norm": 8.838291876145377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292260 + }, + { + "epoch": 1.4174566820203862, + "grad_norm": 8.433934794993547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292270 + }, + { + "epoch": 1.4175051802132224, + "grad_norm": 7.752807107408444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292280 + }, + { + "epoch": 1.4175536784060583, + "grad_norm": 8.123795680603507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292290 + }, + { + "epoch": 1.4176021765988944, + "grad_norm": 8.166416876065341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292300 + }, + { + "epoch": 1.4176506747917306, + "grad_norm": 8.004641216530217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292310 + }, + { + "epoch": 1.4176991729845667, + "grad_norm": 8.175157972800662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292320 + }, + { + "epoch": 1.4177476711774029, + "grad_norm": 8.57450288549444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292330 + }, + { + "epoch": 1.4177961693702388, + "grad_norm": 8.427042530456674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292340 + }, + { + "epoch": 1.417844667563075, + "grad_norm": 8.841484344657147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292350 + }, + { + "epoch": 1.417893165755911, + "grad_norm": 7.960333903156425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292360 + }, + { + "epoch": 1.417941663948747, + "grad_norm": 8.319285171864976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292370 + }, + { + "epoch": 1.4179901621415831, + "grad_norm": 8.167435794348421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292380 + }, + { + "epoch": 1.4180386603344193, + "grad_norm": 8.322308531205636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292390 + }, + { + "epoch": 1.4180871585272554, + "grad_norm": 8.46122389930315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292400 + }, + { + "epoch": 1.4181356567200916, + "grad_norm": 8.506915349926203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292410 + }, + { + "epoch": 1.4181841549129275, + "grad_norm": 8.10941642725993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292420 + }, + { + "epoch": 1.4182326531057636, + "grad_norm": 7.95190686631031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292430 + }, + { + "epoch": 1.4182811512985998, + "grad_norm": 7.861244455398264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292440 + }, + { + "epoch": 1.4183296494914357, + "grad_norm": 8.246045979376504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292450 + }, + { + "epoch": 1.4183781476842718, + "grad_norm": 8.012142416191637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292460 + }, + { + "epoch": 1.418426645877108, + "grad_norm": 8.629161385442785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292470 + }, + { + "epoch": 1.4184751440699441, + "grad_norm": 8.170248833039295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292480 + }, + { + "epoch": 1.4185236422627803, + "grad_norm": 8.26114927576782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292490 + }, + { + "epoch": 1.4185721404556162, + "grad_norm": 8.203407730889012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292500 + }, + { + "epoch": 1.4186206386484523, + "grad_norm": 8.69287291038745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292510 + }, + { + "epoch": 1.4186691368412885, + "grad_norm": 8.067576118264697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292520 + }, + { + "epoch": 1.4187176350341244, + "grad_norm": 7.965247306174206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292530 + }, + { + "epoch": 1.4187661332269608, + "grad_norm": 9.168747538979005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292540 + }, + { + "epoch": 1.4188146314197967, + "grad_norm": 8.088624525726118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292550 + }, + { + "epoch": 1.4188631296126328, + "grad_norm": 8.57613500215848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292560 + }, + { + "epoch": 1.418911627805469, + "grad_norm": 8.125110184664663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292570 + }, + { + "epoch": 1.418960125998305, + "grad_norm": 7.793147460688488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292580 + }, + { + "epoch": 1.419008624191141, + "grad_norm": 7.917218169950502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292590 + }, + { + "epoch": 1.4190571223839772, + "grad_norm": 7.669962798217966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292600 + }, + { + "epoch": 1.4191056205768133, + "grad_norm": 7.916337096958159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292610 + }, + { + "epoch": 1.4191541187696495, + "grad_norm": 8.714464172498992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292620 + }, + { + "epoch": 1.4192026169624854, + "grad_norm": 8.023542363844172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292630 + }, + { + "epoch": 1.4192511151553215, + "grad_norm": 7.940281676610539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292640 + }, + { + "epoch": 1.4192996133481577, + "grad_norm": 8.216385083414934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292650 + }, + { + "epoch": 1.4193481115409936, + "grad_norm": 8.335073431453566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292660 + }, + { + "epoch": 1.4193966097338298, + "grad_norm": 7.994798068011733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292670 + }, + { + "epoch": 1.419445107926666, + "grad_norm": 8.178744081988043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292680 + }, + { + "epoch": 1.419493606119502, + "grad_norm": 7.831729931240261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292690 + }, + { + "epoch": 1.4195421043123382, + "grad_norm": 7.723181028040926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292700 + }, + { + "epoch": 1.419590602505174, + "grad_norm": 7.853275008073979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292710 + }, + { + "epoch": 1.4196391006980102, + "grad_norm": 8.196145273586808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292720 + }, + { + "epoch": 1.4196875988908464, + "grad_norm": 7.653974876120628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292730 + }, + { + "epoch": 1.4197360970836823, + "grad_norm": 7.84668543474254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292740 + }, + { + "epoch": 1.4197845952765185, + "grad_norm": 8.027419795553214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292750 + }, + { + "epoch": 1.4198330934693546, + "grad_norm": 8.071895507555382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292760 + }, + { + "epoch": 1.4198815916621907, + "grad_norm": 7.984332484056722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292770 + }, + { + "epoch": 1.419930089855027, + "grad_norm": 1.1749042982955871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292780 + }, + { + "epoch": 1.4199785880478628, + "grad_norm": 8.028616349520235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292790 + }, + { + "epoch": 1.420027086240699, + "grad_norm": 7.82491440531885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292800 + }, + { + "epoch": 1.420075584433535, + "grad_norm": 8.215997837623945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292810 + }, + { + "epoch": 1.420124082626371, + "grad_norm": 7.509002841743495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292820 + }, + { + "epoch": 1.4201725808192072, + "grad_norm": 7.819192404667774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292830 + }, + { + "epoch": 1.4202210790120433, + "grad_norm": 8.036055021420907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292840 + }, + { + "epoch": 1.4202695772048795, + "grad_norm": 7.66884937775103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292850 + }, + { + "epoch": 1.4203180753977156, + "grad_norm": 7.92094212442862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292860 + }, + { + "epoch": 1.4203665735905515, + "grad_norm": 7.74959687532828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292870 + }, + { + "epoch": 1.4204150717833877, + "grad_norm": 8.087857139571497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292880 + }, + { + "epoch": 1.4204635699762238, + "grad_norm": 2.3921742808852287e-07, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 292890 + }, + { + "epoch": 1.4205120681690597, + "grad_norm": 3.1433493859367445e-05, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 292900 + }, + { + "epoch": 1.4205605663618959, + "grad_norm": 0.0029831125866621733, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 292910 + }, + { + "epoch": 1.420609064554732, + "grad_norm": 0.0024563551414757967, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 292920 + }, + { + "epoch": 1.4206575627475682, + "grad_norm": 0.0004896666505374014, + "learning_rate": 0.0002, + "loss": 0.0037, + "step": 292930 + }, + { + "epoch": 1.4207060609404043, + "grad_norm": 2.5433106202399358e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 292940 + }, + { + "epoch": 1.4207545591332402, + "grad_norm": 0.0001903585798572749, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292950 + }, + { + "epoch": 1.4208030573260764, + "grad_norm": 3.423450834816322e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292960 + }, + { + "epoch": 1.4208515555189125, + "grad_norm": 3.143617504974827e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292970 + }, + { + "epoch": 1.4209000537117484, + "grad_norm": 5.569068162003532e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292980 + }, + { + "epoch": 1.4209485519045846, + "grad_norm": 2.0089317331439815e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 292990 + }, + { + "epoch": 1.4209970500974207, + "grad_norm": 2.2155991246108897e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293000 + }, + { + "epoch": 1.4210455482902569, + "grad_norm": 2.1396666852524504e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293010 + }, + { + "epoch": 1.421094046483093, + "grad_norm": 0.0016471341950818896, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 293020 + }, + { + "epoch": 1.421142544675929, + "grad_norm": 0.0014014357002452016, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 293030 + }, + { + "epoch": 1.421191042868765, + "grad_norm": 0.021788956597447395, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 293040 + }, + { + "epoch": 1.4212395410616012, + "grad_norm": 0.01781616173684597, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293050 + }, + { + "epoch": 1.4212880392544374, + "grad_norm": 0.05699592083692551, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 293060 + }, + { + "epoch": 1.4213365374472735, + "grad_norm": 0.0002301665663253516, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293070 + }, + { + "epoch": 1.4213850356401094, + "grad_norm": 5.0179420213680714e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 293080 + }, + { + "epoch": 1.4214335338329456, + "grad_norm": 0.00011346635437803343, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 293090 + }, + { + "epoch": 1.4214820320257817, + "grad_norm": 0.0013446949888020754, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 293100 + }, + { + "epoch": 1.4215305302186176, + "grad_norm": 0.0001417388702975586, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 293110 + }, + { + "epoch": 1.4215790284114538, + "grad_norm": 3.800064587267116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293120 + }, + { + "epoch": 1.42162752660429, + "grad_norm": 2.6925443307845853e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293130 + }, + { + "epoch": 1.421676024797126, + "grad_norm": 2.894619137805421e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293140 + }, + { + "epoch": 1.4217245229899622, + "grad_norm": 1.588589839229826e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293150 + }, + { + "epoch": 1.4217730211827981, + "grad_norm": 1.3630767170980107e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293160 + }, + { + "epoch": 1.4218215193756343, + "grad_norm": 1.2217345101817045e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293170 + }, + { + "epoch": 1.4218700175684704, + "grad_norm": 1.3256814781925641e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293180 + }, + { + "epoch": 1.4219185157613063, + "grad_norm": 1.5495112165808678e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293190 + }, + { + "epoch": 1.4219670139541425, + "grad_norm": 1.1085953701694962e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293200 + }, + { + "epoch": 1.4220155121469786, + "grad_norm": 9.640916687203571e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293210 + }, + { + "epoch": 1.4220640103398148, + "grad_norm": 9.535403478366788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293220 + }, + { + "epoch": 1.422112508532651, + "grad_norm": 9.856944416242186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293230 + }, + { + "epoch": 1.4221610067254868, + "grad_norm": 1.1358926713000983e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293240 + }, + { + "epoch": 1.422209504918323, + "grad_norm": 8.605139555584174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293250 + }, + { + "epoch": 1.4222580031111591, + "grad_norm": 7.579014436487341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293260 + }, + { + "epoch": 1.422306501303995, + "grad_norm": 7.241967068694066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293270 + }, + { + "epoch": 1.4223549994968312, + "grad_norm": 7.72512339608511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293280 + }, + { + "epoch": 1.4224034976896673, + "grad_norm": 8.975473065220285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293290 + }, + { + "epoch": 1.4224519958825035, + "grad_norm": 7.0945225161267444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293300 + }, + { + "epoch": 1.4225004940753396, + "grad_norm": 9.007919288706034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293310 + }, + { + "epoch": 1.4225489922681755, + "grad_norm": 6.632000804529525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293320 + }, + { + "epoch": 1.4225974904610117, + "grad_norm": 6.894995294715045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293330 + }, + { + "epoch": 1.4226459886538478, + "grad_norm": 7.859789548092522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293340 + }, + { + "epoch": 1.4226944868466838, + "grad_norm": 6.4544283304712735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293350 + }, + { + "epoch": 1.42274298503952, + "grad_norm": 5.902881639485713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293360 + }, + { + "epoch": 1.422791483232356, + "grad_norm": 6.026934443070786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293370 + }, + { + "epoch": 1.4228399814251922, + "grad_norm": 6.1121500039007515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293380 + }, + { + "epoch": 1.4228884796180283, + "grad_norm": 6.992402177274926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293390 + }, + { + "epoch": 1.4229369778108643, + "grad_norm": 5.21819720233907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293400 + }, + { + "epoch": 1.4229854760037004, + "grad_norm": 5.184839665162144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293410 + }, + { + "epoch": 1.4230339741965365, + "grad_norm": 5.091495950182434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293420 + }, + { + "epoch": 1.4230824723893725, + "grad_norm": 4.851573066844139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293430 + }, + { + "epoch": 1.4231309705822086, + "grad_norm": 6.374756139848614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293440 + }, + { + "epoch": 1.4231794687750448, + "grad_norm": 4.860524768446339e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293450 + }, + { + "epoch": 1.423227966967881, + "grad_norm": 4.874803380516823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293460 + }, + { + "epoch": 1.423276465160717, + "grad_norm": 5.335500190994935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293470 + }, + { + "epoch": 1.423324963353553, + "grad_norm": 4.622613687388366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293480 + }, + { + "epoch": 1.423373461546389, + "grad_norm": 5.5818136388552375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293490 + }, + { + "epoch": 1.4234219597392253, + "grad_norm": 4.558618002192816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293500 + }, + { + "epoch": 1.4234704579320612, + "grad_norm": 4.44385386799695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293510 + }, + { + "epoch": 1.4235189561248973, + "grad_norm": 4.208676728012506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293520 + }, + { + "epoch": 1.4235674543177335, + "grad_norm": 4.246519893058576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293530 + }, + { + "epoch": 1.4236159525105696, + "grad_norm": 5.117580258229282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293540 + }, + { + "epoch": 1.4236644507034057, + "grad_norm": 4.107959739485523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293550 + }, + { + "epoch": 1.4237129488962417, + "grad_norm": 4.041906777274562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293560 + }, + { + "epoch": 1.4237614470890778, + "grad_norm": 4.0406512198387645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293570 + }, + { + "epoch": 1.423809945281914, + "grad_norm": 4.180497398920124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293580 + }, + { + "epoch": 1.42385844347475, + "grad_norm": 4.432015884958673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293590 + }, + { + "epoch": 1.4239069416675862, + "grad_norm": 3.775427330765524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293600 + }, + { + "epoch": 1.4239554398604222, + "grad_norm": 3.845022547466215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293610 + }, + { + "epoch": 1.4240039380532583, + "grad_norm": 3.849860604532296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293620 + }, + { + "epoch": 1.4240524362460945, + "grad_norm": 3.652883151517017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293630 + }, + { + "epoch": 1.4241009344389304, + "grad_norm": 4.233346317050746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293640 + }, + { + "epoch": 1.4241494326317665, + "grad_norm": 3.6511044072540244e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293650 + }, + { + "epoch": 1.4241979308246027, + "grad_norm": 3.581007149477955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293660 + }, + { + "epoch": 1.4242464290174388, + "grad_norm": 3.7552902085735695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293670 + }, + { + "epoch": 1.424294927210275, + "grad_norm": 3.546889274730347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293680 + }, + { + "epoch": 1.4243434254031109, + "grad_norm": 3.7491574857995147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293690 + }, + { + "epoch": 1.424391923595947, + "grad_norm": 3.2842660857568262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293700 + }, + { + "epoch": 1.4244404217887832, + "grad_norm": 3.4011566185654374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293710 + }, + { + "epoch": 1.424488919981619, + "grad_norm": 3.367163117218297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293720 + }, + { + "epoch": 1.4245374181744552, + "grad_norm": 3.165918542435975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293730 + }, + { + "epoch": 1.4245859163672914, + "grad_norm": 3.612978161982028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293740 + }, + { + "epoch": 1.4246344145601275, + "grad_norm": 3.0530118237948045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293750 + }, + { + "epoch": 1.4246829127529637, + "grad_norm": 2.8827641926909564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293760 + }, + { + "epoch": 1.4247314109457996, + "grad_norm": 2.945049118352472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293770 + }, + { + "epoch": 1.4247799091386357, + "grad_norm": 2.98562576972472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293780 + }, + { + "epoch": 1.4248284073314719, + "grad_norm": 3.426555167607148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293790 + }, + { + "epoch": 1.4248769055243078, + "grad_norm": 2.8628232939809095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293800 + }, + { + "epoch": 1.424925403717144, + "grad_norm": 2.865213673430844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293810 + }, + { + "epoch": 1.42497390190998, + "grad_norm": 2.750785370153608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293820 + }, + { + "epoch": 1.4250224001028162, + "grad_norm": 2.8022782316838857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293830 + }, + { + "epoch": 1.4250708982956524, + "grad_norm": 3.1320262223744066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293840 + }, + { + "epoch": 1.4251193964884883, + "grad_norm": 2.6308127871743636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293850 + }, + { + "epoch": 1.4251678946813244, + "grad_norm": 2.6711697955761338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293860 + }, + { + "epoch": 1.4252163928741606, + "grad_norm": 2.583560899438453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293870 + }, + { + "epoch": 1.4252648910669965, + "grad_norm": 2.5663903215900064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293880 + }, + { + "epoch": 1.4253133892598326, + "grad_norm": 2.8073757221136475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293890 + }, + { + "epoch": 1.4253618874526688, + "grad_norm": 2.324499973838101e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293900 + }, + { + "epoch": 1.425410385645505, + "grad_norm": 2.4728076368774055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293910 + }, + { + "epoch": 1.425458883838341, + "grad_norm": 2.4267662865895545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293920 + }, + { + "epoch": 1.425507382031177, + "grad_norm": 2.346151632082183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293930 + }, + { + "epoch": 1.4255558802240131, + "grad_norm": 2.6473710477148416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293940 + }, + { + "epoch": 1.4256043784168493, + "grad_norm": 2.2937047106097452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293950 + }, + { + "epoch": 1.4256528766096852, + "grad_norm": 2.2114536477602087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293960 + }, + { + "epoch": 1.4257013748025213, + "grad_norm": 2.120199042110471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293970 + }, + { + "epoch": 1.4257498729953575, + "grad_norm": 2.165827481803717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293980 + }, + { + "epoch": 1.4257983711881936, + "grad_norm": 2.3295483515539672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 293990 + }, + { + "epoch": 1.4258468693810298, + "grad_norm": 2.0333777683845256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294000 + }, + { + "epoch": 1.4258953675738657, + "grad_norm": 2.1338673832360655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294010 + }, + { + "epoch": 1.4259438657667018, + "grad_norm": 1.982628418772947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294020 + }, + { + "epoch": 1.425992363959538, + "grad_norm": 2.0572097128024325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294030 + }, + { + "epoch": 1.426040862152374, + "grad_norm": 2.2091332994023105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294040 + }, + { + "epoch": 1.42608936034521, + "grad_norm": 1.8789140767694334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294050 + }, + { + "epoch": 1.4261378585380462, + "grad_norm": 2.022698481596308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294060 + }, + { + "epoch": 1.4261863567308823, + "grad_norm": 1.9716007955139503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294070 + }, + { + "epoch": 1.4262348549237185, + "grad_norm": 1.9138176412525354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294080 + }, + { + "epoch": 1.4262833531165544, + "grad_norm": 2.1626558464049594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294090 + }, + { + "epoch": 1.4263318513093906, + "grad_norm": 1.7316544926870847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294100 + }, + { + "epoch": 1.4263803495022267, + "grad_norm": 1.8762673335004365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294110 + }, + { + "epoch": 1.4264288476950628, + "grad_norm": 1.7152219697891269e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294120 + }, + { + "epoch": 1.426477345887899, + "grad_norm": 1.7908844256453449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294130 + }, + { + "epoch": 1.426525844080735, + "grad_norm": 2.0225961634423584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294140 + }, + { + "epoch": 1.426574342273571, + "grad_norm": 1.6385147318942472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294150 + }, + { + "epoch": 1.4266228404664072, + "grad_norm": 1.7264668485950097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294160 + }, + { + "epoch": 1.4266713386592431, + "grad_norm": 1.679562842582527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294170 + }, + { + "epoch": 1.4267198368520793, + "grad_norm": 1.6758683614170877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294180 + }, + { + "epoch": 1.4267683350449154, + "grad_norm": 1.8874803799917572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294190 + }, + { + "epoch": 1.4268168332377515, + "grad_norm": 1.626419361855369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294200 + }, + { + "epoch": 1.4268653314305877, + "grad_norm": 2.1271532659739023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294210 + }, + { + "epoch": 1.4269138296234236, + "grad_norm": 1.5820168073332752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294220 + }, + { + "epoch": 1.4269623278162598, + "grad_norm": 1.546230805615778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294230 + }, + { + "epoch": 1.427010826009096, + "grad_norm": 4.241803253535181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294240 + }, + { + "epoch": 1.4270593242019318, + "grad_norm": 1.4101699434831971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294250 + }, + { + "epoch": 1.427107822394768, + "grad_norm": 1.3960271871837904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294260 + }, + { + "epoch": 1.427156320587604, + "grad_norm": 1.4406235777641996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294270 + }, + { + "epoch": 1.4272048187804403, + "grad_norm": 1.4538411505782278e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294280 + }, + { + "epoch": 1.4272533169732764, + "grad_norm": 1.5743137282697717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294290 + }, + { + "epoch": 1.4273018151661123, + "grad_norm": 1.341141683042224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294300 + }, + { + "epoch": 1.4273503133589485, + "grad_norm": 1.40632914735761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294310 + }, + { + "epoch": 1.4273988115517846, + "grad_norm": 1.3538477787733427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294320 + }, + { + "epoch": 1.4274473097446205, + "grad_norm": 1.3314627267391188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294330 + }, + { + "epoch": 1.4274958079374567, + "grad_norm": 1.543137386761373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294340 + }, + { + "epoch": 1.4275443061302928, + "grad_norm": 1.5495317029490252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294350 + }, + { + "epoch": 1.427592804323129, + "grad_norm": 1.319696366408607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294360 + }, + { + "epoch": 1.427641302515965, + "grad_norm": 1.2928137493872782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294370 + }, + { + "epoch": 1.427689800708801, + "grad_norm": 1.2380921816657064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294380 + }, + { + "epoch": 1.4277382989016372, + "grad_norm": 1.387825818710553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294390 + }, + { + "epoch": 1.4277867970944733, + "grad_norm": 1.2271304967725882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294400 + }, + { + "epoch": 1.4278352952873092, + "grad_norm": 1.2256489299034001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294410 + }, + { + "epoch": 1.4278837934801454, + "grad_norm": 1.2067877150911954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294420 + }, + { + "epoch": 1.4279322916729815, + "grad_norm": 1.2556072306324495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294430 + }, + { + "epoch": 1.4279807898658177, + "grad_norm": 1.51128563175007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294440 + }, + { + "epoch": 1.4280292880586538, + "grad_norm": 1.1959448329434963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294450 + }, + { + "epoch": 1.4280777862514897, + "grad_norm": 1.2706749430435593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294460 + }, + { + "epoch": 1.4281262844443259, + "grad_norm": 1.1111285402876092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294470 + }, + { + "epoch": 1.428174782637162, + "grad_norm": 1.1815330935860402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294480 + }, + { + "epoch": 1.428223280829998, + "grad_norm": 1.2430907645466505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294490 + }, + { + "epoch": 1.428271779022834, + "grad_norm": 1.0948970157187432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294500 + }, + { + "epoch": 1.4283202772156702, + "grad_norm": 1.0734904662967892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294510 + }, + { + "epoch": 1.4283687754085064, + "grad_norm": 1.0706685316108633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294520 + }, + { + "epoch": 1.4284172736013425, + "grad_norm": 1.0916280643868959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294530 + }, + { + "epoch": 1.4284657717941784, + "grad_norm": 7.647350685147103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294540 + }, + { + "epoch": 1.4285142699870146, + "grad_norm": 1.0092850288856425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294550 + }, + { + "epoch": 1.4285627681798507, + "grad_norm": 1.0303375574949314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294560 + }, + { + "epoch": 1.4286112663726867, + "grad_norm": 1.0415952829134767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294570 + }, + { + "epoch": 1.4286597645655228, + "grad_norm": 9.733821570989676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294580 + }, + { + "epoch": 1.428708262758359, + "grad_norm": 1.0983769698214019e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294590 + }, + { + "epoch": 1.428756760951195, + "grad_norm": 1.0168212156713707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294600 + }, + { + "epoch": 1.4288052591440312, + "grad_norm": 1.0400361816209625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294610 + }, + { + "epoch": 1.4288537573368671, + "grad_norm": 1.0647910357874935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294620 + }, + { + "epoch": 1.4289022555297033, + "grad_norm": 1.0363453384343302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294630 + }, + { + "epoch": 1.4289507537225394, + "grad_norm": 1.180252525045944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294640 + }, + { + "epoch": 1.4289992519153756, + "grad_norm": 9.314707085650298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294650 + }, + { + "epoch": 1.4290477501082117, + "grad_norm": 9.810581786950934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294660 + }, + { + "epoch": 1.4290962483010476, + "grad_norm": 9.632458386477083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294670 + }, + { + "epoch": 1.4291447464938838, + "grad_norm": 9.804240335142822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294680 + }, + { + "epoch": 1.42919324468672, + "grad_norm": 1.023941649691551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294690 + }, + { + "epoch": 1.4292417428795559, + "grad_norm": 9.429813303540868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294700 + }, + { + "epoch": 1.429290241072392, + "grad_norm": 9.110455607697077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294710 + }, + { + "epoch": 1.4293387392652281, + "grad_norm": 8.678937319928082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294720 + }, + { + "epoch": 1.4293872374580643, + "grad_norm": 8.537793974028318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294730 + }, + { + "epoch": 1.4294357356509004, + "grad_norm": 9.430810337107687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294740 + }, + { + "epoch": 1.4294842338437364, + "grad_norm": 8.733258027859847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294750 + }, + { + "epoch": 1.4295327320365725, + "grad_norm": 8.447560162494483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294760 + }, + { + "epoch": 1.4295812302294086, + "grad_norm": 8.536492259736406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294770 + }, + { + "epoch": 1.4296297284222446, + "grad_norm": 8.609708288531692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294780 + }, + { + "epoch": 1.4296782266150807, + "grad_norm": 9.75692614701984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294790 + }, + { + "epoch": 1.4297267248079168, + "grad_norm": 8.694207735970849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294800 + }, + { + "epoch": 1.429775223000753, + "grad_norm": 8.211448516703967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294810 + }, + { + "epoch": 1.4298237211935891, + "grad_norm": 8.47970909489959e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294820 + }, + { + "epoch": 1.429872219386425, + "grad_norm": 9.63153638622316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294830 + }, + { + "epoch": 1.4299207175792612, + "grad_norm": 1.00286627002788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294840 + }, + { + "epoch": 1.4299692157720973, + "grad_norm": 8.243977731581253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294850 + }, + { + "epoch": 1.4300177139649333, + "grad_norm": 8.024005069273699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294860 + }, + { + "epoch": 1.4300662121577694, + "grad_norm": 7.520162625951343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294870 + }, + { + "epoch": 1.4301147103506056, + "grad_norm": 8.029491027627955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294880 + }, + { + "epoch": 1.4301632085434417, + "grad_norm": 9.367156508233165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294890 + }, + { + "epoch": 1.4302117067362778, + "grad_norm": 7.640229000571708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294900 + }, + { + "epoch": 1.4302602049291138, + "grad_norm": 9.630550721340114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294910 + }, + { + "epoch": 1.43030870312195, + "grad_norm": 7.7518711805169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294920 + }, + { + "epoch": 1.430357201314786, + "grad_norm": 7.89341243034869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294930 + }, + { + "epoch": 1.430405699507622, + "grad_norm": 8.004430469554791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294940 + }, + { + "epoch": 1.4304541977004581, + "grad_norm": 7.382308240266866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294950 + }, + { + "epoch": 1.4305026958932943, + "grad_norm": 7.345981885009678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294960 + }, + { + "epoch": 1.4305511940861304, + "grad_norm": 7.611566843479523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294970 + }, + { + "epoch": 1.4305996922789666, + "grad_norm": 7.565539590359549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294980 + }, + { + "epoch": 1.4306481904718025, + "grad_norm": 8.42548843138502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 294990 + }, + { + "epoch": 1.4306966886646386, + "grad_norm": 7.303156621674134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295000 + }, + { + "epoch": 1.4307451868574748, + "grad_norm": 7.367152647930197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295010 + }, + { + "epoch": 1.4307936850503107, + "grad_norm": 7.268429840223689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295020 + }, + { + "epoch": 1.4308421832431468, + "grad_norm": 7.26801715700276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295030 + }, + { + "epoch": 1.430890681435983, + "grad_norm": 7.736481393294525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295040 + }, + { + "epoch": 1.4309391796288191, + "grad_norm": 6.857707717244921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295050 + }, + { + "epoch": 1.4309876778216553, + "grad_norm": 6.844522317805968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295060 + }, + { + "epoch": 1.4310361760144912, + "grad_norm": 7.095171099535946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295070 + }, + { + "epoch": 1.4310846742073273, + "grad_norm": 6.946239636818063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295080 + }, + { + "epoch": 1.4311331724001635, + "grad_norm": 8.04442777280201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295090 + }, + { + "epoch": 1.4311816705929996, + "grad_norm": 6.360947395478433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295100 + }, + { + "epoch": 1.4312301687858358, + "grad_norm": 6.757675237167859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295110 + }, + { + "epoch": 1.4312786669786717, + "grad_norm": 6.235941896193253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295120 + }, + { + "epoch": 1.4313271651715078, + "grad_norm": 5.924488277742057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295130 + }, + { + "epoch": 1.431375663364344, + "grad_norm": 6.578487727892934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295140 + }, + { + "epoch": 1.4314241615571799, + "grad_norm": 6.404101782209182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295150 + }, + { + "epoch": 1.431472659750016, + "grad_norm": 6.259405154196429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295160 + }, + { + "epoch": 1.4315211579428522, + "grad_norm": 6.125009122115443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295170 + }, + { + "epoch": 1.4315696561356883, + "grad_norm": 5.859651537321042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295180 + }, + { + "epoch": 1.4316181543285245, + "grad_norm": 5.884234042241587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295190 + }, + { + "epoch": 1.4316666525213604, + "grad_norm": 5.848052637702494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295200 + }, + { + "epoch": 1.4317151507141965, + "grad_norm": 6.273521648836322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295210 + }, + { + "epoch": 1.4317636489070327, + "grad_norm": 5.74631428662542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295220 + }, + { + "epoch": 1.4318121470998686, + "grad_norm": 5.863994942956197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295230 + }, + { + "epoch": 1.4318606452927047, + "grad_norm": 6.811449111410184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295240 + }, + { + "epoch": 1.4319091434855409, + "grad_norm": 6.223764899004891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295250 + }, + { + "epoch": 1.431957641678377, + "grad_norm": 5.937137075306964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295260 + }, + { + "epoch": 1.4320061398712132, + "grad_norm": 6.189839041326195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295270 + }, + { + "epoch": 1.432054638064049, + "grad_norm": 5.45455463907274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295280 + }, + { + "epoch": 1.4321031362568852, + "grad_norm": 5.489261525326583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295290 + }, + { + "epoch": 1.4321516344497214, + "grad_norm": 5.598136567641632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295300 + }, + { + "epoch": 1.4322001326425573, + "grad_norm": 5.93098434364947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295310 + }, + { + "epoch": 1.4322486308353934, + "grad_norm": 5.766357276115741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295320 + }, + { + "epoch": 1.4322971290282296, + "grad_norm": 5.414584052232385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295330 + }, + { + "epoch": 1.4323456272210657, + "grad_norm": 5.420951083578984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295340 + }, + { + "epoch": 1.4323941254139019, + "grad_norm": 5.57262978873041e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295350 + }, + { + "epoch": 1.4324426236067378, + "grad_norm": 5.821857484988868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295360 + }, + { + "epoch": 1.432491121799574, + "grad_norm": 5.925631398895348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295370 + }, + { + "epoch": 1.43253961999241, + "grad_norm": 5.472675752571377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295380 + }, + { + "epoch": 1.432588118185246, + "grad_norm": 5.275624062051065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295390 + }, + { + "epoch": 1.4326366163780822, + "grad_norm": 5.536273306461226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295400 + }, + { + "epoch": 1.4326851145709183, + "grad_norm": 5.283752670948161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295410 + }, + { + "epoch": 1.4327336127637544, + "grad_norm": 5.182806717130006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295420 + }, + { + "epoch": 1.4327821109565906, + "grad_norm": 5.311370046001684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295430 + }, + { + "epoch": 1.4328306091494265, + "grad_norm": 5.42207601483824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295440 + }, + { + "epoch": 1.4328791073422626, + "grad_norm": 5.473002602229826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295450 + }, + { + "epoch": 1.4329276055350988, + "grad_norm": 5.123296773490438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295460 + }, + { + "epoch": 1.4329761037279347, + "grad_norm": 5.025853511142486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295470 + }, + { + "epoch": 1.4330246019207709, + "grad_norm": 4.905847390546114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295480 + }, + { + "epoch": 1.433073100113607, + "grad_norm": 4.796510779669916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295490 + }, + { + "epoch": 1.4331215983064431, + "grad_norm": 5.185653435546556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295500 + }, + { + "epoch": 1.4331700964992793, + "grad_norm": 5.184772930988402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295510 + }, + { + "epoch": 1.4332185946921152, + "grad_norm": 5.217731882112275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295520 + }, + { + "epoch": 1.4332670928849514, + "grad_norm": 4.893399818683974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295530 + }, + { + "epoch": 1.4333155910777875, + "grad_norm": 4.890609375252097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295540 + }, + { + "epoch": 1.4333640892706234, + "grad_norm": 4.749768152123579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295550 + }, + { + "epoch": 1.4334125874634596, + "grad_norm": 4.886387614533305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295560 + }, + { + "epoch": 1.4334610856562957, + "grad_norm": 5.56334327939112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295570 + }, + { + "epoch": 1.4335095838491319, + "grad_norm": 4.850952564083855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295580 + }, + { + "epoch": 1.433558082041968, + "grad_norm": 4.780856670549838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295590 + }, + { + "epoch": 1.433606580234804, + "grad_norm": 4.994155915483134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295600 + }, + { + "epoch": 1.43365507842764, + "grad_norm": 4.750155255806021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295610 + }, + { + "epoch": 1.4337035766204762, + "grad_norm": 4.83663257000444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295620 + }, + { + "epoch": 1.4337520748133123, + "grad_norm": 4.6905674366826133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295630 + }, + { + "epoch": 1.4338005730061485, + "grad_norm": 4.7659750634920783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295640 + }, + { + "epoch": 1.4338490711989844, + "grad_norm": 4.5937633785797516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295650 + }, + { + "epoch": 1.4338975693918206, + "grad_norm": 4.7003436520753894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295660 + }, + { + "epoch": 1.4339460675846567, + "grad_norm": 4.6875462089701614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295670 + }, + { + "epoch": 1.4339945657774926, + "grad_norm": 4.580190591241262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295680 + }, + { + "epoch": 1.4340430639703288, + "grad_norm": 4.733838068204932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295690 + }, + { + "epoch": 1.434091562163165, + "grad_norm": 4.57944821619094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295700 + }, + { + "epoch": 1.434140060356001, + "grad_norm": 4.564910227600194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295710 + }, + { + "epoch": 1.4341885585488372, + "grad_norm": 4.489366176585463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295720 + }, + { + "epoch": 1.4342370567416731, + "grad_norm": 4.588913213865453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295730 + }, + { + "epoch": 1.4342855549345093, + "grad_norm": 4.41613451584999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295740 + }, + { + "epoch": 1.4343340531273454, + "grad_norm": 4.459153046809661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295750 + }, + { + "epoch": 1.4343825513201813, + "grad_norm": 4.880478172708536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295760 + }, + { + "epoch": 1.4344310495130175, + "grad_norm": 4.2708052205853164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295770 + }, + { + "epoch": 1.4344795477058536, + "grad_norm": 4.364800076928077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295780 + }, + { + "epoch": 1.4345280458986898, + "grad_norm": 4.232944377235981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295790 + }, + { + "epoch": 1.434576544091526, + "grad_norm": 4.3347336031729355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295800 + }, + { + "epoch": 1.4346250422843618, + "grad_norm": 4.3895099111068703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295810 + }, + { + "epoch": 1.434673540477198, + "grad_norm": 5.153185611561639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295820 + }, + { + "epoch": 1.4347220386700341, + "grad_norm": 4.293368078833737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295830 + }, + { + "epoch": 1.43477053686287, + "grad_norm": 4.4705603841066477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295840 + }, + { + "epoch": 1.4348190350557062, + "grad_norm": 4.2058692883983895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295850 + }, + { + "epoch": 1.4348675332485423, + "grad_norm": 4.224147858167271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295860 + }, + { + "epoch": 1.4349160314413785, + "grad_norm": 4.478525283957424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295870 + }, + { + "epoch": 1.4349645296342146, + "grad_norm": 4.172567287241691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295880 + }, + { + "epoch": 1.4350130278270505, + "grad_norm": 3.92093312484576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295890 + }, + { + "epoch": 1.4350615260198867, + "grad_norm": 5.194423283683136e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295900 + }, + { + "epoch": 1.4351100242127228, + "grad_norm": 6.653652917520958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295910 + }, + { + "epoch": 1.4351585224055587, + "grad_norm": 3.414899401832372e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295920 + }, + { + "epoch": 1.435207020598395, + "grad_norm": 2.163389763154555e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295930 + }, + { + "epoch": 1.435255518791231, + "grad_norm": 1.5756415905343601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295940 + }, + { + "epoch": 1.4353040169840672, + "grad_norm": 8.114101888168079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295950 + }, + { + "epoch": 1.4353525151769033, + "grad_norm": 7.903636287664995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295960 + }, + { + "epoch": 1.4354010133697392, + "grad_norm": 7.10918527602189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295970 + }, + { + "epoch": 1.4354495115625754, + "grad_norm": 6.755753929610364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295980 + }, + { + "epoch": 1.4354980097554115, + "grad_norm": 1.352884737571003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 295990 + }, + { + "epoch": 1.4355465079482475, + "grad_norm": 7.500503897972521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296000 + }, + { + "epoch": 1.4355950061410836, + "grad_norm": 6.655472475358692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296010 + }, + { + "epoch": 1.4356435043339197, + "grad_norm": 7.256898015839397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296020 + }, + { + "epoch": 1.4356920025267559, + "grad_norm": 7.355531010944105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296030 + }, + { + "epoch": 1.435740500719592, + "grad_norm": 1.05903097846749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296040 + }, + { + "epoch": 1.435788998912428, + "grad_norm": 5.984092581456935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296050 + }, + { + "epoch": 1.435837497105264, + "grad_norm": 6.279686317611777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296060 + }, + { + "epoch": 1.4358859952981002, + "grad_norm": 5.702017915609758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296070 + }, + { + "epoch": 1.4359344934909362, + "grad_norm": 5.824130084874923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296080 + }, + { + "epoch": 1.4359829916837723, + "grad_norm": 8.834055051920586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296090 + }, + { + "epoch": 1.4360314898766084, + "grad_norm": 5.461817522700585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296100 + }, + { + "epoch": 1.4360799880694446, + "grad_norm": 5.920196599618066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296110 + }, + { + "epoch": 1.4361284862622807, + "grad_norm": 5.576369517257262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296120 + }, + { + "epoch": 1.4361769844551167, + "grad_norm": 6.651229114140733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296130 + }, + { + "epoch": 1.4362254826479528, + "grad_norm": 8.035626706259791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296140 + }, + { + "epoch": 1.436273980840789, + "grad_norm": 5.21762785865576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296150 + }, + { + "epoch": 1.436322479033625, + "grad_norm": 4.968824214302003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296160 + }, + { + "epoch": 1.4363709772264612, + "grad_norm": 5.303680268298194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296170 + }, + { + "epoch": 1.4364194754192972, + "grad_norm": 5.465520871439367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296180 + }, + { + "epoch": 1.4364679736121333, + "grad_norm": 8.13911924524291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296190 + }, + { + "epoch": 1.4365164718049694, + "grad_norm": 0.03168100863695145, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 296200 + }, + { + "epoch": 1.4365649699978054, + "grad_norm": 0.0006436237599700689, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 296210 + }, + { + "epoch": 1.4366134681906415, + "grad_norm": 9.173365106107667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296220 + }, + { + "epoch": 1.4366619663834777, + "grad_norm": 2.933673567895312e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296230 + }, + { + "epoch": 1.4367104645763138, + "grad_norm": 1.1703815289365593e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296240 + }, + { + "epoch": 1.43675896276915, + "grad_norm": 5.8282771533413325e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296250 + }, + { + "epoch": 1.4368074609619859, + "grad_norm": 7.848558198020328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296260 + }, + { + "epoch": 1.436855959154822, + "grad_norm": 1.1218097824894357e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296270 + }, + { + "epoch": 1.4369044573476581, + "grad_norm": 3.475617404546938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296280 + }, + { + "epoch": 1.436952955540494, + "grad_norm": 3.716333367265179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296290 + }, + { + "epoch": 1.4370014537333302, + "grad_norm": 2.550772251197486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296300 + }, + { + "epoch": 1.4370499519261664, + "grad_norm": 2.2816388991486747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296310 + }, + { + "epoch": 1.4370984501190025, + "grad_norm": 2.3524846710643033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296320 + }, + { + "epoch": 1.4371469483118386, + "grad_norm": 2.54860947279667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296330 + }, + { + "epoch": 1.4371954465046746, + "grad_norm": 3.5484511045069667e-06, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 296340 + }, + { + "epoch": 1.4372439446975107, + "grad_norm": 0.0003080039459746331, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296350 + }, + { + "epoch": 1.4372924428903469, + "grad_norm": 4.781491225003265e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296360 + }, + { + "epoch": 1.4373409410831828, + "grad_norm": 2.1718677089666016e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296370 + }, + { + "epoch": 1.437389439276019, + "grad_norm": 1.563531805004459e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296380 + }, + { + "epoch": 1.437437937468855, + "grad_norm": 2.035872239503078e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296390 + }, + { + "epoch": 1.4374864356616912, + "grad_norm": 8.957755198935047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296400 + }, + { + "epoch": 1.4375349338545274, + "grad_norm": 8.124042324197944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296410 + }, + { + "epoch": 1.4375834320473633, + "grad_norm": 6.708332421112573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296420 + }, + { + "epoch": 1.4376319302401994, + "grad_norm": 7.989016921783332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296430 + }, + { + "epoch": 1.4376804284330356, + "grad_norm": 9.583653991285246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296440 + }, + { + "epoch": 1.4377289266258715, + "grad_norm": 5.719122327718651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296450 + }, + { + "epoch": 1.4377774248187076, + "grad_norm": 6.4131177168746945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296460 + }, + { + "epoch": 1.4378259230115438, + "grad_norm": 4.96293341711862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296470 + }, + { + "epoch": 1.43787442120438, + "grad_norm": 5.260168109089136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296480 + }, + { + "epoch": 1.437922919397216, + "grad_norm": 7.07925892129424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296490 + }, + { + "epoch": 1.437971417590052, + "grad_norm": 4.475855803320883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296500 + }, + { + "epoch": 1.4380199157828881, + "grad_norm": 6.234003194549587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296510 + }, + { + "epoch": 1.4380684139757243, + "grad_norm": 3.8340972423611674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296520 + }, + { + "epoch": 1.4381169121685602, + "grad_norm": 6.445677627198165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296530 + }, + { + "epoch": 1.4381654103613963, + "grad_norm": 5.180373136681737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296540 + }, + { + "epoch": 1.4382139085542325, + "grad_norm": 3.6059770991414553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296550 + }, + { + "epoch": 1.4382624067470686, + "grad_norm": 3.3467206321802223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296560 + }, + { + "epoch": 1.4383109049399048, + "grad_norm": 3.440063210291555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296570 + }, + { + "epoch": 1.4383594031327407, + "grad_norm": 3.249994051657268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296580 + }, + { + "epoch": 1.4384079013255768, + "grad_norm": 4.471885404200293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296590 + }, + { + "epoch": 1.438456399518413, + "grad_norm": 2.9751372494501993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296600 + }, + { + "epoch": 1.438504897711249, + "grad_norm": 2.6542393243289553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296610 + }, + { + "epoch": 1.438553395904085, + "grad_norm": 4.184606950730085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296620 + }, + { + "epoch": 1.4386018940969212, + "grad_norm": 2.6708994482760318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296630 + }, + { + "epoch": 1.4386503922897573, + "grad_norm": 3.96760924559203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296640 + }, + { + "epoch": 1.4386988904825935, + "grad_norm": 2.539916295063449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296650 + }, + { + "epoch": 1.4387473886754294, + "grad_norm": 2.5007122985698516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296660 + }, + { + "epoch": 1.4387958868682655, + "grad_norm": 2.468273578415392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296670 + }, + { + "epoch": 1.4388443850611017, + "grad_norm": 2.98632335216098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296680 + }, + { + "epoch": 1.4388928832539378, + "grad_norm": 3.261068059146055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296690 + }, + { + "epoch": 1.438941381446774, + "grad_norm": 2.415486960671842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296700 + }, + { + "epoch": 1.43898987963961, + "grad_norm": 1.938033619808266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296710 + }, + { + "epoch": 1.439038377832446, + "grad_norm": 2.2145663933770265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296720 + }, + { + "epoch": 1.4390868760252822, + "grad_norm": 2.4357652819162467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296730 + }, + { + "epoch": 1.439135374218118, + "grad_norm": 3.902793650922831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296740 + }, + { + "epoch": 1.4391838724109542, + "grad_norm": 1.8101555951943737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296750 + }, + { + "epoch": 1.4392323706037904, + "grad_norm": 1.7477447045166628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296760 + }, + { + "epoch": 1.4392808687966265, + "grad_norm": 1.6620439282633015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296770 + }, + { + "epoch": 1.4393293669894627, + "grad_norm": 1.7122132476288243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296780 + }, + { + "epoch": 1.4393778651822986, + "grad_norm": 2.0393356408021646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296790 + }, + { + "epoch": 1.4394263633751347, + "grad_norm": 1.1399311006243806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296800 + }, + { + "epoch": 1.4394748615679709, + "grad_norm": 2.0508691704890225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296810 + }, + { + "epoch": 1.4395233597608068, + "grad_norm": 1.6084201206467696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296820 + }, + { + "epoch": 1.439571857953643, + "grad_norm": 2.2192350570549024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296830 + }, + { + "epoch": 1.439620356146479, + "grad_norm": 8.396793418796733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296840 + }, + { + "epoch": 1.4396688543393152, + "grad_norm": 1.1971445701419725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296850 + }, + { + "epoch": 1.4397173525321514, + "grad_norm": 3.5030677736358484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296860 + }, + { + "epoch": 1.4397658507249873, + "grad_norm": 1.1155302672705147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296870 + }, + { + "epoch": 1.4398143489178234, + "grad_norm": 1.170326413557632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296880 + }, + { + "epoch": 1.4398628471106596, + "grad_norm": 2.041594598267693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296890 + }, + { + "epoch": 1.4399113453034955, + "grad_norm": 8.52284392749425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296900 + }, + { + "epoch": 1.4399598434963317, + "grad_norm": 1.080402853403939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296910 + }, + { + "epoch": 1.4400083416891678, + "grad_norm": 1.0715709777286975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296920 + }, + { + "epoch": 1.440056839882004, + "grad_norm": 1.0118250202140189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296930 + }, + { + "epoch": 1.44010533807484, + "grad_norm": 4.477661150303902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296940 + }, + { + "epoch": 1.440153836267676, + "grad_norm": 1.1512358923937427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296950 + }, + { + "epoch": 1.4402023344605122, + "grad_norm": 9.267390055356373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296960 + }, + { + "epoch": 1.4402508326533483, + "grad_norm": 9.410888424099539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296970 + }, + { + "epoch": 1.4402993308461842, + "grad_norm": 9.814008308239863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296980 + }, + { + "epoch": 1.4403478290390204, + "grad_norm": 5.915221208852017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 296990 + }, + { + "epoch": 1.4403963272318565, + "grad_norm": 1.0005300055127009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297000 + }, + { + "epoch": 1.4404448254246927, + "grad_norm": 8.67050914621359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297010 + }, + { + "epoch": 1.4404933236175288, + "grad_norm": 1.4071216583033674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297020 + }, + { + "epoch": 1.4405418218103647, + "grad_norm": 3.9972960621526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297030 + }, + { + "epoch": 1.4405903200032009, + "grad_norm": 1.4345200725074392e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297040 + }, + { + "epoch": 1.440638818196037, + "grad_norm": 9.936939022736624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297050 + }, + { + "epoch": 1.440687316388873, + "grad_norm": 8.267869020528451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297060 + }, + { + "epoch": 1.440735814581709, + "grad_norm": 1.4622935395891545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297070 + }, + { + "epoch": 1.4407843127745452, + "grad_norm": 9.468339499107969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297080 + }, + { + "epoch": 1.4408328109673814, + "grad_norm": 9.091478432310396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297090 + }, + { + "epoch": 1.4408813091602175, + "grad_norm": 9.306352239946136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297100 + }, + { + "epoch": 1.4409298073530534, + "grad_norm": 9.276429295823618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297110 + }, + { + "epoch": 1.4409783055458896, + "grad_norm": 6.899792879266897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297120 + }, + { + "epoch": 1.4410268037387257, + "grad_norm": 7.996320050551731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297130 + }, + { + "epoch": 1.4410753019315619, + "grad_norm": 1.0761243629531236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297140 + }, + { + "epoch": 1.441123800124398, + "grad_norm": 7.215863888632157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297150 + }, + { + "epoch": 1.441172298317234, + "grad_norm": 7.787527920299908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297160 + }, + { + "epoch": 1.44122079651007, + "grad_norm": 7.300176321223262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297170 + }, + { + "epoch": 1.4412692947029062, + "grad_norm": 8.346316349161498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297180 + }, + { + "epoch": 1.4413177928957421, + "grad_norm": 1.0196177981924848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297190 + }, + { + "epoch": 1.4413662910885783, + "grad_norm": 8.448779453829047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297200 + }, + { + "epoch": 1.4414147892814144, + "grad_norm": 6.312642426564707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297210 + }, + { + "epoch": 1.4414632874742506, + "grad_norm": 6.265202614486043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297220 + }, + { + "epoch": 1.4415117856670867, + "grad_norm": 7.374512733804295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297230 + }, + { + "epoch": 1.4415602838599226, + "grad_norm": 8.819342269816843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297240 + }, + { + "epoch": 1.4416087820527588, + "grad_norm": 6.813797881477512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297250 + }, + { + "epoch": 1.441657280245595, + "grad_norm": 1.8323272570341942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297260 + }, + { + "epoch": 1.4417057784384308, + "grad_norm": 6.696142236251035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297270 + }, + { + "epoch": 1.441754276631267, + "grad_norm": 8.187504363377229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297280 + }, + { + "epoch": 1.4418027748241031, + "grad_norm": 7.794601515342947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297290 + }, + { + "epoch": 1.4418512730169393, + "grad_norm": 6.437935553549323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297300 + }, + { + "epoch": 1.4418997712097754, + "grad_norm": 6.417614031306584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297310 + }, + { + "epoch": 1.4419482694026113, + "grad_norm": 1.908400236061425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297320 + }, + { + "epoch": 1.4419967675954475, + "grad_norm": 6.124697620180086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297330 + }, + { + "epoch": 1.4420452657882836, + "grad_norm": 7.494883220715565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297340 + }, + { + "epoch": 1.4420937639811195, + "grad_norm": 6.200434086167661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297350 + }, + { + "epoch": 1.4421422621739557, + "grad_norm": 5.604521788882266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297360 + }, + { + "epoch": 1.4421907603667918, + "grad_norm": 6.213019787537633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297370 + }, + { + "epoch": 1.442239258559628, + "grad_norm": 5.848613113812462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297380 + }, + { + "epoch": 1.4422877567524641, + "grad_norm": 7.712400247328333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297390 + }, + { + "epoch": 1.4423362549453, + "grad_norm": 7.406349595839856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297400 + }, + { + "epoch": 1.4423847531381362, + "grad_norm": 5.816071961817215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297410 + }, + { + "epoch": 1.4424332513309723, + "grad_norm": 5.585049507317308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297420 + }, + { + "epoch": 1.4424817495238083, + "grad_norm": 6.283638640525169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297430 + }, + { + "epoch": 1.4425302477166444, + "grad_norm": 7.4797424076678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297440 + }, + { + "epoch": 1.4425787459094805, + "grad_norm": 8.097903787529503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297450 + }, + { + "epoch": 1.4426272441023167, + "grad_norm": 3.939850103051867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297460 + }, + { + "epoch": 1.4426757422951528, + "grad_norm": 6.179081424306787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297470 + }, + { + "epoch": 1.4427242404879888, + "grad_norm": 5.465404910864891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297480 + }, + { + "epoch": 1.442772738680825, + "grad_norm": 6.591412784473505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297490 + }, + { + "epoch": 1.442821236873661, + "grad_norm": 5.509486413757259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297500 + }, + { + "epoch": 1.442869735066497, + "grad_norm": 5.429949965218839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297510 + }, + { + "epoch": 1.442918233259333, + "grad_norm": 5.058215606368321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297520 + }, + { + "epoch": 1.4429667314521692, + "grad_norm": 5.250482786323118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297530 + }, + { + "epoch": 1.4430152296450054, + "grad_norm": 6.34382558928337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297540 + }, + { + "epoch": 1.4430637278378415, + "grad_norm": 4.65572782104573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297550 + }, + { + "epoch": 1.4431122260306775, + "grad_norm": 4.741662564811122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297560 + }, + { + "epoch": 1.4431607242235136, + "grad_norm": 4.789072818311979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297570 + }, + { + "epoch": 1.4432092224163497, + "grad_norm": 0.0036017929669469595, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 297580 + }, + { + "epoch": 1.4432577206091857, + "grad_norm": 6.462914825533517e-06, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 297590 + }, + { + "epoch": 1.4433062188020218, + "grad_norm": 1.9478904505376704e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297600 + }, + { + "epoch": 1.443354716994858, + "grad_norm": 5.043633791501634e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297610 + }, + { + "epoch": 1.443403215187694, + "grad_norm": 2.7932224838878028e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297620 + }, + { + "epoch": 1.4434517133805302, + "grad_norm": 1.7662097889115103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297630 + }, + { + "epoch": 1.4435002115733662, + "grad_norm": 1.3390985259320587e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297640 + }, + { + "epoch": 1.4435487097662023, + "grad_norm": 4.465736310521606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297650 + }, + { + "epoch": 1.4435972079590385, + "grad_norm": 3.4175175187556306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297660 + }, + { + "epoch": 1.4436457061518746, + "grad_norm": 3.495033070066711e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297670 + }, + { + "epoch": 1.4436942043447107, + "grad_norm": 3.3241267374251038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297680 + }, + { + "epoch": 1.4437427025375467, + "grad_norm": 5.145294835529057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297690 + }, + { + "epoch": 1.4437912007303828, + "grad_norm": 2.723317493291688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297700 + }, + { + "epoch": 1.443839698923219, + "grad_norm": 2.7008395591110457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297710 + }, + { + "epoch": 1.4438881971160549, + "grad_norm": 2.607305304991314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297720 + }, + { + "epoch": 1.443936695308891, + "grad_norm": 2.427439540042542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297730 + }, + { + "epoch": 1.4439851935017272, + "grad_norm": 3.3354594961565454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297740 + }, + { + "epoch": 1.4440336916945633, + "grad_norm": 2.168811988667585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297750 + }, + { + "epoch": 1.4440821898873994, + "grad_norm": 2.1059136088297237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297760 + }, + { + "epoch": 1.4441306880802354, + "grad_norm": 2.1667854070983594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297770 + }, + { + "epoch": 1.4441791862730715, + "grad_norm": 2.0541867797874147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297780 + }, + { + "epoch": 1.4442276844659077, + "grad_norm": 2.9104623990861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297790 + }, + { + "epoch": 1.4442761826587436, + "grad_norm": 8.760831406107172e-06, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 297800 + }, + { + "epoch": 1.4443246808515797, + "grad_norm": 1.8225266103399917e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 297810 + }, + { + "epoch": 1.4443731790444159, + "grad_norm": 0.0015613611321896315, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 297820 + }, + { + "epoch": 1.444421677237252, + "grad_norm": 0.00016716848767828196, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 297830 + }, + { + "epoch": 1.4444701754300882, + "grad_norm": 0.0001337451540166512, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297840 + }, + { + "epoch": 1.444518673622924, + "grad_norm": 4.145748243900016e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297850 + }, + { + "epoch": 1.4445671718157602, + "grad_norm": 2.6023242753581144e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297860 + }, + { + "epoch": 1.4446156700085964, + "grad_norm": 2.042407686531078e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297870 + }, + { + "epoch": 1.4446641682014323, + "grad_norm": 1.492040519224247e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297880 + }, + { + "epoch": 1.4447126663942684, + "grad_norm": 1.479277307225857e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297890 + }, + { + "epoch": 1.4447611645871046, + "grad_norm": 1.0801777534652501e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297900 + }, + { + "epoch": 1.4448096627799407, + "grad_norm": 8.95367338671349e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297910 + }, + { + "epoch": 1.4448581609727769, + "grad_norm": 8.450661880488042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297920 + }, + { + "epoch": 1.4449066591656128, + "grad_norm": 8.117855031741783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297930 + }, + { + "epoch": 1.444955157358449, + "grad_norm": 8.411388080276083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297940 + }, + { + "epoch": 1.445003655551285, + "grad_norm": 7.398023626592476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297950 + }, + { + "epoch": 1.445052153744121, + "grad_norm": 6.188142378960038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297960 + }, + { + "epoch": 1.4451006519369571, + "grad_norm": 6.055780886526918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297970 + }, + { + "epoch": 1.4451491501297933, + "grad_norm": 5.360729574022116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297980 + }, + { + "epoch": 1.4451976483226294, + "grad_norm": 6.231558472791221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 297990 + }, + { + "epoch": 1.4452461465154656, + "grad_norm": 4.79107666251366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298000 + }, + { + "epoch": 1.4452946447083015, + "grad_norm": 4.876262210018467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298010 + }, + { + "epoch": 1.4453431429011376, + "grad_norm": 4.241906026436482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298020 + }, + { + "epoch": 1.4453916410939738, + "grad_norm": 4.003953108622227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298030 + }, + { + "epoch": 1.4454401392868097, + "grad_norm": 4.620930667442735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298040 + }, + { + "epoch": 1.4454886374796458, + "grad_norm": 3.842369551421143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298050 + }, + { + "epoch": 1.445537135672482, + "grad_norm": 3.7076317767059663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298060 + }, + { + "epoch": 1.4455856338653181, + "grad_norm": 3.5634404866868863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298070 + }, + { + "epoch": 1.4456341320581543, + "grad_norm": 3.7351389892137377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298080 + }, + { + "epoch": 1.4456826302509902, + "grad_norm": 3.980791007052176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298090 + }, + { + "epoch": 1.4457311284438263, + "grad_norm": 3.331757625346654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298100 + }, + { + "epoch": 1.4457796266366625, + "grad_norm": 3.1310285066865617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298110 + }, + { + "epoch": 1.4458281248294984, + "grad_norm": 3.0602004699176177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298120 + }, + { + "epoch": 1.4458766230223346, + "grad_norm": 3.021026032001828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298130 + }, + { + "epoch": 1.4459251212151707, + "grad_norm": 3.323749751871219e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298140 + }, + { + "epoch": 1.4459736194080068, + "grad_norm": 2.978185193569516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298150 + }, + { + "epoch": 1.446022117600843, + "grad_norm": 2.5544229629304027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298160 + }, + { + "epoch": 1.446070615793679, + "grad_norm": 2.58675800068886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298170 + }, + { + "epoch": 1.446119113986515, + "grad_norm": 2.5230751816707198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298180 + }, + { + "epoch": 1.4461676121793512, + "grad_norm": 2.974934886879055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298190 + }, + { + "epoch": 1.4462161103721873, + "grad_norm": 2.3186898943095002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298200 + }, + { + "epoch": 1.4462646085650235, + "grad_norm": 2.294887963216752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298210 + }, + { + "epoch": 1.4463131067578594, + "grad_norm": 2.2939655082154786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298220 + }, + { + "epoch": 1.4463616049506955, + "grad_norm": 2.3539262201666133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298230 + }, + { + "epoch": 1.4464101031435317, + "grad_norm": 2.7002145088772522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298240 + }, + { + "epoch": 1.4464586013363676, + "grad_norm": 2.153316700059804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298250 + }, + { + "epoch": 1.4465070995292038, + "grad_norm": 2.0186216715956107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298260 + }, + { + "epoch": 1.44655559772204, + "grad_norm": 1.9946264728787355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298270 + }, + { + "epoch": 1.446604095914876, + "grad_norm": 2.108165745084989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298280 + }, + { + "epoch": 1.4466525941077122, + "grad_norm": 2.324100250916672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298290 + }, + { + "epoch": 1.446701092300548, + "grad_norm": 1.9309438812342705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298300 + }, + { + "epoch": 1.4467495904933843, + "grad_norm": 1.8130779153580079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298310 + }, + { + "epoch": 1.4467980886862204, + "grad_norm": 1.921367129398277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298320 + }, + { + "epoch": 1.4468465868790563, + "grad_norm": 1.960325107575045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298330 + }, + { + "epoch": 1.4468950850718925, + "grad_norm": 2.157460130547406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298340 + }, + { + "epoch": 1.4469435832647286, + "grad_norm": 1.6521786392331705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298350 + }, + { + "epoch": 1.4469920814575647, + "grad_norm": 1.8203913896286394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298360 + }, + { + "epoch": 1.447040579650401, + "grad_norm": 1.497879679845937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298370 + }, + { + "epoch": 1.4470890778432368, + "grad_norm": 1.6394868680436048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298380 + }, + { + "epoch": 1.447137576036073, + "grad_norm": 1.9379231162020005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298390 + }, + { + "epoch": 1.447186074228909, + "grad_norm": 1.5074252814883948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298400 + }, + { + "epoch": 1.447234572421745, + "grad_norm": 1.4181968026605318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298410 + }, + { + "epoch": 1.4472830706145812, + "grad_norm": 1.4286930536400178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298420 + }, + { + "epoch": 1.4473315688074173, + "grad_norm": 1.3921643358116853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298430 + }, + { + "epoch": 1.4473800670002535, + "grad_norm": 1.802999463507149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298440 + }, + { + "epoch": 1.4474285651930896, + "grad_norm": 1.3965762946099858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298450 + }, + { + "epoch": 1.4474770633859255, + "grad_norm": 1.1449559451648383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298460 + }, + { + "epoch": 1.4475255615787617, + "grad_norm": 1.241909103555372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298470 + }, + { + "epoch": 1.4475740597715978, + "grad_norm": 1.1455877029220574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298480 + }, + { + "epoch": 1.4476225579644337, + "grad_norm": 1.5399865560539183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298490 + }, + { + "epoch": 1.4476710561572699, + "grad_norm": 1.1633317171799717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298500 + }, + { + "epoch": 1.447719554350106, + "grad_norm": 1.0988818530677236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298510 + }, + { + "epoch": 1.4477680525429422, + "grad_norm": 1.112734366870427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298520 + }, + { + "epoch": 1.4478165507357783, + "grad_norm": 1.042163603415247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298530 + }, + { + "epoch": 1.4478650489286142, + "grad_norm": 1.4151368077364168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298540 + }, + { + "epoch": 1.4479135471214504, + "grad_norm": 1.0203833653577021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298550 + }, + { + "epoch": 1.4479620453142865, + "grad_norm": 9.842464123721584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298560 + }, + { + "epoch": 1.4480105435071224, + "grad_norm": 1.0297432027073228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298570 + }, + { + "epoch": 1.4480590416999586, + "grad_norm": 1.0995280490533332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298580 + }, + { + "epoch": 1.4481075398927947, + "grad_norm": 1.154160599980969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298590 + }, + { + "epoch": 1.4481560380856309, + "grad_norm": 8.675613116793102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298600 + }, + { + "epoch": 1.448204536278467, + "grad_norm": 8.483325473207515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298610 + }, + { + "epoch": 1.448253034471303, + "grad_norm": 8.212518309846928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298620 + }, + { + "epoch": 1.448301532664139, + "grad_norm": 8.342267960870231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298630 + }, + { + "epoch": 1.4483500308569752, + "grad_norm": 1.1757242646126542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298640 + }, + { + "epoch": 1.4483985290498111, + "grad_norm": 7.550277132395422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298650 + }, + { + "epoch": 1.4484470272426473, + "grad_norm": 8.448720905107621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298660 + }, + { + "epoch": 1.4484955254354834, + "grad_norm": 8.087257015176874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298670 + }, + { + "epoch": 1.4485440236283196, + "grad_norm": 8.080539828370092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298680 + }, + { + "epoch": 1.4485925218211557, + "grad_norm": 1.0242929420201108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298690 + }, + { + "epoch": 1.4486410200139916, + "grad_norm": 7.042188485684164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298700 + }, + { + "epoch": 1.4486895182068278, + "grad_norm": 7.353498858719831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298710 + }, + { + "epoch": 1.448738016399664, + "grad_norm": 6.758167501175194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298720 + }, + { + "epoch": 1.4487865145925, + "grad_norm": 6.738115985172044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298730 + }, + { + "epoch": 1.4488350127853362, + "grad_norm": 9.765594768396113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298740 + }, + { + "epoch": 1.4488835109781721, + "grad_norm": 6.509848162750131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298750 + }, + { + "epoch": 1.4489320091710083, + "grad_norm": 6.112182973083691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298760 + }, + { + "epoch": 1.4489805073638444, + "grad_norm": 6.510834964501555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298770 + }, + { + "epoch": 1.4490290055566803, + "grad_norm": 6.002676400385099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298780 + }, + { + "epoch": 1.4490775037495165, + "grad_norm": 8.998192697617924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298790 + }, + { + "epoch": 1.4491260019423526, + "grad_norm": 5.807764296150708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298800 + }, + { + "epoch": 1.4491745001351888, + "grad_norm": 5.983991400171362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298810 + }, + { + "epoch": 1.449222998328025, + "grad_norm": 5.90621198170993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298820 + }, + { + "epoch": 1.4492714965208608, + "grad_norm": 6.758590984645707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298830 + }, + { + "epoch": 1.449319994713697, + "grad_norm": 8.229491186284577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298840 + }, + { + "epoch": 1.4493684929065331, + "grad_norm": 5.778133527201135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298850 + }, + { + "epoch": 1.449416991099369, + "grad_norm": 5.148800710230717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298860 + }, + { + "epoch": 1.4494654892922052, + "grad_norm": 5.836960212945996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298870 + }, + { + "epoch": 1.4495139874850413, + "grad_norm": 5.422818389888562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298880 + }, + { + "epoch": 1.4495624856778775, + "grad_norm": 7.710248723924451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298890 + }, + { + "epoch": 1.4496109838707136, + "grad_norm": 5.129367650624772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298900 + }, + { + "epoch": 1.4496594820635496, + "grad_norm": 5.10520465013542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298910 + }, + { + "epoch": 1.4497079802563857, + "grad_norm": 5.120040214023902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298920 + }, + { + "epoch": 1.4497564784492218, + "grad_norm": 5.225680865805771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298930 + }, + { + "epoch": 1.4498049766420578, + "grad_norm": 7.032971893750073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298940 + }, + { + "epoch": 1.449853474834894, + "grad_norm": 4.4487083528110816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298950 + }, + { + "epoch": 1.44990197302773, + "grad_norm": 4.786481895280303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298960 + }, + { + "epoch": 1.4499504712205662, + "grad_norm": 4.7315444362538983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298970 + }, + { + "epoch": 1.4499989694134023, + "grad_norm": 5.327597705218068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298980 + }, + { + "epoch": 1.4500474676062383, + "grad_norm": 7.469755018973956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 298990 + }, + { + "epoch": 1.4500959657990744, + "grad_norm": 4.657995873458276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299000 + }, + { + "epoch": 1.4501444639919105, + "grad_norm": 4.313999113492173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299010 + }, + { + "epoch": 1.4501929621847465, + "grad_norm": 4.5674363491343684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299020 + }, + { + "epoch": 1.4502414603775826, + "grad_norm": 5.21571678291366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299030 + }, + { + "epoch": 1.4502899585704188, + "grad_norm": 6.197547008923721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299040 + }, + { + "epoch": 1.450338456763255, + "grad_norm": 3.899203306900745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299050 + }, + { + "epoch": 1.450386954956091, + "grad_norm": 4.5787211888637103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299060 + }, + { + "epoch": 1.450435453148927, + "grad_norm": 4.125176644720341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299070 + }, + { + "epoch": 1.4504839513417631, + "grad_norm": 4.4182505121170834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299080 + }, + { + "epoch": 1.4505324495345993, + "grad_norm": 5.642807536787586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299090 + }, + { + "epoch": 1.4505809477274352, + "grad_norm": 3.7615160408677184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299100 + }, + { + "epoch": 1.4506294459202713, + "grad_norm": 4.1177781895385124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299110 + }, + { + "epoch": 1.4506779441131075, + "grad_norm": 4.2137833133892855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299120 + }, + { + "epoch": 1.4507264423059436, + "grad_norm": 4.281267820260837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299130 + }, + { + "epoch": 1.4507749404987798, + "grad_norm": 5.254340749161202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299140 + }, + { + "epoch": 1.4508234386916157, + "grad_norm": 3.957696890211082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299150 + }, + { + "epoch": 1.4508719368844518, + "grad_norm": 4.0933676359600213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299160 + }, + { + "epoch": 1.450920435077288, + "grad_norm": 3.598337627863657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299170 + }, + { + "epoch": 1.4509689332701239, + "grad_norm": 4.1539709627613775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299180 + }, + { + "epoch": 1.4510174314629602, + "grad_norm": 5.249488026493054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299190 + }, + { + "epoch": 1.4510659296557962, + "grad_norm": 3.635051086803287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299200 + }, + { + "epoch": 1.4511144278486323, + "grad_norm": 3.517614572956518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299210 + }, + { + "epoch": 1.4511629260414685, + "grad_norm": 3.8724445516891137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299220 + }, + { + "epoch": 1.4512114242343044, + "grad_norm": 3.335895826239721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299230 + }, + { + "epoch": 1.4512599224271405, + "grad_norm": 4.911824476039328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299240 + }, + { + "epoch": 1.4513084206199767, + "grad_norm": 3.719458163686795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299250 + }, + { + "epoch": 1.4513569188128128, + "grad_norm": 3.465448230599577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299260 + }, + { + "epoch": 1.451405417005649, + "grad_norm": 3.3791783948800003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299270 + }, + { + "epoch": 1.4514539151984849, + "grad_norm": 3.3028371149157465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299280 + }, + { + "epoch": 1.451502413391321, + "grad_norm": 4.7403793246303394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299290 + }, + { + "epoch": 1.4515509115841572, + "grad_norm": 3.247517099680408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299300 + }, + { + "epoch": 1.451599409776993, + "grad_norm": 3.348766028921091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299310 + }, + { + "epoch": 1.4516479079698292, + "grad_norm": 3.35283374397477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299320 + }, + { + "epoch": 1.4516964061626654, + "grad_norm": 3.5488218941281957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299330 + }, + { + "epoch": 1.4517449043555015, + "grad_norm": 4.647614844088821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299340 + }, + { + "epoch": 1.4517934025483377, + "grad_norm": 3.46578389098795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299350 + }, + { + "epoch": 1.4518419007411736, + "grad_norm": 3.0529665195899724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299360 + }, + { + "epoch": 1.4518903989340097, + "grad_norm": 3.1569584280077834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299370 + }, + { + "epoch": 1.4519388971268459, + "grad_norm": 3.211137311609491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299380 + }, + { + "epoch": 1.4519873953196818, + "grad_norm": 4.186435091924068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299390 + }, + { + "epoch": 1.452035893512518, + "grad_norm": 3.250870008741913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299400 + }, + { + "epoch": 1.452084391705354, + "grad_norm": 3.1619393325854617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299410 + }, + { + "epoch": 1.4521328898981902, + "grad_norm": 2.9686560765185277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299420 + }, + { + "epoch": 1.4521813880910264, + "grad_norm": 3.2325436905011884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299430 + }, + { + "epoch": 1.4522298862838623, + "grad_norm": 4.124374015646026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299440 + }, + { + "epoch": 1.4522783844766984, + "grad_norm": 2.898443369758752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299450 + }, + { + "epoch": 1.4523268826695346, + "grad_norm": 3.338966507726582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299460 + }, + { + "epoch": 1.4523753808623705, + "grad_norm": 2.792345412672148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299470 + }, + { + "epoch": 1.4524238790552066, + "grad_norm": 3.144803031318588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299480 + }, + { + "epoch": 1.4524723772480428, + "grad_norm": 3.6967651340091834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299490 + }, + { + "epoch": 1.452520875440879, + "grad_norm": 2.753137096078717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299500 + }, + { + "epoch": 1.452569373633715, + "grad_norm": 3.073616596793727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299510 + }, + { + "epoch": 1.452617871826551, + "grad_norm": 2.615843470721302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299520 + }, + { + "epoch": 1.4526663700193871, + "grad_norm": 2.705928636714816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299530 + }, + { + "epoch": 1.4527148682122233, + "grad_norm": 3.7713334677391686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299540 + }, + { + "epoch": 1.4527633664050592, + "grad_norm": 3.5410863574725226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299550 + }, + { + "epoch": 1.4528118645978954, + "grad_norm": 2.7429106808085635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299560 + }, + { + "epoch": 1.4528603627907315, + "grad_norm": 2.653227966220584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299570 + }, + { + "epoch": 1.4529088609835676, + "grad_norm": 2.8149051445325313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299580 + }, + { + "epoch": 1.4529573591764038, + "grad_norm": 3.679338362871931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299590 + }, + { + "epoch": 1.4530058573692397, + "grad_norm": 2.798838352191524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299600 + }, + { + "epoch": 1.4530543555620759, + "grad_norm": 2.639488627664832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299610 + }, + { + "epoch": 1.453102853754912, + "grad_norm": 2.623960426717531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299620 + }, + { + "epoch": 1.453151351947748, + "grad_norm": 2.9300730375325656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299630 + }, + { + "epoch": 1.453199850140584, + "grad_norm": 3.362665097483841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299640 + }, + { + "epoch": 1.4532483483334202, + "grad_norm": 2.731270285494247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299650 + }, + { + "epoch": 1.4532968465262563, + "grad_norm": 2.428586185487802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299660 + }, + { + "epoch": 1.4533453447190925, + "grad_norm": 2.6524631380198116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299670 + }, + { + "epoch": 1.4533938429119284, + "grad_norm": 2.6546382514425204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299680 + }, + { + "epoch": 1.4534423411047646, + "grad_norm": 3.327141371300968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299690 + }, + { + "epoch": 1.4534908392976007, + "grad_norm": 2.5110423962360073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299700 + }, + { + "epoch": 1.4535393374904368, + "grad_norm": 2.489210828571231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299710 + }, + { + "epoch": 1.453587835683273, + "grad_norm": 2.633724704992346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299720 + }, + { + "epoch": 1.453636333876109, + "grad_norm": 3.0515289495269826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299730 + }, + { + "epoch": 1.453684832068945, + "grad_norm": 5.987967597320676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299740 + }, + { + "epoch": 1.4537333302617812, + "grad_norm": 2.384849722147919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299750 + }, + { + "epoch": 1.4537818284546171, + "grad_norm": 2.4032684109442926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299760 + }, + { + "epoch": 1.4538303266474533, + "grad_norm": 2.387729693964502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299770 + }, + { + "epoch": 1.4538788248402894, + "grad_norm": 2.323484551425281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299780 + }, + { + "epoch": 1.4539273230331256, + "grad_norm": 2.9698981052206364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299790 + }, + { + "epoch": 1.4539758212259617, + "grad_norm": 2.317502776350011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299800 + }, + { + "epoch": 1.4540243194187976, + "grad_norm": 2.2926576548343292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299810 + }, + { + "epoch": 1.4540728176116338, + "grad_norm": 2.2603877880555956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299820 + }, + { + "epoch": 1.45412131580447, + "grad_norm": 2.447885094625235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299830 + }, + { + "epoch": 1.4541698139973058, + "grad_norm": 2.774986000986246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299840 + }, + { + "epoch": 1.454218312190142, + "grad_norm": 2.509065097910934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299850 + }, + { + "epoch": 1.4542668103829781, + "grad_norm": 2.2777975061671896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299860 + }, + { + "epoch": 1.4543153085758143, + "grad_norm": 2.1966764052194776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299870 + }, + { + "epoch": 1.4543638067686504, + "grad_norm": 2.1998482679919107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299880 + }, + { + "epoch": 1.4544123049614863, + "grad_norm": 2.770378273453389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299890 + }, + { + "epoch": 1.4544608031543225, + "grad_norm": 2.3023881112749223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299900 + }, + { + "epoch": 1.4545093013471586, + "grad_norm": 2.427203469324013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299910 + }, + { + "epoch": 1.4545577995399945, + "grad_norm": 2.219824324356523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299920 + }, + { + "epoch": 1.4546062977328307, + "grad_norm": 2.2075427352774568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299930 + }, + { + "epoch": 1.4546547959256668, + "grad_norm": 2.6971176225742965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299940 + }, + { + "epoch": 1.454703294118503, + "grad_norm": 2.2595546056436433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299950 + }, + { + "epoch": 1.454751792311339, + "grad_norm": 2.3835437445995922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299960 + }, + { + "epoch": 1.454800290504175, + "grad_norm": 2.1859392518308596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299970 + }, + { + "epoch": 1.4548487886970112, + "grad_norm": 2.2070946670282865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299980 + }, + { + "epoch": 1.4548972868898473, + "grad_norm": 2.6566107180769905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 299990 + }, + { + "epoch": 1.4549457850826832, + "grad_norm": 2.4079920990516257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300000 + }, + { + "epoch": 1.4549942832755194, + "grad_norm": 2.136807495389803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300010 + }, + { + "epoch": 1.4550427814683555, + "grad_norm": 2.0589415328231553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300020 + }, + { + "epoch": 1.4550912796611917, + "grad_norm": 2.2569217605905578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300030 + }, + { + "epoch": 1.4551397778540278, + "grad_norm": 2.7082904807684827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300040 + }, + { + "epoch": 1.4551882760468637, + "grad_norm": 2.181467522177627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300050 + }, + { + "epoch": 1.4552367742396999, + "grad_norm": 2.070257067998682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300060 + }, + { + "epoch": 1.455285272432536, + "grad_norm": 2.0682230683632952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300070 + }, + { + "epoch": 1.455333770625372, + "grad_norm": 2.0256588584288693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300080 + }, + { + "epoch": 1.455382268818208, + "grad_norm": 2.268289165385795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300090 + }, + { + "epoch": 1.4554307670110442, + "grad_norm": 2.012513533600213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300100 + }, + { + "epoch": 1.4554792652038804, + "grad_norm": 2.1714188846999605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300110 + }, + { + "epoch": 1.4555277633967165, + "grad_norm": 3.0278772555902833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300120 + }, + { + "epoch": 1.4555762615895524, + "grad_norm": 2.1048543885626714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300130 + }, + { + "epoch": 1.4556247597823886, + "grad_norm": 2.283819924286945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300140 + }, + { + "epoch": 1.4556732579752247, + "grad_norm": 2.0605081374469592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300150 + }, + { + "epoch": 1.4557217561680607, + "grad_norm": 1.854815252499975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300160 + }, + { + "epoch": 1.4557702543608968, + "grad_norm": 1.888617617851196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300170 + }, + { + "epoch": 1.455818752553733, + "grad_norm": 1.9308741627810377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300180 + }, + { + "epoch": 1.455867250746569, + "grad_norm": 2.539811703172745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300190 + }, + { + "epoch": 1.4559157489394052, + "grad_norm": 2.0513266463240143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300200 + }, + { + "epoch": 1.4559642471322412, + "grad_norm": 1.850988127216624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300210 + }, + { + "epoch": 1.4560127453250773, + "grad_norm": 1.8162185710934864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300220 + }, + { + "epoch": 1.4560612435179134, + "grad_norm": 1.8874592910833599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300230 + }, + { + "epoch": 1.4561097417107496, + "grad_norm": 2.1313572062808817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300240 + }, + { + "epoch": 1.4561582399035857, + "grad_norm": 1.8222398523448646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300250 + }, + { + "epoch": 1.4562067380964216, + "grad_norm": 1.9277160845376784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300260 + }, + { + "epoch": 1.4562552362892578, + "grad_norm": 1.803374942710434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300270 + }, + { + "epoch": 1.456303734482094, + "grad_norm": 3.3618334782659076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300280 + }, + { + "epoch": 1.4563522326749299, + "grad_norm": 2.485581660494063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300290 + }, + { + "epoch": 1.456400730867766, + "grad_norm": 1.8118858235993685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300300 + }, + { + "epoch": 1.4564492290606021, + "grad_norm": 1.811504120041718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300310 + }, + { + "epoch": 1.4564977272534383, + "grad_norm": 1.8671418899884884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300320 + }, + { + "epoch": 1.4565462254462744, + "grad_norm": 1.7550677000599535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300330 + }, + { + "epoch": 1.4565947236391104, + "grad_norm": 1.9139980622639996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300340 + }, + { + "epoch": 1.4566432218319465, + "grad_norm": 1.7509286465156038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300350 + }, + { + "epoch": 1.4566917200247826, + "grad_norm": 1.7439313637623854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300360 + }, + { + "epoch": 1.4567402182176186, + "grad_norm": 1.6280755232855881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300370 + }, + { + "epoch": 1.4567887164104547, + "grad_norm": 1.7432246579573984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300380 + }, + { + "epoch": 1.4568372146032909, + "grad_norm": 1.901801169879036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300390 + }, + { + "epoch": 1.456885712796127, + "grad_norm": 1.7357974968490453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300400 + }, + { + "epoch": 1.4569342109889631, + "grad_norm": 1.7514840067178739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300410 + }, + { + "epoch": 1.456982709181799, + "grad_norm": 1.811726519918011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300420 + }, + { + "epoch": 1.4570312073746352, + "grad_norm": 1.5950406861975353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300430 + }, + { + "epoch": 1.4570797055674714, + "grad_norm": 2.1232476399291045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300440 + }, + { + "epoch": 1.4571282037603073, + "grad_norm": 1.765625938787707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300450 + }, + { + "epoch": 1.4571767019531434, + "grad_norm": 1.6749899600654317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300460 + }, + { + "epoch": 1.4572252001459796, + "grad_norm": 1.6772501965078845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300470 + }, + { + "epoch": 1.4572736983388157, + "grad_norm": 1.700035596741145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300480 + }, + { + "epoch": 1.4573221965316518, + "grad_norm": 1.76294577158842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300490 + }, + { + "epoch": 1.4573706947244878, + "grad_norm": 1.577977428723898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300500 + }, + { + "epoch": 1.457419192917324, + "grad_norm": 1.6402945846039074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300510 + }, + { + "epoch": 1.45746769111016, + "grad_norm": 1.5897923333341168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300520 + }, + { + "epoch": 1.457516189302996, + "grad_norm": 1.5669726849409926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300530 + }, + { + "epoch": 1.4575646874958321, + "grad_norm": 1.7988787703870912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300540 + }, + { + "epoch": 1.4576131856886683, + "grad_norm": 1.6586071183155582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300550 + }, + { + "epoch": 1.4576616838815044, + "grad_norm": 1.5325430524626427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300560 + }, + { + "epoch": 1.4577101820743406, + "grad_norm": 1.4705506146128755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300570 + }, + { + "epoch": 1.4577586802671765, + "grad_norm": 1.6049172302246006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300580 + }, + { + "epoch": 1.4578071784600126, + "grad_norm": 1.7355593229240185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300590 + }, + { + "epoch": 1.4578556766528488, + "grad_norm": 1.5145570841923472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300600 + }, + { + "epoch": 1.4579041748456847, + "grad_norm": 1.6286850268443231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300610 + }, + { + "epoch": 1.4579526730385208, + "grad_norm": 1.589122149425748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300620 + }, + { + "epoch": 1.458001171231357, + "grad_norm": 1.6525874002581986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300630 + }, + { + "epoch": 1.4580496694241931, + "grad_norm": 1.8030507931143802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300640 + }, + { + "epoch": 1.4580981676170293, + "grad_norm": 1.5161631949922594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300650 + }, + { + "epoch": 1.4581466658098652, + "grad_norm": 2.3483094935272675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300660 + }, + { + "epoch": 1.4581951640027013, + "grad_norm": 1.426319187203262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300670 + }, + { + "epoch": 1.4582436621955375, + "grad_norm": 1.416680248667035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300680 + }, + { + "epoch": 1.4582921603883734, + "grad_norm": 1.7160792253889667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300690 + }, + { + "epoch": 1.4583406585812095, + "grad_norm": 1.3907637708143739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300700 + }, + { + "epoch": 1.4583891567740457, + "grad_norm": 1.3563675338446046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300710 + }, + { + "epoch": 1.4584376549668818, + "grad_norm": 1.4251830293687817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300720 + }, + { + "epoch": 1.458486153159718, + "grad_norm": 1.5311060508338414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300730 + }, + { + "epoch": 1.458534651352554, + "grad_norm": 1.5341308312599722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300740 + }, + { + "epoch": 1.45858314954539, + "grad_norm": 1.4239721224384994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300750 + }, + { + "epoch": 1.4586316477382262, + "grad_norm": 1.3769601991953095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300760 + }, + { + "epoch": 1.4586801459310623, + "grad_norm": 1.3858634417829307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300770 + }, + { + "epoch": 1.4587286441238985, + "grad_norm": 1.4163828154778457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300780 + }, + { + "epoch": 1.4587771423167344, + "grad_norm": 1.5639618311524828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300790 + }, + { + "epoch": 1.4588256405095705, + "grad_norm": 1.362759007861314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300800 + }, + { + "epoch": 1.4588741387024067, + "grad_norm": 1.3270837939671765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300810 + }, + { + "epoch": 1.4589226368952426, + "grad_norm": 1.399132258939062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300820 + }, + { + "epoch": 1.4589711350880787, + "grad_norm": 1.3470717874497495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300830 + }, + { + "epoch": 1.4590196332809149, + "grad_norm": 1.5308238232591975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300840 + }, + { + "epoch": 1.459068131473751, + "grad_norm": 1.3470851456531818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300850 + }, + { + "epoch": 1.4591166296665872, + "grad_norm": 1.3335336745967652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300860 + }, + { + "epoch": 1.459165127859423, + "grad_norm": 1.263729956235693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300870 + }, + { + "epoch": 1.4592136260522592, + "grad_norm": 1.290322302338609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300880 + }, + { + "epoch": 1.4592621242450954, + "grad_norm": 1.4714926521719462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300890 + }, + { + "epoch": 1.4593106224379313, + "grad_norm": 1.254218489066261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300900 + }, + { + "epoch": 1.4593591206307674, + "grad_norm": 1.2820778749755846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300910 + }, + { + "epoch": 1.4594076188236036, + "grad_norm": 1.3467050052895502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300920 + }, + { + "epoch": 1.4594561170164397, + "grad_norm": 1.295109370857972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300930 + }, + { + "epoch": 1.4595046152092759, + "grad_norm": 1.6951547365806618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300940 + }, + { + "epoch": 1.4595531134021118, + "grad_norm": 1.2914031799482473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300950 + }, + { + "epoch": 1.459601611594948, + "grad_norm": 1.231075543728366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300960 + }, + { + "epoch": 1.459650109787784, + "grad_norm": 1.2465061161037738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300970 + }, + { + "epoch": 1.45969860798062, + "grad_norm": 1.2434873042366235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300980 + }, + { + "epoch": 1.4597471061734562, + "grad_norm": 1.645168197228486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 300990 + }, + { + "epoch": 1.4597956043662923, + "grad_norm": 1.3033152868047182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301000 + }, + { + "epoch": 1.4598441025591284, + "grad_norm": 1.184073212812109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301010 + }, + { + "epoch": 1.4598926007519646, + "grad_norm": 1.224261723109521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301020 + }, + { + "epoch": 1.4599410989448005, + "grad_norm": 1.2350815836725815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301030 + }, + { + "epoch": 1.4599895971376367, + "grad_norm": 1.3206282289957016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301040 + }, + { + "epoch": 1.4600380953304728, + "grad_norm": 1.170489625224036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301050 + }, + { + "epoch": 1.4600865935233087, + "grad_norm": 1.2658695425216138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301060 + }, + { + "epoch": 1.4601350917161449, + "grad_norm": 1.128904472125214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301070 + }, + { + "epoch": 1.460183589908981, + "grad_norm": 1.196473817799415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301080 + }, + { + "epoch": 1.4602320881018171, + "grad_norm": 1.4364596268023888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301090 + }, + { + "epoch": 1.4602805862946533, + "grad_norm": 1.2289176254398626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301100 + }, + { + "epoch": 1.4603290844874892, + "grad_norm": 1.1749604311717121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301110 + }, + { + "epoch": 1.4603775826803254, + "grad_norm": 1.1871946270503031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301120 + }, + { + "epoch": 1.4604260808731615, + "grad_norm": 1.2341880051280896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301130 + }, + { + "epoch": 1.4604745790659974, + "grad_norm": 1.3619713001844502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301140 + }, + { + "epoch": 1.4605230772588336, + "grad_norm": 1.199782957428397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301150 + }, + { + "epoch": 1.4605715754516697, + "grad_norm": 1.0779004355754296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301160 + }, + { + "epoch": 1.4606200736445059, + "grad_norm": 1.1769125052296658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301170 + }, + { + "epoch": 1.460668571837342, + "grad_norm": 1.2430031404164765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301180 + }, + { + "epoch": 1.460717070030178, + "grad_norm": 1.2849288566485484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301190 + }, + { + "epoch": 1.460765568223014, + "grad_norm": 1.126544191265566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301200 + }, + { + "epoch": 1.4608140664158502, + "grad_norm": 1.1829998669554698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301210 + }, + { + "epoch": 1.4608625646086861, + "grad_norm": 1.1240056352335159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301220 + }, + { + "epoch": 1.4609110628015223, + "grad_norm": 1.1050990877947697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301230 + }, + { + "epoch": 1.4609595609943584, + "grad_norm": 1.2394683324146172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301240 + }, + { + "epoch": 1.4610080591871946, + "grad_norm": 1.1315665915390127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301250 + }, + { + "epoch": 1.4610565573800307, + "grad_norm": 1.0744868461642909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301260 + }, + { + "epoch": 1.4611050555728666, + "grad_norm": 1.2220399980833463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301270 + }, + { + "epoch": 1.4611535537657028, + "grad_norm": 1.1358180529441597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301280 + }, + { + "epoch": 1.461202051958539, + "grad_norm": 1.3038898316608538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301290 + }, + { + "epoch": 1.461250550151375, + "grad_norm": 1.0556684770790525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301300 + }, + { + "epoch": 1.4612990483442112, + "grad_norm": 1.090385595148291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301310 + }, + { + "epoch": 1.4613475465370471, + "grad_norm": 1.0717541698568311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301320 + }, + { + "epoch": 1.4613960447298833, + "grad_norm": 1.0809223027763437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301330 + }, + { + "epoch": 1.4614445429227194, + "grad_norm": 1.2293692464027117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301340 + }, + { + "epoch": 1.4614930411155553, + "grad_norm": 1.0623981694379836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301350 + }, + { + "epoch": 1.4615415393083915, + "grad_norm": 1.0353106461025163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301360 + }, + { + "epoch": 1.4615900375012276, + "grad_norm": 1.0982839171447267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301370 + }, + { + "epoch": 1.4616385356940638, + "grad_norm": 1.0671740824363951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301380 + }, + { + "epoch": 1.4616870338869, + "grad_norm": 1.1914260511503016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301390 + }, + { + "epoch": 1.4617355320797358, + "grad_norm": 1.1644338826499734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301400 + }, + { + "epoch": 1.461784030272572, + "grad_norm": 1.0042808185062313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301410 + }, + { + "epoch": 1.4618325284654081, + "grad_norm": 2.497932598544139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301420 + }, + { + "epoch": 1.461881026658244, + "grad_norm": 1.0101128822270766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301430 + }, + { + "epoch": 1.4619295248510802, + "grad_norm": 1.4071261489334574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301440 + }, + { + "epoch": 1.4619780230439163, + "grad_norm": 1.0919349335836159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301450 + }, + { + "epoch": 1.4620265212367525, + "grad_norm": 9.916469423387753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301460 + }, + { + "epoch": 1.4620750194295886, + "grad_norm": 1.0108169590239413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301470 + }, + { + "epoch": 1.4621235176224245, + "grad_norm": 1.0339220324340204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301480 + }, + { + "epoch": 1.4621720158152607, + "grad_norm": 1.09973029793764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301490 + }, + { + "epoch": 1.4622205140080968, + "grad_norm": 1.0149859264174665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301500 + }, + { + "epoch": 1.4622690122009327, + "grad_norm": 1.00732940211401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301510 + }, + { + "epoch": 1.462317510393769, + "grad_norm": 9.79959295932531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301520 + }, + { + "epoch": 1.462366008586605, + "grad_norm": 1.0174913001037567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301530 + }, + { + "epoch": 1.4624145067794412, + "grad_norm": 1.1416892675697454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301540 + }, + { + "epoch": 1.4624630049722773, + "grad_norm": 1.1756320361655526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301550 + }, + { + "epoch": 1.4625115031651132, + "grad_norm": 9.540073619973555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301560 + }, + { + "epoch": 1.4625600013579494, + "grad_norm": 9.836358572101744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301570 + }, + { + "epoch": 1.4626084995507855, + "grad_norm": 1.065832080371365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301580 + }, + { + "epoch": 1.4626569977436215, + "grad_norm": 1.094684662916734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301590 + }, + { + "epoch": 1.4627054959364576, + "grad_norm": 9.261351863187883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301600 + }, + { + "epoch": 1.4627539941292937, + "grad_norm": 9.26920051824709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301610 + }, + { + "epoch": 1.4628024923221299, + "grad_norm": 1.0391114102503707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301620 + }, + { + "epoch": 1.462850990514966, + "grad_norm": 1.0463246269409865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301630 + }, + { + "epoch": 1.462899488707802, + "grad_norm": 1.0852062359845149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301640 + }, + { + "epoch": 1.462947986900638, + "grad_norm": 1.9069204881816404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301650 + }, + { + "epoch": 1.4629964850934742, + "grad_norm": 9.428572411707137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301660 + }, + { + "epoch": 1.4630449832863102, + "grad_norm": 9.269587053495343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301670 + }, + { + "epoch": 1.4630934814791463, + "grad_norm": 9.560068292557844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301680 + }, + { + "epoch": 1.4631419796719825, + "grad_norm": 1.0944289385861339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301690 + }, + { + "epoch": 1.4631904778648186, + "grad_norm": 9.107769471938809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301700 + }, + { + "epoch": 1.4632389760576547, + "grad_norm": 9.322639016318135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301710 + }, + { + "epoch": 1.4632874742504907, + "grad_norm": 9.241528431402912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301720 + }, + { + "epoch": 1.4633359724433268, + "grad_norm": 9.22043312812093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301730 + }, + { + "epoch": 1.463384470636163, + "grad_norm": 1.0913488068808874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301740 + }, + { + "epoch": 1.463432968828999, + "grad_norm": 9.609326667714413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301750 + }, + { + "epoch": 1.4634814670218352, + "grad_norm": 9.435227354970266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301760 + }, + { + "epoch": 1.4635299652146712, + "grad_norm": 9.185001914602253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301770 + }, + { + "epoch": 1.4635784634075073, + "grad_norm": 9.163297676195725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301780 + }, + { + "epoch": 1.4636269616003434, + "grad_norm": 1.0352371049293652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301790 + }, + { + "epoch": 1.4636754597931794, + "grad_norm": 9.14451732114685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301800 + }, + { + "epoch": 1.4637239579860155, + "grad_norm": 9.438031867148311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301810 + }, + { + "epoch": 1.4637724561788517, + "grad_norm": 8.915233706829895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301820 + }, + { + "epoch": 1.4638209543716878, + "grad_norm": 9.027238689895967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301830 + }, + { + "epoch": 1.463869452564524, + "grad_norm": 1.0304742659172916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301840 + }, + { + "epoch": 1.4639179507573599, + "grad_norm": 8.821151453730636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301850 + }, + { + "epoch": 1.463966448950196, + "grad_norm": 9.702309711201451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301860 + }, + { + "epoch": 1.4640149471430322, + "grad_norm": 8.652720850932383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301870 + }, + { + "epoch": 1.464063445335868, + "grad_norm": 1.3469639270624612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301880 + }, + { + "epoch": 1.4641119435287042, + "grad_norm": 2.2387506248833233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301890 + }, + { + "epoch": 1.4641604417215404, + "grad_norm": 3.279870725236833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301900 + }, + { + "epoch": 1.4642089399143765, + "grad_norm": 1.2813939065381419e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301910 + }, + { + "epoch": 1.4642574381072126, + "grad_norm": 8.32732257549651e-05, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 301920 + }, + { + "epoch": 1.4643059363000486, + "grad_norm": 0.01874152384698391, + "learning_rate": 0.0002, + "loss": 0.0048, + "step": 301930 + }, + { + "epoch": 1.4643544344928847, + "grad_norm": 0.00023243433679454029, + "learning_rate": 0.0002, + "loss": 0.0032, + "step": 301940 + }, + { + "epoch": 1.4644029326857209, + "grad_norm": 0.00022354752582032233, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301950 + }, + { + "epoch": 1.4644514308785568, + "grad_norm": 0.00034790238714776933, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 301960 + }, + { + "epoch": 1.464499929071393, + "grad_norm": 0.06142372265458107, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 301970 + }, + { + "epoch": 1.464548427264229, + "grad_norm": 0.0016737048281356692, + "learning_rate": 0.0002, + "loss": 0.0032, + "step": 301980 + }, + { + "epoch": 1.4645969254570652, + "grad_norm": 5.77715145482216e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 301990 + }, + { + "epoch": 1.4646454236499014, + "grad_norm": 0.00016731556388549507, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 302000 + }, + { + "epoch": 1.4646939218427373, + "grad_norm": 0.0005022163386456668, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 302010 + }, + { + "epoch": 1.4647424200355734, + "grad_norm": 0.0014250688254833221, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 302020 + }, + { + "epoch": 1.4647909182284096, + "grad_norm": 0.0001644972071517259, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 302030 + }, + { + "epoch": 1.4648394164212455, + "grad_norm": 4.99489760841243e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 302040 + }, + { + "epoch": 1.4648879146140816, + "grad_norm": 0.00013792910613119602, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 302050 + }, + { + "epoch": 1.4649364128069178, + "grad_norm": 0.0007390787941403687, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302060 + }, + { + "epoch": 1.464984910999754, + "grad_norm": 0.00013890206173527986, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302070 + }, + { + "epoch": 1.46503340919259, + "grad_norm": 0.0006145643419586122, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 302080 + }, + { + "epoch": 1.465081907385426, + "grad_norm": 6.602948997169733e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 302090 + }, + { + "epoch": 1.4651304055782621, + "grad_norm": 5.986089672660455e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302100 + }, + { + "epoch": 1.4651789037710983, + "grad_norm": 5.766981121269055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302110 + }, + { + "epoch": 1.4652274019639342, + "grad_norm": 4.587523653754033e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302120 + }, + { + "epoch": 1.4652759001567703, + "grad_norm": 4.613254714058712e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302130 + }, + { + "epoch": 1.4653243983496065, + "grad_norm": 4.1310220694867894e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302140 + }, + { + "epoch": 1.4653728965424426, + "grad_norm": 3.1442388717550784e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302150 + }, + { + "epoch": 1.4654213947352788, + "grad_norm": 5.9566627896856517e-05, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 302160 + }, + { + "epoch": 1.4654698929281147, + "grad_norm": 0.00129315338563174, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302170 + }, + { + "epoch": 1.4655183911209508, + "grad_norm": 0.00019440939649939537, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 302180 + }, + { + "epoch": 1.465566889313787, + "grad_norm": 0.020328480750322342, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 302190 + }, + { + "epoch": 1.465615387506623, + "grad_norm": 0.0001014582157949917, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302200 + }, + { + "epoch": 1.465663885699459, + "grad_norm": 6.88539948896505e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302210 + }, + { + "epoch": 1.4657123838922952, + "grad_norm": 0.00011665109195746481, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302220 + }, + { + "epoch": 1.4657608820851313, + "grad_norm": 4.9839225539471954e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302230 + }, + { + "epoch": 1.4658093802779675, + "grad_norm": 4.901139982393943e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302240 + }, + { + "epoch": 1.4658578784708034, + "grad_norm": 8.058199455263093e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302250 + }, + { + "epoch": 1.4659063766636395, + "grad_norm": 0.08159377425909042, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302260 + }, + { + "epoch": 1.4659548748564757, + "grad_norm": 3.068593650823459e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302270 + }, + { + "epoch": 1.4660033730493118, + "grad_norm": 2.7378819140722044e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302280 + }, + { + "epoch": 1.466051871242148, + "grad_norm": 3.546968582668342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302290 + }, + { + "epoch": 1.466100369434984, + "grad_norm": 2.5406628992641345e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302300 + }, + { + "epoch": 1.46614886762782, + "grad_norm": 2.0978592147002928e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302310 + }, + { + "epoch": 1.4661973658206562, + "grad_norm": 2.305633643118199e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302320 + }, + { + "epoch": 1.466245864013492, + "grad_norm": 2.4938288333942182e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302330 + }, + { + "epoch": 1.4662943622063283, + "grad_norm": 2.4500031940988265e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302340 + }, + { + "epoch": 1.4663428603991644, + "grad_norm": 1.8554055714048445e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302350 + }, + { + "epoch": 1.4663913585920005, + "grad_norm": 1.795594835130032e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302360 + }, + { + "epoch": 1.4664398567848367, + "grad_norm": 1.4519557225867175e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302370 + }, + { + "epoch": 1.4664883549776726, + "grad_norm": 1.558979056426324e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302380 + }, + { + "epoch": 1.4665368531705087, + "grad_norm": 1.9085904568783008e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302390 + }, + { + "epoch": 1.466585351363345, + "grad_norm": 1.3159354239178356e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302400 + }, + { + "epoch": 1.4666338495561808, + "grad_norm": 1.3101714102958795e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302410 + }, + { + "epoch": 1.466682347749017, + "grad_norm": 1.4534328329318669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302420 + }, + { + "epoch": 1.466730845941853, + "grad_norm": 1.2407062058628071e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302430 + }, + { + "epoch": 1.4667793441346892, + "grad_norm": 0.09829651564359665, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302440 + }, + { + "epoch": 1.4668278423275254, + "grad_norm": 1.2642469300772063e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302450 + }, + { + "epoch": 1.4668763405203613, + "grad_norm": 1.093461014534114e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302460 + }, + { + "epoch": 1.4669248387131975, + "grad_norm": 1.0639659194566775e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302470 + }, + { + "epoch": 1.4669733369060336, + "grad_norm": 1.1479242857603822e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302480 + }, + { + "epoch": 1.4670218350988695, + "grad_norm": 1.120225806516828e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 302490 + }, + { + "epoch": 1.4670703332917057, + "grad_norm": 1.178937509394018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302500 + }, + { + "epoch": 1.4671188314845418, + "grad_norm": 1.872268876468297e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302510 + }, + { + "epoch": 1.467167329677378, + "grad_norm": 2.4079205104499124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302520 + }, + { + "epoch": 1.467215827870214, + "grad_norm": 2.278456486237701e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302530 + }, + { + "epoch": 1.46726432606305, + "grad_norm": 3.589260813896544e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302540 + }, + { + "epoch": 1.4673128242558862, + "grad_norm": 1.3032033166382462e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302550 + }, + { + "epoch": 1.4673613224487223, + "grad_norm": 1.2216356481076218e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302560 + }, + { + "epoch": 1.4674098206415582, + "grad_norm": 1.0500255484657828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302570 + }, + { + "epoch": 1.4674583188343944, + "grad_norm": 1.0677709724404849e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302580 + }, + { + "epoch": 1.4675068170272305, + "grad_norm": 1.626281846256461e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302590 + }, + { + "epoch": 1.4675553152200667, + "grad_norm": 8.969251211965457e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302600 + }, + { + "epoch": 1.4676038134129028, + "grad_norm": 8.394374162890017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302610 + }, + { + "epoch": 1.4676523116057387, + "grad_norm": 8.365604116988834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302620 + }, + { + "epoch": 1.4677008097985749, + "grad_norm": 7.962713425513357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302630 + }, + { + "epoch": 1.467749307991411, + "grad_norm": 1.3354449947655667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302640 + }, + { + "epoch": 1.467797806184247, + "grad_norm": 7.457651463482762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302650 + }, + { + "epoch": 1.467846304377083, + "grad_norm": 7.039601769065484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302660 + }, + { + "epoch": 1.4678948025699192, + "grad_norm": 7.121624548744876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302670 + }, + { + "epoch": 1.4679433007627554, + "grad_norm": 7.556994660262717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302680 + }, + { + "epoch": 1.4679917989555915, + "grad_norm": 1.0123871106770821e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302690 + }, + { + "epoch": 1.4680402971484274, + "grad_norm": 6.657759968220489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302700 + }, + { + "epoch": 1.4680887953412636, + "grad_norm": 6.293189017014811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302710 + }, + { + "epoch": 1.4681372935340997, + "grad_norm": 5.9853614402527455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302720 + }, + { + "epoch": 1.4681857917269356, + "grad_norm": 5.948266334598884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302730 + }, + { + "epoch": 1.4682342899197718, + "grad_norm": 8.27505937195383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302740 + }, + { + "epoch": 1.468282788112608, + "grad_norm": 5.806438821309712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302750 + }, + { + "epoch": 1.468331286305444, + "grad_norm": 5.598458301392384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302760 + }, + { + "epoch": 1.4683797844982802, + "grad_norm": 5.457498900796054e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302770 + }, + { + "epoch": 1.4684282826911161, + "grad_norm": 6.098121957620606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302780 + }, + { + "epoch": 1.4684767808839523, + "grad_norm": 6.980223588470835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302790 + }, + { + "epoch": 1.4685252790767884, + "grad_norm": 5.702924227080075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302800 + }, + { + "epoch": 1.4685737772696246, + "grad_norm": 6.644352197326953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302810 + }, + { + "epoch": 1.4686222754624607, + "grad_norm": 5.0030371312459465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302820 + }, + { + "epoch": 1.4686707736552966, + "grad_norm": 5.1509177865227684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302830 + }, + { + "epoch": 1.4687192718481328, + "grad_norm": 6.7166124608775135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302840 + }, + { + "epoch": 1.468767770040969, + "grad_norm": 5.038686140323989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302850 + }, + { + "epoch": 1.4688162682338048, + "grad_norm": 5.129082182975253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302860 + }, + { + "epoch": 1.468864766426641, + "grad_norm": 5.094637344882358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302870 + }, + { + "epoch": 1.4689132646194771, + "grad_norm": 4.853062364418292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302880 + }, + { + "epoch": 1.4689617628123133, + "grad_norm": 5.922679520153906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302890 + }, + { + "epoch": 1.4690102610051494, + "grad_norm": 4.545767296804115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302900 + }, + { + "epoch": 1.4690587591979853, + "grad_norm": 4.5344431782723404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302910 + }, + { + "epoch": 1.4691072573908215, + "grad_norm": 4.519295544014312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302920 + }, + { + "epoch": 1.4691557555836576, + "grad_norm": 4.267374151822878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302930 + }, + { + "epoch": 1.4692042537764936, + "grad_norm": 5.47456420463277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302940 + }, + { + "epoch": 1.4692527519693297, + "grad_norm": 4.30948648499907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302950 + }, + { + "epoch": 1.4693012501621658, + "grad_norm": 4.1839493860607035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302960 + }, + { + "epoch": 1.469349748355002, + "grad_norm": 4.210663519188529e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302970 + }, + { + "epoch": 1.4693982465478381, + "grad_norm": 4.045671630592551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302980 + }, + { + "epoch": 1.469446744740674, + "grad_norm": 5.0582825679157395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 302990 + }, + { + "epoch": 1.4694952429335102, + "grad_norm": 3.7986121697031194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303000 + }, + { + "epoch": 1.4695437411263463, + "grad_norm": 3.905983248841949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303010 + }, + { + "epoch": 1.4695922393191823, + "grad_norm": 4.058338163304143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303020 + }, + { + "epoch": 1.4696407375120184, + "grad_norm": 3.837652911897749e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303030 + }, + { + "epoch": 1.4696892357048545, + "grad_norm": 4.780594281328376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303040 + }, + { + "epoch": 1.4697377338976907, + "grad_norm": 3.465785994194448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303050 + }, + { + "epoch": 1.4697862320905268, + "grad_norm": 3.5976006529381266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303060 + }, + { + "epoch": 1.4698347302833628, + "grad_norm": 3.40905285156623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303070 + }, + { + "epoch": 1.469883228476199, + "grad_norm": 3.3719491057127016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303080 + }, + { + "epoch": 1.469931726669035, + "grad_norm": 4.431508841662435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303090 + }, + { + "epoch": 1.469980224861871, + "grad_norm": 3.072650315516512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303100 + }, + { + "epoch": 1.470028723054707, + "grad_norm": 3.2533882858842844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303110 + }, + { + "epoch": 1.4700772212475433, + "grad_norm": 3.2349664706998738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303120 + }, + { + "epoch": 1.4701257194403794, + "grad_norm": 3.1648719414079096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303130 + }, + { + "epoch": 1.4701742176332155, + "grad_norm": 3.3492199236206943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303140 + }, + { + "epoch": 1.4702227158260515, + "grad_norm": 2.851913677659468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303150 + }, + { + "epoch": 1.4702712140188876, + "grad_norm": 2.852040552170365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303160 + }, + { + "epoch": 1.4703197122117238, + "grad_norm": 2.805044914566679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303170 + }, + { + "epoch": 1.4703682104045597, + "grad_norm": 2.878623718061135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303180 + }, + { + "epoch": 1.4704167085973958, + "grad_norm": 3.2589769034530036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303190 + }, + { + "epoch": 1.470465206790232, + "grad_norm": 2.6952113785227994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303200 + }, + { + "epoch": 1.470513704983068, + "grad_norm": 2.5541664854245028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303210 + }, + { + "epoch": 1.4705622031759042, + "grad_norm": 2.548904831201071e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303220 + }, + { + "epoch": 1.4706107013687402, + "grad_norm": 2.7197929739486426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303230 + }, + { + "epoch": 1.4706591995615763, + "grad_norm": 9.410760867467616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303240 + }, + { + "epoch": 1.4707076977544125, + "grad_norm": 2.629418077049195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303250 + }, + { + "epoch": 1.4707561959472484, + "grad_norm": 2.4786347694316646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303260 + }, + { + "epoch": 1.4708046941400845, + "grad_norm": 2.3608090486959554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303270 + }, + { + "epoch": 1.4708531923329207, + "grad_norm": 2.45977753365878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303280 + }, + { + "epoch": 1.4709016905257568, + "grad_norm": 2.946388576674508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303290 + }, + { + "epoch": 1.470950188718593, + "grad_norm": 2.2499591523228446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303300 + }, + { + "epoch": 1.4709986869114289, + "grad_norm": 2.2108445136836963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303310 + }, + { + "epoch": 1.471047185104265, + "grad_norm": 2.2540527879755246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303320 + }, + { + "epoch": 1.4710956832971012, + "grad_norm": 2.2697097392665455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303330 + }, + { + "epoch": 1.4711441814899373, + "grad_norm": 2.7099813451059163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303340 + }, + { + "epoch": 1.4711926796827735, + "grad_norm": 2.1362916413636412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303350 + }, + { + "epoch": 1.4712411778756094, + "grad_norm": 2.1005841972510098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303360 + }, + { + "epoch": 1.4712896760684455, + "grad_norm": 2.0729892185045173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303370 + }, + { + "epoch": 1.4713381742612817, + "grad_norm": 2.5513347736705327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303380 + }, + { + "epoch": 1.4713866724541176, + "grad_norm": 2.5268707304348936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303390 + }, + { + "epoch": 1.4714351706469537, + "grad_norm": 2.0966745069017634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303400 + }, + { + "epoch": 1.4714836688397899, + "grad_norm": 1.9357391920493683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303410 + }, + { + "epoch": 1.471532167032626, + "grad_norm": 2.2526050997839775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303420 + }, + { + "epoch": 1.4715806652254622, + "grad_norm": 1.922401224874193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303430 + }, + { + "epoch": 1.471629163418298, + "grad_norm": 2.477669113432057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303440 + }, + { + "epoch": 1.4716776616111342, + "grad_norm": 2.2532608454639558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303450 + }, + { + "epoch": 1.4717261598039704, + "grad_norm": 1.8014711713476572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303460 + }, + { + "epoch": 1.4717746579968063, + "grad_norm": 1.815932023419009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303470 + }, + { + "epoch": 1.4718231561896424, + "grad_norm": 1.8494223468223936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303480 + }, + { + "epoch": 1.4718716543824786, + "grad_norm": 2.1007922441640403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303490 + }, + { + "epoch": 1.4719201525753147, + "grad_norm": 1.712279981802567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303500 + }, + { + "epoch": 1.4719686507681509, + "grad_norm": 1.8132321883967961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303510 + }, + { + "epoch": 1.4720171489609868, + "grad_norm": 1.70246289599163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303520 + }, + { + "epoch": 1.472065647153823, + "grad_norm": 1.7571915122971404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303530 + }, + { + "epoch": 1.472114145346659, + "grad_norm": 2.0801214759558206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303540 + }, + { + "epoch": 1.472162643539495, + "grad_norm": 1.8055931150229299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303550 + }, + { + "epoch": 1.4722111417323311, + "grad_norm": 1.6916487766138744e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303560 + }, + { + "epoch": 1.4722596399251673, + "grad_norm": 1.6856303091117297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303570 + }, + { + "epoch": 1.4723081381180034, + "grad_norm": 1.6810885199447512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303580 + }, + { + "epoch": 1.4723566363108396, + "grad_norm": 2.016849521169206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303590 + }, + { + "epoch": 1.4724051345036755, + "grad_norm": 1.5963785244821338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303600 + }, + { + "epoch": 1.4724536326965116, + "grad_norm": 1.6805624909466133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303610 + }, + { + "epoch": 1.4725021308893478, + "grad_norm": 1.559690645080991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303620 + }, + { + "epoch": 1.4725506290821837, + "grad_norm": 1.6137062175403116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303630 + }, + { + "epoch": 1.4725991272750198, + "grad_norm": 1.8201805005446658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303640 + }, + { + "epoch": 1.472647625467856, + "grad_norm": 1.615195856174978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303650 + }, + { + "epoch": 1.4726961236606921, + "grad_norm": 1.5443218899235944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303660 + }, + { + "epoch": 1.4727446218535283, + "grad_norm": 1.544511178508401e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303670 + }, + { + "epoch": 1.4727931200463642, + "grad_norm": 1.6751469047449064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303680 + }, + { + "epoch": 1.4728416182392003, + "grad_norm": 1.7510930092612398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303690 + }, + { + "epoch": 1.4728901164320365, + "grad_norm": 1.6253644616881502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303700 + }, + { + "epoch": 1.4729386146248724, + "grad_norm": 1.4143723774395767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303710 + }, + { + "epoch": 1.4729871128177086, + "grad_norm": 1.5546322629234055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303720 + }, + { + "epoch": 1.4730356110105447, + "grad_norm": 1.6154768900378258e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303730 + }, + { + "epoch": 1.4730841092033808, + "grad_norm": 1.6912669025259675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303740 + }, + { + "epoch": 1.473132607396217, + "grad_norm": 1.5251849845299148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303750 + }, + { + "epoch": 1.473181105589053, + "grad_norm": 1.4324824633149547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303760 + }, + { + "epoch": 1.473229603781889, + "grad_norm": 1.453084337299515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303770 + }, + { + "epoch": 1.4732781019747252, + "grad_norm": 1.4733464013261255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303780 + }, + { + "epoch": 1.4733266001675611, + "grad_norm": 1.5335899661295116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303790 + }, + { + "epoch": 1.4733750983603975, + "grad_norm": 1.3954643236502307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303800 + }, + { + "epoch": 1.4734235965532334, + "grad_norm": 1.4300395605459926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303810 + }, + { + "epoch": 1.4734720947460695, + "grad_norm": 1.4091837101659621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303820 + }, + { + "epoch": 1.4735205929389057, + "grad_norm": 1.7939184999704594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303830 + }, + { + "epoch": 1.4735690911317416, + "grad_norm": 1.4740994629391935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303840 + }, + { + "epoch": 1.4736175893245778, + "grad_norm": 1.354540700049256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303850 + }, + { + "epoch": 1.473666087517414, + "grad_norm": 1.2871696526417509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303860 + }, + { + "epoch": 1.47371458571025, + "grad_norm": 1.4977100590840564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303870 + }, + { + "epoch": 1.4737630839030862, + "grad_norm": 1.268271716980962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303880 + }, + { + "epoch": 1.4738115820959221, + "grad_norm": 1.632891780900536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303890 + }, + { + "epoch": 1.4738600802887583, + "grad_norm": 1.4640772860730067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303900 + }, + { + "epoch": 1.4739085784815944, + "grad_norm": 1.184957000077702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303910 + }, + { + "epoch": 1.4739570766744303, + "grad_norm": 1.1940527429032954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303920 + }, + { + "epoch": 1.4740055748672665, + "grad_norm": 1.2951994676768663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303930 + }, + { + "epoch": 1.4740540730601026, + "grad_norm": 1.417181920260191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303940 + }, + { + "epoch": 1.4741025712529388, + "grad_norm": 1.1943525350943673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303950 + }, + { + "epoch": 1.474151069445775, + "grad_norm": 1.1787204812208074e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303960 + }, + { + "epoch": 1.4741995676386108, + "grad_norm": 1.1693894066411303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303970 + }, + { + "epoch": 1.474248065831447, + "grad_norm": 1.2373535582810291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303980 + }, + { + "epoch": 1.474296564024283, + "grad_norm": 1.3701808256882941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 303990 + }, + { + "epoch": 1.474345062217119, + "grad_norm": 1.0820754141604993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304000 + }, + { + "epoch": 1.4743935604099552, + "grad_norm": 1.0823549700944568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304010 + }, + { + "epoch": 1.4744420586027913, + "grad_norm": 1.1842068943224149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304020 + }, + { + "epoch": 1.4744905567956275, + "grad_norm": 1.1107448472102988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304030 + }, + { + "epoch": 1.4745390549884636, + "grad_norm": 1.2667221653828165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304040 + }, + { + "epoch": 1.4745875531812995, + "grad_norm": 1.1591145039346884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304050 + }, + { + "epoch": 1.4746360513741357, + "grad_norm": 1.0800285963341594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304060 + }, + { + "epoch": 1.4746845495669718, + "grad_norm": 1.0509857020224445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304070 + }, + { + "epoch": 1.4747330477598077, + "grad_norm": 1.022183141685673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304080 + }, + { + "epoch": 1.4747815459526439, + "grad_norm": 1.2620395182239008e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304090 + }, + { + "epoch": 1.47483004414548, + "grad_norm": 1.07075402411283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304100 + }, + { + "epoch": 1.4748785423383162, + "grad_norm": 1.0352639492339222e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304110 + }, + { + "epoch": 1.4749270405311523, + "grad_norm": 1.0976390285577509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304120 + }, + { + "epoch": 1.4749755387239882, + "grad_norm": 1.0089447641803417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304130 + }, + { + "epoch": 1.4750240369168244, + "grad_norm": 1.2012181969112135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304140 + }, + { + "epoch": 1.4750725351096605, + "grad_norm": 1.001294435809541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304150 + }, + { + "epoch": 1.4751210333024964, + "grad_norm": 1.0336993909731973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304160 + }, + { + "epoch": 1.4751695314953326, + "grad_norm": 9.725331437948626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304170 + }, + { + "epoch": 1.4752180296881687, + "grad_norm": 9.954441111403867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304180 + }, + { + "epoch": 1.4752665278810049, + "grad_norm": 1.1261482768532005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304190 + }, + { + "epoch": 1.475315026073841, + "grad_norm": 9.979321475839242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304200 + }, + { + "epoch": 1.475363524266677, + "grad_norm": 9.327121688329498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304210 + }, + { + "epoch": 1.475412022459513, + "grad_norm": 1.0228201290374273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304220 + }, + { + "epoch": 1.4754605206523492, + "grad_norm": 9.428613907402905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304230 + }, + { + "epoch": 1.4755090188451851, + "grad_norm": 1.1533219321790966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304240 + }, + { + "epoch": 1.4755575170380213, + "grad_norm": 9.733588512972347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304250 + }, + { + "epoch": 1.4756060152308574, + "grad_norm": 9.490373713560984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304260 + }, + { + "epoch": 1.4756545134236936, + "grad_norm": 1.0047785963251954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304270 + }, + { + "epoch": 1.4757030116165297, + "grad_norm": 9.901879138851655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304280 + }, + { + "epoch": 1.4757515098093656, + "grad_norm": 1.2231150776642608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304290 + }, + { + "epoch": 1.4758000080022018, + "grad_norm": 9.007639505398402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304300 + }, + { + "epoch": 1.475848506195038, + "grad_norm": 9.379014613841719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304310 + }, + { + "epoch": 1.475897004387874, + "grad_norm": 8.395312534048571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304320 + }, + { + "epoch": 1.4759455025807102, + "grad_norm": 9.259426860808162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304330 + }, + { + "epoch": 1.4759940007735461, + "grad_norm": 9.941705911842291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304340 + }, + { + "epoch": 1.4760424989663823, + "grad_norm": 8.830389219838253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304350 + }, + { + "epoch": 1.4760909971592184, + "grad_norm": 8.498382726429554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304360 + }, + { + "epoch": 1.4761394953520544, + "grad_norm": 8.75794341936853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304370 + }, + { + "epoch": 1.4761879935448905, + "grad_norm": 8.269510658465151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304380 + }, + { + "epoch": 1.4762364917377266, + "grad_norm": 9.521464221506903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304390 + }, + { + "epoch": 1.4762849899305628, + "grad_norm": 8.286179991046083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304400 + }, + { + "epoch": 1.476333488123399, + "grad_norm": 7.864937856538745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304410 + }, + { + "epoch": 1.4763819863162349, + "grad_norm": 8.370753334929759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304420 + }, + { + "epoch": 1.476430484509071, + "grad_norm": 7.998830824362813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304430 + }, + { + "epoch": 1.4764789827019071, + "grad_norm": 8.97993572834821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304440 + }, + { + "epoch": 1.476527480894743, + "grad_norm": 8.059378728830779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304450 + }, + { + "epoch": 1.4765759790875792, + "grad_norm": 1.2174853054602863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304460 + }, + { + "epoch": 1.4766244772804153, + "grad_norm": 7.663711016903108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304470 + }, + { + "epoch": 1.4766729754732515, + "grad_norm": 7.770748311486386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304480 + }, + { + "epoch": 1.4767214736660876, + "grad_norm": 8.66413017774903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304490 + }, + { + "epoch": 1.4767699718589236, + "grad_norm": 8.438147460765322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304500 + }, + { + "epoch": 1.4768184700517597, + "grad_norm": 7.30845101770683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304510 + }, + { + "epoch": 1.4768669682445958, + "grad_norm": 7.95912512785435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304520 + }, + { + "epoch": 1.4769154664374318, + "grad_norm": 7.450025805155747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304530 + }, + { + "epoch": 1.476963964630268, + "grad_norm": 9.232693400917924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304540 + }, + { + "epoch": 1.477012462823104, + "grad_norm": 7.555642014267505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304550 + }, + { + "epoch": 1.4770609610159402, + "grad_norm": 7.438350166921737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304560 + }, + { + "epoch": 1.4771094592087763, + "grad_norm": 7.597547551085881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304570 + }, + { + "epoch": 1.4771579574016123, + "grad_norm": 7.405246265079768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304580 + }, + { + "epoch": 1.4772064555944484, + "grad_norm": 9.321404945694667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304590 + }, + { + "epoch": 1.4772549537872846, + "grad_norm": 7.207762564576115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304600 + }, + { + "epoch": 1.4773034519801205, + "grad_norm": 7.498935588046152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304610 + }, + { + "epoch": 1.4773519501729566, + "grad_norm": 6.890734880471427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304620 + }, + { + "epoch": 1.4774004483657928, + "grad_norm": 7.070696597111237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304630 + }, + { + "epoch": 1.477448946558629, + "grad_norm": 8.046782227211224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304640 + }, + { + "epoch": 1.477497444751465, + "grad_norm": 6.87601584559161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304650 + }, + { + "epoch": 1.477545942944301, + "grad_norm": 6.798569529564702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304660 + }, + { + "epoch": 1.4775944411371371, + "grad_norm": 6.855528340565797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304670 + }, + { + "epoch": 1.4776429393299733, + "grad_norm": 7.233995233946189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304680 + }, + { + "epoch": 1.4776914375228092, + "grad_norm": 7.286198524525389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304690 + }, + { + "epoch": 1.4777399357156453, + "grad_norm": 7.120661393855698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304700 + }, + { + "epoch": 1.4777884339084815, + "grad_norm": 6.437088018174109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304710 + }, + { + "epoch": 1.4778369321013176, + "grad_norm": 6.564377486029116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304720 + }, + { + "epoch": 1.4778854302941538, + "grad_norm": 6.650528803220368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304730 + }, + { + "epoch": 1.4779339284869897, + "grad_norm": 9.170402677227685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304740 + }, + { + "epoch": 1.4779824266798258, + "grad_norm": 6.182009428812307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304750 + }, + { + "epoch": 1.478030924872662, + "grad_norm": 6.496738933492452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304760 + }, + { + "epoch": 1.4780794230654979, + "grad_norm": 6.137623245194845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304770 + }, + { + "epoch": 1.478127921258334, + "grad_norm": 6.197957418407896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304780 + }, + { + "epoch": 1.4781764194511702, + "grad_norm": 8.242763556154387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304790 + }, + { + "epoch": 1.4782249176440063, + "grad_norm": 5.967194738332182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304800 + }, + { + "epoch": 1.4782734158368425, + "grad_norm": 6.117564339547243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304810 + }, + { + "epoch": 1.4783219140296784, + "grad_norm": 5.930655220254266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304820 + }, + { + "epoch": 1.4783704122225145, + "grad_norm": 5.893593879591208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304830 + }, + { + "epoch": 1.4784189104153507, + "grad_norm": 6.538504635500431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304840 + }, + { + "epoch": 1.4784674086081868, + "grad_norm": 6.552971285600506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304850 + }, + { + "epoch": 1.478515906801023, + "grad_norm": 6.520108968288696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304860 + }, + { + "epoch": 1.4785644049938589, + "grad_norm": 4.4834632717538625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304870 + }, + { + "epoch": 1.478612903186695, + "grad_norm": 5.554286985898216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304880 + }, + { + "epoch": 1.4786614013795312, + "grad_norm": 6.484888785962539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304890 + }, + { + "epoch": 1.478709899572367, + "grad_norm": 6.022836487318273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304900 + }, + { + "epoch": 1.4787583977652032, + "grad_norm": 7.598439992761996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304910 + }, + { + "epoch": 1.4788068959580394, + "grad_norm": 5.565946707974945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304920 + }, + { + "epoch": 1.4788553941508755, + "grad_norm": 5.434310423879651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304930 + }, + { + "epoch": 1.4789038923437117, + "grad_norm": 6.484180516963534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304940 + }, + { + "epoch": 1.4789523905365476, + "grad_norm": 5.458257987811521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304950 + }, + { + "epoch": 1.4790008887293837, + "grad_norm": 5.317353384270973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304960 + }, + { + "epoch": 1.4790493869222199, + "grad_norm": 9.68141193880001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304970 + }, + { + "epoch": 1.4790978851150558, + "grad_norm": 5.479963078869332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304980 + }, + { + "epoch": 1.479146383307892, + "grad_norm": 6.156832910164667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 304990 + }, + { + "epoch": 1.479194881500728, + "grad_norm": 5.251345100987237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305000 + }, + { + "epoch": 1.4792433796935642, + "grad_norm": 5.565559035858314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305010 + }, + { + "epoch": 1.4792918778864004, + "grad_norm": 5.207318167776975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305020 + }, + { + "epoch": 1.4793403760792363, + "grad_norm": 5.513272185453388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305030 + }, + { + "epoch": 1.4793888742720724, + "grad_norm": 5.814446240037796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305040 + }, + { + "epoch": 1.4794373724649086, + "grad_norm": 5.080188998363155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305050 + }, + { + "epoch": 1.4794858706577445, + "grad_norm": 5.122457764628052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305060 + }, + { + "epoch": 1.4795343688505807, + "grad_norm": 4.836584821532597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305070 + }, + { + "epoch": 1.4795828670434168, + "grad_norm": 5.612575932900654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305080 + }, + { + "epoch": 1.479631365236253, + "grad_norm": 5.892413241781469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305090 + }, + { + "epoch": 1.479679863429089, + "grad_norm": 5.26158999036852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305100 + }, + { + "epoch": 1.479728361621925, + "grad_norm": 4.922281391372962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305110 + }, + { + "epoch": 1.4797768598147611, + "grad_norm": 5.283890800455993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305120 + }, + { + "epoch": 1.4798253580075973, + "grad_norm": 4.967030236002756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305130 + }, + { + "epoch": 1.4798738562004332, + "grad_norm": 5.835611887050618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305140 + }, + { + "epoch": 1.4799223543932694, + "grad_norm": 5.007938170820125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305150 + }, + { + "epoch": 1.4799708525861055, + "grad_norm": 5.924179049543454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305160 + }, + { + "epoch": 1.4800193507789416, + "grad_norm": 0.000994239468127489, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305170 + }, + { + "epoch": 1.4800678489717778, + "grad_norm": 4.997672249373863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305180 + }, + { + "epoch": 1.4801163471646137, + "grad_norm": 5.693527214134519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305190 + }, + { + "epoch": 1.4801648453574499, + "grad_norm": 4.6433967781922547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305200 + }, + { + "epoch": 1.480213343550286, + "grad_norm": 4.4578095526048855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305210 + }, + { + "epoch": 1.480261841743122, + "grad_norm": 4.6875996417838905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305220 + }, + { + "epoch": 1.480310339935958, + "grad_norm": 4.922455332234676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305230 + }, + { + "epoch": 1.4803588381287942, + "grad_norm": 4.941029487781634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305240 + }, + { + "epoch": 1.4804073363216304, + "grad_norm": 4.2190660565211147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305250 + }, + { + "epoch": 1.4804558345144665, + "grad_norm": 4.2984032688764273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305260 + }, + { + "epoch": 1.4805043327073024, + "grad_norm": 4.6526503183486057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305270 + }, + { + "epoch": 1.4805528309001386, + "grad_norm": 4.5050663288748183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305280 + }, + { + "epoch": 1.4806013290929747, + "grad_norm": 5.149569801687903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305290 + }, + { + "epoch": 1.4806498272858106, + "grad_norm": 4.334867753641447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305300 + }, + { + "epoch": 1.4806983254786468, + "grad_norm": 4.53360541996517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305310 + }, + { + "epoch": 1.480746823671483, + "grad_norm": 4.3752478973146935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305320 + }, + { + "epoch": 1.480795321864319, + "grad_norm": 4.3173508856853005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305330 + }, + { + "epoch": 1.4808438200571552, + "grad_norm": 4.756012401685439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305340 + }, + { + "epoch": 1.4808923182499911, + "grad_norm": 4.4301731350060436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305350 + }, + { + "epoch": 1.4809408164428273, + "grad_norm": 4.1965034824897884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305360 + }, + { + "epoch": 1.4809893146356634, + "grad_norm": 4.2475511463635485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305370 + }, + { + "epoch": 1.4810378128284996, + "grad_norm": 3.9896048065202194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305380 + }, + { + "epoch": 1.4810863110213357, + "grad_norm": 4.0972341253109335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305390 + }, + { + "epoch": 1.4811348092141716, + "grad_norm": 4.270159763564152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305400 + }, + { + "epoch": 1.4811833074070078, + "grad_norm": 4.19039139387678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305410 + }, + { + "epoch": 1.481231805599844, + "grad_norm": 4.3968904606117576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305420 + }, + { + "epoch": 1.4812803037926798, + "grad_norm": 4.118420804388734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305430 + }, + { + "epoch": 1.481328801985516, + "grad_norm": 5.411039865066414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305440 + }, + { + "epoch": 1.4813773001783521, + "grad_norm": 3.9817095398575475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305450 + }, + { + "epoch": 1.4814257983711883, + "grad_norm": 4.0153764757633326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305460 + }, + { + "epoch": 1.4814742965640244, + "grad_norm": 4.131602509005461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305470 + }, + { + "epoch": 1.4815227947568603, + "grad_norm": 4.092936194410868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305480 + }, + { + "epoch": 1.4815712929496965, + "grad_norm": 4.1749589740902593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305490 + }, + { + "epoch": 1.4816197911425326, + "grad_norm": 4.3425521312201454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305500 + }, + { + "epoch": 1.4816682893353685, + "grad_norm": 3.961972652177792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305510 + }, + { + "epoch": 1.4817167875282047, + "grad_norm": 4.307029541905649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305520 + }, + { + "epoch": 1.4817652857210408, + "grad_norm": 3.9102036453186884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305530 + }, + { + "epoch": 1.481813783913877, + "grad_norm": 4.245800937496824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305540 + }, + { + "epoch": 1.4818622821067131, + "grad_norm": 4.201971535167104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305550 + }, + { + "epoch": 1.481910780299549, + "grad_norm": 3.7320239698601654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305560 + }, + { + "epoch": 1.4819592784923852, + "grad_norm": 3.8428007087532023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305570 + }, + { + "epoch": 1.4820077766852213, + "grad_norm": 3.7580758771582623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305580 + }, + { + "epoch": 1.4820562748780572, + "grad_norm": 4.106387336832995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305590 + }, + { + "epoch": 1.4821047730708934, + "grad_norm": 3.8761592691116675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305600 + }, + { + "epoch": 1.4821532712637295, + "grad_norm": 3.498590785966371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305610 + }, + { + "epoch": 1.4822017694565657, + "grad_norm": 3.8544754943359294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305620 + }, + { + "epoch": 1.4822502676494018, + "grad_norm": 4.3161779217371077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305630 + }, + { + "epoch": 1.4822987658422377, + "grad_norm": 3.8362514942491543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305640 + }, + { + "epoch": 1.4823472640350739, + "grad_norm": 3.4662645020944183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305650 + }, + { + "epoch": 1.48239576222791, + "grad_norm": 3.5541444276532275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305660 + }, + { + "epoch": 1.482444260420746, + "grad_norm": 3.8523788248312485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305670 + }, + { + "epoch": 1.482492758613582, + "grad_norm": 3.5682020893545996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305680 + }, + { + "epoch": 1.4825412568064182, + "grad_norm": 4.5681807137043506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305690 + }, + { + "epoch": 1.4825897549992544, + "grad_norm": 3.561889059255918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305700 + }, + { + "epoch": 1.4826382531920905, + "grad_norm": 3.4346859933975793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305710 + }, + { + "epoch": 1.4826867513849264, + "grad_norm": 3.3335126659039815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305720 + }, + { + "epoch": 1.4827352495777626, + "grad_norm": 4.735484253615141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305730 + }, + { + "epoch": 1.4827837477705987, + "grad_norm": 3.649005009265238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305740 + }, + { + "epoch": 1.4828322459634347, + "grad_norm": 3.311307921194384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305750 + }, + { + "epoch": 1.4828807441562708, + "grad_norm": 3.4925326986012806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305760 + }, + { + "epoch": 1.482929242349107, + "grad_norm": 3.485164938865637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305770 + }, + { + "epoch": 1.482977740541943, + "grad_norm": 3.255942147006863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305780 + }, + { + "epoch": 1.4830262387347792, + "grad_norm": 3.796485827933793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305790 + }, + { + "epoch": 1.4830747369276152, + "grad_norm": 3.3616950645409815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305800 + }, + { + "epoch": 1.4831232351204513, + "grad_norm": 3.3835888757494104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305810 + }, + { + "epoch": 1.4831717333132874, + "grad_norm": 3.180051635354175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305820 + }, + { + "epoch": 1.4832202315061234, + "grad_norm": 3.2158948215510463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305830 + }, + { + "epoch": 1.4832687296989595, + "grad_norm": 3.4212766308883147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305840 + }, + { + "epoch": 1.4833172278917957, + "grad_norm": 3.1473993544750556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305850 + }, + { + "epoch": 1.4833657260846318, + "grad_norm": 3.072911738399853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305860 + }, + { + "epoch": 1.483414224277468, + "grad_norm": 3.402840889066283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305870 + }, + { + "epoch": 1.4834627224703039, + "grad_norm": 3.099013383689453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305880 + }, + { + "epoch": 1.48351122066314, + "grad_norm": 3.495689213650621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305890 + }, + { + "epoch": 1.4835597188559762, + "grad_norm": 3.692564689572464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305900 + }, + { + "epoch": 1.4836082170488123, + "grad_norm": 2.9304305826372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305910 + }, + { + "epoch": 1.4836567152416484, + "grad_norm": 2.9265299872349715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305920 + }, + { + "epoch": 1.4837052134344844, + "grad_norm": 3.0353223223755776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305930 + }, + { + "epoch": 1.4837537116273205, + "grad_norm": 3.5911691043111205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305940 + }, + { + "epoch": 1.4838022098201566, + "grad_norm": 2.878932434668968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305950 + }, + { + "epoch": 1.4838507080129926, + "grad_norm": 3.0289365327007545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305960 + }, + { + "epoch": 1.4838992062058287, + "grad_norm": 3.049301540158922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305970 + }, + { + "epoch": 1.4839477043986649, + "grad_norm": 2.9841447712897207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305980 + }, + { + "epoch": 1.483996202591501, + "grad_norm": 3.2865025900719047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 305990 + }, + { + "epoch": 1.4840447007843371, + "grad_norm": 2.756937931280845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306000 + }, + { + "epoch": 1.484093198977173, + "grad_norm": 2.9830070502612216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306010 + }, + { + "epoch": 1.4841416971700092, + "grad_norm": 2.817425865941914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306020 + }, + { + "epoch": 1.4841901953628454, + "grad_norm": 2.8559747988765594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306030 + }, + { + "epoch": 1.4842386935556813, + "grad_norm": 3.273916604484839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306040 + }, + { + "epoch": 1.4842871917485174, + "grad_norm": 2.8597392542906164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306050 + }, + { + "epoch": 1.4843356899413536, + "grad_norm": 2.7859357487614034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306060 + }, + { + "epoch": 1.4843841881341897, + "grad_norm": 2.880527745219297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306070 + }, + { + "epoch": 1.4844326863270259, + "grad_norm": 8.709642997928313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306080 + }, + { + "epoch": 1.4844811845198618, + "grad_norm": 3.0180609655872104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306090 + }, + { + "epoch": 1.484529682712698, + "grad_norm": 2.822490614562412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306100 + }, + { + "epoch": 1.484578180905534, + "grad_norm": 2.8781707328562334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306110 + }, + { + "epoch": 1.48462667909837, + "grad_norm": 2.810347723425366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306120 + }, + { + "epoch": 1.4846751772912061, + "grad_norm": 2.9533654810620646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306130 + }, + { + "epoch": 1.4847236754840423, + "grad_norm": 3.3849994451884413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306140 + }, + { + "epoch": 1.4847721736768784, + "grad_norm": 2.944786103853403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306150 + }, + { + "epoch": 1.4848206718697146, + "grad_norm": 2.727179548855929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306160 + }, + { + "epoch": 1.4848691700625505, + "grad_norm": 2.711258844101394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306170 + }, + { + "epoch": 1.4849176682553866, + "grad_norm": 3.3541346056154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306180 + }, + { + "epoch": 1.4849661664482228, + "grad_norm": 3.0221031011024024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306190 + }, + { + "epoch": 1.4850146646410587, + "grad_norm": 2.645507777288003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306200 + }, + { + "epoch": 1.4850631628338948, + "grad_norm": 2.4903272333176574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306210 + }, + { + "epoch": 1.485111661026731, + "grad_norm": 2.6322524604438513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306220 + }, + { + "epoch": 1.4851601592195671, + "grad_norm": 2.7521912215888733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306230 + }, + { + "epoch": 1.4852086574124033, + "grad_norm": 2.783264676509134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306240 + }, + { + "epoch": 1.4852571556052392, + "grad_norm": 2.761935320449993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306250 + }, + { + "epoch": 1.4853056537980753, + "grad_norm": 2.6792562834998535e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306260 + }, + { + "epoch": 1.4853541519909115, + "grad_norm": 2.890889447826339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306270 + }, + { + "epoch": 1.4854026501837474, + "grad_norm": 2.258270939137219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306280 + }, + { + "epoch": 1.4854511483765835, + "grad_norm": 2.899622018048831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306290 + }, + { + "epoch": 1.4854996465694197, + "grad_norm": 2.527311551148159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306300 + }, + { + "epoch": 1.4855481447622558, + "grad_norm": 2.478719522969186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306310 + }, + { + "epoch": 1.485596642955092, + "grad_norm": 2.3334739296387852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306320 + }, + { + "epoch": 1.485645141147928, + "grad_norm": 2.6154964416491566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306330 + }, + { + "epoch": 1.485693639340764, + "grad_norm": 2.8130349960520107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306340 + }, + { + "epoch": 1.4857421375336002, + "grad_norm": 2.491015607120062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306350 + }, + { + "epoch": 1.4857906357264363, + "grad_norm": 2.4801761355774943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306360 + }, + { + "epoch": 1.4858391339192725, + "grad_norm": 2.479814327216445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306370 + }, + { + "epoch": 1.4858876321121084, + "grad_norm": 2.3917920088933897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306380 + }, + { + "epoch": 1.4859361303049445, + "grad_norm": 2.681002229110163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306390 + }, + { + "epoch": 1.4859846284977807, + "grad_norm": 2.4432964096376963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306400 + }, + { + "epoch": 1.4860331266906166, + "grad_norm": 2.260357234717958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306410 + }, + { + "epoch": 1.4860816248834527, + "grad_norm": 2.3007993377177627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306420 + }, + { + "epoch": 1.486130123076289, + "grad_norm": 2.4004509668884566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306430 + }, + { + "epoch": 1.486178621269125, + "grad_norm": 2.727726098328276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306440 + }, + { + "epoch": 1.4862271194619612, + "grad_norm": 2.3904118506834493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306450 + }, + { + "epoch": 1.486275617654797, + "grad_norm": 2.3878263277765654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306460 + }, + { + "epoch": 1.4863241158476332, + "grad_norm": 2.3554777328627097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306470 + }, + { + "epoch": 1.4863726140404694, + "grad_norm": 2.3064146148499276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306480 + }, + { + "epoch": 1.4864211122333053, + "grad_norm": 2.576473718818306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306490 + }, + { + "epoch": 1.4864696104261415, + "grad_norm": 2.193409756046094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306500 + }, + { + "epoch": 1.4865181086189776, + "grad_norm": 2.3120941250454052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306510 + }, + { + "epoch": 1.4865666068118137, + "grad_norm": 2.162358470059189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306520 + }, + { + "epoch": 1.4866151050046499, + "grad_norm": 2.2135635902031936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306530 + }, + { + "epoch": 1.4866636031974858, + "grad_norm": 2.519083466268057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306540 + }, + { + "epoch": 1.486712101390322, + "grad_norm": 2.14732665426709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306550 + }, + { + "epoch": 1.486760599583158, + "grad_norm": 2.27997404067537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306560 + }, + { + "epoch": 1.486809097775994, + "grad_norm": 2.0843624781718972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306570 + }, + { + "epoch": 1.4868575959688302, + "grad_norm": 2.0929050492668466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306580 + }, + { + "epoch": 1.4869060941616663, + "grad_norm": 2.567421688581817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306590 + }, + { + "epoch": 1.4869545923545024, + "grad_norm": 1.9254616745456588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306600 + }, + { + "epoch": 1.4870030905473386, + "grad_norm": 2.0968163028101117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306610 + }, + { + "epoch": 1.4870515887401745, + "grad_norm": 1.9779427873345412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306620 + }, + { + "epoch": 1.4871000869330107, + "grad_norm": 2.0058281791079935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306630 + }, + { + "epoch": 1.4871485851258468, + "grad_norm": 2.338585289862749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306640 + }, + { + "epoch": 1.4871970833186827, + "grad_norm": 2.139628634267865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306650 + }, + { + "epoch": 1.4872455815115189, + "grad_norm": 2.0276242196359817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306660 + }, + { + "epoch": 1.487294079704355, + "grad_norm": 2.0077717977073917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306670 + }, + { + "epoch": 1.4873425778971912, + "grad_norm": 2.0503777875546803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306680 + }, + { + "epoch": 1.4873910760900273, + "grad_norm": 2.2245933450903976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306690 + }, + { + "epoch": 1.4874395742828632, + "grad_norm": 1.9015811858480447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306700 + }, + { + "epoch": 1.4874880724756994, + "grad_norm": 5.087187560093298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306710 + }, + { + "epoch": 1.4875365706685355, + "grad_norm": 1.9735421119548846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306720 + }, + { + "epoch": 1.4875850688613714, + "grad_norm": 2.0863051020114654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306730 + }, + { + "epoch": 1.4876335670542076, + "grad_norm": 2.2549991740561381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306740 + }, + { + "epoch": 1.4876820652470437, + "grad_norm": 2.017940801124496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306750 + }, + { + "epoch": 1.4877305634398799, + "grad_norm": 2.0051523108577385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306760 + }, + { + "epoch": 1.487779061632716, + "grad_norm": 1.859615395005676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306770 + }, + { + "epoch": 1.487827559825552, + "grad_norm": 1.77838671788777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306780 + }, + { + "epoch": 1.487876058018388, + "grad_norm": 2.2033721336356393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306790 + }, + { + "epoch": 1.4879245562112242, + "grad_norm": 1.877317856724403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306800 + }, + { + "epoch": 1.4879730544040601, + "grad_norm": 1.8645245347670425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306810 + }, + { + "epoch": 1.4880215525968963, + "grad_norm": 1.8416393743336812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306820 + }, + { + "epoch": 1.4880700507897324, + "grad_norm": 1.9341447909937415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306830 + }, + { + "epoch": 1.4881185489825686, + "grad_norm": 2.2606465677199594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306840 + }, + { + "epoch": 1.4881670471754047, + "grad_norm": 2.2191707671481709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306850 + }, + { + "epoch": 1.4882155453682406, + "grad_norm": 1.8368886856023892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306860 + }, + { + "epoch": 1.4882640435610768, + "grad_norm": 1.8538176504989679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306870 + }, + { + "epoch": 1.488312541753913, + "grad_norm": 1.836928618104139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306880 + }, + { + "epoch": 1.488361039946749, + "grad_norm": 2.3072644239618967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306890 + }, + { + "epoch": 1.4884095381395852, + "grad_norm": 2.2342051408941188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306900 + }, + { + "epoch": 1.4884580363324211, + "grad_norm": 1.7660784124018392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306910 + }, + { + "epoch": 1.4885065345252573, + "grad_norm": 1.7233224980373052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306920 + }, + { + "epoch": 1.4885550327180934, + "grad_norm": 1.7810224051117984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306930 + }, + { + "epoch": 1.4886035309109293, + "grad_norm": 2.2098389251823392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306940 + }, + { + "epoch": 1.4886520291037655, + "grad_norm": 1.745048052725906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306950 + }, + { + "epoch": 1.4887005272966016, + "grad_norm": 1.7166651389288745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306960 + }, + { + "epoch": 1.4887490254894378, + "grad_norm": 1.7452516942739749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306970 + }, + { + "epoch": 1.488797523682274, + "grad_norm": 1.6882306397292268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306980 + }, + { + "epoch": 1.4888460218751098, + "grad_norm": 1.9238312631841836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 306990 + }, + { + "epoch": 1.488894520067946, + "grad_norm": 1.673954557190882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307000 + }, + { + "epoch": 1.4889430182607821, + "grad_norm": 1.662663038359824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307010 + }, + { + "epoch": 1.488991516453618, + "grad_norm": 1.694392040008097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307020 + }, + { + "epoch": 1.4890400146464542, + "grad_norm": 1.8200138640622754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307030 + }, + { + "epoch": 1.4890885128392903, + "grad_norm": 1.9207520551844937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307040 + }, + { + "epoch": 1.4891370110321265, + "grad_norm": 1.6383064860292507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307050 + }, + { + "epoch": 1.4891855092249626, + "grad_norm": 1.628939116926631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307060 + }, + { + "epoch": 1.4892340074177985, + "grad_norm": 1.4556066219029162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307070 + }, + { + "epoch": 1.4892825056106347, + "grad_norm": 1.6512854017491918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307080 + }, + { + "epoch": 1.4893310038034708, + "grad_norm": 1.961809772410561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307090 + }, + { + "epoch": 1.4893795019963068, + "grad_norm": 1.6011620118661085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307100 + }, + { + "epoch": 1.489428000189143, + "grad_norm": 1.4919952207037568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307110 + }, + { + "epoch": 1.489476498381979, + "grad_norm": 1.6449230599846487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307120 + }, + { + "epoch": 1.4895249965748152, + "grad_norm": 1.606955493116402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307130 + }, + { + "epoch": 1.4895734947676513, + "grad_norm": 1.800249975758561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307140 + }, + { + "epoch": 1.4896219929604873, + "grad_norm": 1.7314907552190562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307150 + }, + { + "epoch": 1.4896704911533234, + "grad_norm": 1.6413956416272413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307160 + }, + { + "epoch": 1.4897189893461595, + "grad_norm": 1.5129289465676266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307170 + }, + { + "epoch": 1.4897674875389955, + "grad_norm": 1.5628144467427774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307180 + }, + { + "epoch": 1.4898159857318316, + "grad_norm": 1.8615604346905457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307190 + }, + { + "epoch": 1.4898644839246677, + "grad_norm": 1.77222730712856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307200 + }, + { + "epoch": 1.489912982117504, + "grad_norm": 1.4419703120438498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307210 + }, + { + "epoch": 1.48996148031034, + "grad_norm": 1.510836398210813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307220 + }, + { + "epoch": 1.490009978503176, + "grad_norm": 1.4234812795166363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307230 + }, + { + "epoch": 1.490058476696012, + "grad_norm": 1.73638156297784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307240 + }, + { + "epoch": 1.4901069748888482, + "grad_norm": 1.461660161794498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307250 + }, + { + "epoch": 1.4901554730816842, + "grad_norm": 1.390475006246561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307260 + }, + { + "epoch": 1.4902039712745203, + "grad_norm": 1.4416987426102423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307270 + }, + { + "epoch": 1.4902524694673565, + "grad_norm": 1.4148467641916795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307280 + }, + { + "epoch": 1.4903009676601926, + "grad_norm": 1.6924475687574159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307290 + }, + { + "epoch": 1.4903494658530287, + "grad_norm": 1.421631878884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307300 + }, + { + "epoch": 1.4903979640458647, + "grad_norm": 1.3801721365780395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307310 + }, + { + "epoch": 1.4904464622387008, + "grad_norm": 1.5451469437266496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307320 + }, + { + "epoch": 1.490494960431537, + "grad_norm": 1.425969458068721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307330 + }, + { + "epoch": 1.4905434586243729, + "grad_norm": 1.711819521688085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307340 + }, + { + "epoch": 1.490591956817209, + "grad_norm": 3.762996527711948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307350 + }, + { + "epoch": 1.4906404550100452, + "grad_norm": 1.305484715885541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307360 + }, + { + "epoch": 1.4906889532028813, + "grad_norm": 1.3203728599364695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307370 + }, + { + "epoch": 1.4907374513957174, + "grad_norm": 1.4080340804412117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307380 + }, + { + "epoch": 1.4907859495885534, + "grad_norm": 1.5866766034378088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307390 + }, + { + "epoch": 1.4908344477813895, + "grad_norm": 1.3678985055776138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307400 + }, + { + "epoch": 1.4908829459742257, + "grad_norm": 1.3342751969958044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307410 + }, + { + "epoch": 1.4909314441670618, + "grad_norm": 1.296024692010178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307420 + }, + { + "epoch": 1.490979942359898, + "grad_norm": 1.3666705456216732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307430 + }, + { + "epoch": 1.4910284405527339, + "grad_norm": 1.596590237795681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307440 + }, + { + "epoch": 1.49107693874557, + "grad_norm": 1.3227996475961845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307450 + }, + { + "epoch": 1.4911254369384062, + "grad_norm": 1.4179173035699932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307460 + }, + { + "epoch": 1.491173935131242, + "grad_norm": 1.3506361540294165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307470 + }, + { + "epoch": 1.4912224333240782, + "grad_norm": 1.2968943963187485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307480 + }, + { + "epoch": 1.4912709315169144, + "grad_norm": 1.6169003913546476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307490 + }, + { + "epoch": 1.4913194297097505, + "grad_norm": 1.3991130742851965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307500 + }, + { + "epoch": 1.4913679279025867, + "grad_norm": 1.2408504801442177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307510 + }, + { + "epoch": 1.4914164260954226, + "grad_norm": 1.2040510455335607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307520 + }, + { + "epoch": 1.4914649242882587, + "grad_norm": 1.3447906610508653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307530 + }, + { + "epoch": 1.4915134224810949, + "grad_norm": 1.5196241065495997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307540 + }, + { + "epoch": 1.4915619206739308, + "grad_norm": 1.321073455073929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307550 + }, + { + "epoch": 1.491610418866767, + "grad_norm": 1.242949423385653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307560 + }, + { + "epoch": 1.491658917059603, + "grad_norm": 1.2826829731693579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307570 + }, + { + "epoch": 1.4917074152524392, + "grad_norm": 1.2731068466109718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307580 + }, + { + "epoch": 1.4917559134452754, + "grad_norm": 1.4668096071090986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307590 + }, + { + "epoch": 1.4918044116381113, + "grad_norm": 1.3123208475462889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307600 + }, + { + "epoch": 1.4918529098309474, + "grad_norm": 1.2135457438944286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307610 + }, + { + "epoch": 1.4919014080237836, + "grad_norm": 1.2432997209543828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307620 + }, + { + "epoch": 1.4919499062166195, + "grad_norm": 1.274830765396473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307630 + }, + { + "epoch": 1.4919984044094556, + "grad_norm": 1.42863555652184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307640 + }, + { + "epoch": 1.4920469026022918, + "grad_norm": 1.2686959394159203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307650 + }, + { + "epoch": 1.492095400795128, + "grad_norm": 1.2954043882018595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307660 + }, + { + "epoch": 1.492143898987964, + "grad_norm": 1.2313078912029596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307670 + }, + { + "epoch": 1.4921923971808, + "grad_norm": 1.259561770439177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307680 + }, + { + "epoch": 1.4922408953736361, + "grad_norm": 1.4512588109028002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307690 + }, + { + "epoch": 1.4922893935664723, + "grad_norm": 1.1988811365881702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307700 + }, + { + "epoch": 1.4923378917593082, + "grad_norm": 1.1900089447181017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307710 + }, + { + "epoch": 1.4923863899521443, + "grad_norm": 1.2039463115343096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307720 + }, + { + "epoch": 1.4924348881449805, + "grad_norm": 1.2880826716354932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307730 + }, + { + "epoch": 1.4924833863378166, + "grad_norm": 1.484673077811749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307740 + }, + { + "epoch": 1.4925318845306528, + "grad_norm": 1.2797191573099553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307750 + }, + { + "epoch": 1.4925803827234887, + "grad_norm": 1.222133221290278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307760 + }, + { + "epoch": 1.4926288809163248, + "grad_norm": 1.1564533508590102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307770 + }, + { + "epoch": 1.492677379109161, + "grad_norm": 1.1700983293394529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307780 + }, + { + "epoch": 1.492725877301997, + "grad_norm": 1.3550577193655045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307790 + }, + { + "epoch": 1.492774375494833, + "grad_norm": 1.2012660022264754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307800 + }, + { + "epoch": 1.4928228736876692, + "grad_norm": 1.3020861899804004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307810 + }, + { + "epoch": 1.4928713718805053, + "grad_norm": 1.3559838407672942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307820 + }, + { + "epoch": 1.4929198700733415, + "grad_norm": 1.1460977589194954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307830 + }, + { + "epoch": 1.4929683682661774, + "grad_norm": 1.328716336956859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307840 + }, + { + "epoch": 1.4930168664590135, + "grad_norm": 1.2052579734245228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307850 + }, + { + "epoch": 1.4930653646518497, + "grad_norm": 1.5234712691380992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307860 + }, + { + "epoch": 1.4931138628446856, + "grad_norm": 1.1469918348439023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307870 + }, + { + "epoch": 1.4931623610375218, + "grad_norm": 1.1163054125518102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307880 + }, + { + "epoch": 1.493210859230358, + "grad_norm": 2.5102440304181073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307890 + }, + { + "epoch": 1.493259357423194, + "grad_norm": 1.0891995572137603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307900 + }, + { + "epoch": 1.4933078556160302, + "grad_norm": 1.1362807583736867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307910 + }, + { + "epoch": 1.493356353808866, + "grad_norm": 1.1592037196805904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307920 + }, + { + "epoch": 1.4934048520017023, + "grad_norm": 1.1545939315737996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307930 + }, + { + "epoch": 1.4934533501945384, + "grad_norm": 1.2316476727392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307940 + }, + { + "epoch": 1.4935018483873745, + "grad_norm": 1.1216496886845562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307950 + }, + { + "epoch": 1.4935503465802107, + "grad_norm": 1.145237433775037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307960 + }, + { + "epoch": 1.4935988447730466, + "grad_norm": 1.1226939733433028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307970 + }, + { + "epoch": 1.4936473429658828, + "grad_norm": 1.1365682439645752e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307980 + }, + { + "epoch": 1.493695841158719, + "grad_norm": 1.2410400529461185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 307990 + }, + { + "epoch": 1.4937443393515548, + "grad_norm": 1.10133676400892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308000 + }, + { + "epoch": 1.493792837544391, + "grad_norm": 1.1628994656120994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308010 + }, + { + "epoch": 1.493841335737227, + "grad_norm": 1.2016636219414067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308020 + }, + { + "epoch": 1.4938898339300632, + "grad_norm": 2.1092733959449106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308030 + }, + { + "epoch": 1.4939383321228994, + "grad_norm": 4.6100333861431864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308040 + }, + { + "epoch": 1.4939868303157353, + "grad_norm": 2.431490202070563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308050 + }, + { + "epoch": 1.4940353285085715, + "grad_norm": 3.544541584687977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308060 + }, + { + "epoch": 1.4940838267014076, + "grad_norm": 4.276493257293623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308070 + }, + { + "epoch": 1.4941323248942435, + "grad_norm": 7.139223043850507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308080 + }, + { + "epoch": 1.4941808230870797, + "grad_norm": 0.26284322142601013, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308090 + }, + { + "epoch": 1.4942293212799158, + "grad_norm": 1.087181999537279e-06, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 308100 + }, + { + "epoch": 1.494277819472752, + "grad_norm": 5.657776455336716e-06, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 308110 + }, + { + "epoch": 1.494326317665588, + "grad_norm": 0.11818218976259232, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 308120 + }, + { + "epoch": 1.494374815858424, + "grad_norm": 0.007151181809604168, + "learning_rate": 0.0002, + "loss": 0.0027, + "step": 308130 + }, + { + "epoch": 1.4944233140512602, + "grad_norm": 0.009479387663304806, + "learning_rate": 0.0002, + "loss": 0.0036, + "step": 308140 + }, + { + "epoch": 1.4944718122440963, + "grad_norm": 7.905538950581104e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 308150 + }, + { + "epoch": 1.4945203104369322, + "grad_norm": 0.1409396529197693, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 308160 + }, + { + "epoch": 1.4945688086297684, + "grad_norm": 0.040650565177202225, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 308170 + }, + { + "epoch": 1.4946173068226045, + "grad_norm": 0.0006330895703285933, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 308180 + }, + { + "epoch": 1.4946658050154407, + "grad_norm": 0.00023020655498839915, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308190 + }, + { + "epoch": 1.4947143032082768, + "grad_norm": 0.00018998443556483835, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 308200 + }, + { + "epoch": 1.4947628014011127, + "grad_norm": 0.0002050869952654466, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308210 + }, + { + "epoch": 1.4948112995939489, + "grad_norm": 0.00016271656204480678, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308220 + }, + { + "epoch": 1.494859797786785, + "grad_norm": 0.00011094153887825087, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 308230 + }, + { + "epoch": 1.494908295979621, + "grad_norm": 0.0010138005018234253, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308240 + }, + { + "epoch": 1.494956794172457, + "grad_norm": 0.00019418100418988615, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 308250 + }, + { + "epoch": 1.4950052923652932, + "grad_norm": 0.40524762868881226, + "learning_rate": 0.0002, + "loss": 0.0148, + "step": 308260 + }, + { + "epoch": 1.4950537905581294, + "grad_norm": 0.013353127054870129, + "learning_rate": 0.0002, + "loss": 0.0658, + "step": 308270 + }, + { + "epoch": 1.4951022887509655, + "grad_norm": 0.0030003630090504885, + "learning_rate": 0.0002, + "loss": 0.0029, + "step": 308280 + }, + { + "epoch": 1.4951507869438014, + "grad_norm": 0.08309502899646759, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 308290 + }, + { + "epoch": 1.4951992851366376, + "grad_norm": 0.04692290350794792, + "learning_rate": 0.0002, + "loss": 0.0035, + "step": 308300 + }, + { + "epoch": 1.4952477833294737, + "grad_norm": 0.0013936440227553248, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 308310 + }, + { + "epoch": 1.4952962815223096, + "grad_norm": 3.347919482621364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308320 + }, + { + "epoch": 1.4953447797151458, + "grad_norm": 8.209768566302955e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308330 + }, + { + "epoch": 1.495393277907982, + "grad_norm": 3.748006565729156e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308340 + }, + { + "epoch": 1.495441776100818, + "grad_norm": 2.7984631742583588e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308350 + }, + { + "epoch": 1.4954902742936542, + "grad_norm": 3.33326643158216e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308360 + }, + { + "epoch": 1.4955387724864901, + "grad_norm": 6.50440706522204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308370 + }, + { + "epoch": 1.4955872706793263, + "grad_norm": 0.009364654310047626, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308380 + }, + { + "epoch": 1.4956357688721624, + "grad_norm": 0.010130297392606735, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 308390 + }, + { + "epoch": 1.4956842670649984, + "grad_norm": 7.512994488934055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308400 + }, + { + "epoch": 1.4957327652578347, + "grad_norm": 2.2370963051798753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308410 + }, + { + "epoch": 1.4957812634506706, + "grad_norm": 2.9342556445044465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308420 + }, + { + "epoch": 1.4958297616435068, + "grad_norm": 2.1315086996764876e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308430 + }, + { + "epoch": 1.495878259836343, + "grad_norm": 4.72596257168334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308440 + }, + { + "epoch": 1.4959267580291788, + "grad_norm": 3.8237194530665874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308450 + }, + { + "epoch": 1.495975256222015, + "grad_norm": 1.5409917978104204e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308460 + }, + { + "epoch": 1.4960237544148511, + "grad_norm": 1.9230592442909256e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308470 + }, + { + "epoch": 1.4960722526076873, + "grad_norm": 2.0137198589509353e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308480 + }, + { + "epoch": 1.4961207508005234, + "grad_norm": 2.267829404445365e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308490 + }, + { + "epoch": 1.4961692489933593, + "grad_norm": 1.7366102838423103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308500 + }, + { + "epoch": 1.4962177471861955, + "grad_norm": 1.2993265045224689e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308510 + }, + { + "epoch": 1.4962662453790316, + "grad_norm": 1.604584394954145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308520 + }, + { + "epoch": 1.4963147435718676, + "grad_norm": 1.3488564945873804e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308530 + }, + { + "epoch": 1.4963632417647037, + "grad_norm": 2.1657990146195516e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308540 + }, + { + "epoch": 1.4964117399575398, + "grad_norm": 1.3148346624802798e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308550 + }, + { + "epoch": 1.496460238150376, + "grad_norm": 1.2751663234666921e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308560 + }, + { + "epoch": 1.4965087363432121, + "grad_norm": 1.0683957043511327e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308570 + }, + { + "epoch": 1.496557234536048, + "grad_norm": 1.1382264347048476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308580 + }, + { + "epoch": 1.4966057327288842, + "grad_norm": 9.489539661444724e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308590 + }, + { + "epoch": 1.4966542309217203, + "grad_norm": 1.1936800547118764e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308600 + }, + { + "epoch": 1.4967027291145563, + "grad_norm": 1.739186336635612e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308610 + }, + { + "epoch": 1.4967512273073924, + "grad_norm": 1.0291811122442596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308620 + }, + { + "epoch": 1.4967997255002286, + "grad_norm": 1.3688360013475176e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308630 + }, + { + "epoch": 1.4968482236930647, + "grad_norm": 1.723871537251398e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308640 + }, + { + "epoch": 1.4968967218859008, + "grad_norm": 8.595595318183769e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308650 + }, + { + "epoch": 1.4969452200787368, + "grad_norm": 8.875321327650454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308660 + }, + { + "epoch": 1.496993718271573, + "grad_norm": 9.017749107442796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308670 + }, + { + "epoch": 1.497042216464409, + "grad_norm": 8.85274766915245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308680 + }, + { + "epoch": 1.497090714657245, + "grad_norm": 1.454601988370996e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308690 + }, + { + "epoch": 1.4971392128500811, + "grad_norm": 8.643144610687159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308700 + }, + { + "epoch": 1.4971877110429173, + "grad_norm": 1.0157252290809993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308710 + }, + { + "epoch": 1.4972362092357534, + "grad_norm": 9.924418009177316e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308720 + }, + { + "epoch": 1.4972847074285895, + "grad_norm": 0.0002908884489443153, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308730 + }, + { + "epoch": 1.4973332056214255, + "grad_norm": 1.79502949322341e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308740 + }, + { + "epoch": 1.4973817038142616, + "grad_norm": 1.0291740181855857e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308750 + }, + { + "epoch": 1.4974302020070978, + "grad_norm": 2.708477586566005e-05, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 308760 + }, + { + "epoch": 1.4974787001999337, + "grad_norm": 0.00010801042662933469, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308770 + }, + { + "epoch": 1.4975271983927698, + "grad_norm": 0.000224367729970254, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308780 + }, + { + "epoch": 1.497575696585606, + "grad_norm": 0.0002325485402252525, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308790 + }, + { + "epoch": 1.497624194778442, + "grad_norm": 0.00021809805184602737, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308800 + }, + { + "epoch": 1.4976726929712783, + "grad_norm": 3.5297027352498844e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 308810 + }, + { + "epoch": 1.4977211911641142, + "grad_norm": 2.1111871319590136e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308820 + }, + { + "epoch": 1.4977696893569503, + "grad_norm": 3.7640413211192936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308830 + }, + { + "epoch": 1.4978181875497865, + "grad_norm": 0.0003093212435487658, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308840 + }, + { + "epoch": 1.4978666857426224, + "grad_norm": 1.9883593267877586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308850 + }, + { + "epoch": 1.4979151839354585, + "grad_norm": 1.0536290574236773e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308860 + }, + { + "epoch": 1.4979636821282947, + "grad_norm": 3.865384860546328e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308870 + }, + { + "epoch": 1.4980121803211308, + "grad_norm": 0.00014144799206405878, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308880 + }, + { + "epoch": 1.498060678513967, + "grad_norm": 2.0347890313132666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308890 + }, + { + "epoch": 1.4981091767068029, + "grad_norm": 1.0725460015237331e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308900 + }, + { + "epoch": 1.498157674899639, + "grad_norm": 0.00010396974539617077, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308910 + }, + { + "epoch": 1.4982061730924752, + "grad_norm": 7.100908987922594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308920 + }, + { + "epoch": 1.4982546712853113, + "grad_norm": 8.232505933847278e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308930 + }, + { + "epoch": 1.4983031694781475, + "grad_norm": 3.927864963770844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308940 + }, + { + "epoch": 1.4983516676709834, + "grad_norm": 6.050875072105555e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 308950 + }, + { + "epoch": 1.4984001658638195, + "grad_norm": 6.521327577502234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308960 + }, + { + "epoch": 1.4984486640566557, + "grad_norm": 6.554133506142534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 308970 + }, + { + "epoch": 1.4984971622494916, + "grad_norm": 8.740267730900086e-06, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 308980 + }, + { + "epoch": 1.4985456604423277, + "grad_norm": 9.41492326091975e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 308990 + }, + { + "epoch": 1.4985941586351639, + "grad_norm": 8.62228698679246e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309000 + }, + { + "epoch": 1.498642656828, + "grad_norm": 9.202162618748844e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309010 + }, + { + "epoch": 1.4986911550208362, + "grad_norm": 6.833139923401177e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 309020 + }, + { + "epoch": 1.498739653213672, + "grad_norm": 7.64937067287974e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309030 + }, + { + "epoch": 1.4987881514065082, + "grad_norm": 0.0004563281836453825, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 309040 + }, + { + "epoch": 1.4988366495993444, + "grad_norm": 3.0827719456283376e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 309050 + }, + { + "epoch": 1.4988851477921803, + "grad_norm": 3.189529525116086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309060 + }, + { + "epoch": 1.4989336459850164, + "grad_norm": 3.0477920518023893e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309070 + }, + { + "epoch": 1.4989821441778526, + "grad_norm": 3.692455356940627e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309080 + }, + { + "epoch": 1.4990306423706887, + "grad_norm": 4.3598643969744444e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309090 + }, + { + "epoch": 1.4990791405635249, + "grad_norm": 2.101595964631997e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309100 + }, + { + "epoch": 1.4991276387563608, + "grad_norm": 2.047218549705576e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309110 + }, + { + "epoch": 1.499176136949197, + "grad_norm": 1.963917020475492e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309120 + }, + { + "epoch": 1.499224635142033, + "grad_norm": 1.797899676603265e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309130 + }, + { + "epoch": 1.499273133334869, + "grad_norm": 2.8994081731070764e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309140 + }, + { + "epoch": 1.4993216315277051, + "grad_norm": 1.574045563756954e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309150 + }, + { + "epoch": 1.4993701297205413, + "grad_norm": 1.5449861166416667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309160 + }, + { + "epoch": 1.4994186279133774, + "grad_norm": 1.4311227459984366e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309170 + }, + { + "epoch": 1.4994671261062136, + "grad_norm": 1.6760535800131038e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309180 + }, + { + "epoch": 1.4995156242990495, + "grad_norm": 2.6008719942183234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309190 + }, + { + "epoch": 1.4995641224918856, + "grad_norm": 1.3635771210829262e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309200 + }, + { + "epoch": 1.4996126206847218, + "grad_norm": 1.2384583897073753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309210 + }, + { + "epoch": 1.4996611188775577, + "grad_norm": 1.1612031812546775e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309220 + }, + { + "epoch": 1.4997096170703939, + "grad_norm": 1.3940993085270748e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309230 + }, + { + "epoch": 1.49975811526323, + "grad_norm": 1.6129848518176004e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309240 + }, + { + "epoch": 1.4998066134560661, + "grad_norm": 9.969775419449434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309250 + }, + { + "epoch": 1.4998551116489023, + "grad_norm": 1.0659280633262824e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309260 + }, + { + "epoch": 1.4999036098417382, + "grad_norm": 1.1132175131933764e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309270 + }, + { + "epoch": 1.4999521080345743, + "grad_norm": 1.4985496818553656e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309280 + }, + { + "epoch": 1.5000006062274105, + "grad_norm": 1.4217955140338745e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309290 + }, + { + "epoch": 1.5000491044202464, + "grad_norm": 9.313643204222899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309300 + }, + { + "epoch": 1.5000976026130828, + "grad_norm": 7.986553100636229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309310 + }, + { + "epoch": 1.5001461008059187, + "grad_norm": 1.018402417685138e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309320 + }, + { + "epoch": 1.5001945989987548, + "grad_norm": 1.1465075658634305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309330 + }, + { + "epoch": 1.500243097191591, + "grad_norm": 1.1948795872740448e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309340 + }, + { + "epoch": 1.500291595384427, + "grad_norm": 7.152541911636945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309350 + }, + { + "epoch": 1.500340093577263, + "grad_norm": 8.93455398909282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309360 + }, + { + "epoch": 1.5003885917700992, + "grad_norm": 6.829298854427179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309370 + }, + { + "epoch": 1.5004370899629351, + "grad_norm": 7.804037522873841e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309380 + }, + { + "epoch": 1.5004855881557715, + "grad_norm": 9.834163392952178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309390 + }, + { + "epoch": 1.5005340863486074, + "grad_norm": 5.990784302412067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309400 + }, + { + "epoch": 1.5005825845414436, + "grad_norm": 7.349637598963454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309410 + }, + { + "epoch": 1.5006310827342797, + "grad_norm": 6.356056019285461e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309420 + }, + { + "epoch": 1.5006795809271156, + "grad_norm": 7.1315071181743406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309430 + }, + { + "epoch": 1.5007280791199518, + "grad_norm": 9.974924978450872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309440 + }, + { + "epoch": 1.500776577312788, + "grad_norm": 9.085817509912886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309450 + }, + { + "epoch": 1.5008250755056238, + "grad_norm": 6.714727078360738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309460 + }, + { + "epoch": 1.5008735736984602, + "grad_norm": 5.670548489433713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309470 + }, + { + "epoch": 1.5009220718912961, + "grad_norm": 6.9983911998861e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309480 + }, + { + "epoch": 1.5009705700841323, + "grad_norm": 9.367404345539398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309490 + }, + { + "epoch": 1.5010190682769684, + "grad_norm": 5.087547833682038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309500 + }, + { + "epoch": 1.5010675664698043, + "grad_norm": 5.119781690154923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309510 + }, + { + "epoch": 1.5011160646626405, + "grad_norm": 4.9442874114902224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309520 + }, + { + "epoch": 1.5011645628554766, + "grad_norm": 6.087938345444854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309530 + }, + { + "epoch": 1.5012130610483125, + "grad_norm": 6.797285095672123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309540 + }, + { + "epoch": 1.501261559241149, + "grad_norm": 4.5260558181325905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309550 + }, + { + "epoch": 1.5013100574339848, + "grad_norm": 4.689798515755683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309560 + }, + { + "epoch": 1.501358555626821, + "grad_norm": 4.27721442974871e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309570 + }, + { + "epoch": 1.5014070538196571, + "grad_norm": 4.280416760593653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309580 + }, + { + "epoch": 1.501455552012493, + "grad_norm": 6.487792234111112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309590 + }, + { + "epoch": 1.5015040502053292, + "grad_norm": 4.551254278339911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309600 + }, + { + "epoch": 1.5015525483981653, + "grad_norm": 4.260617060936056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309610 + }, + { + "epoch": 1.5016010465910012, + "grad_norm": 3.7285146845533745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309620 + }, + { + "epoch": 1.5016495447838376, + "grad_norm": 4.33124750998104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309630 + }, + { + "epoch": 1.5016980429766735, + "grad_norm": 5.392404546000762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309640 + }, + { + "epoch": 1.5017465411695097, + "grad_norm": 4.25526332037407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309650 + }, + { + "epoch": 1.5017950393623458, + "grad_norm": 3.6914602787874173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309660 + }, + { + "epoch": 1.5018435375551817, + "grad_norm": 3.87498403142672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309670 + }, + { + "epoch": 1.5018920357480179, + "grad_norm": 3.541563955877791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309680 + }, + { + "epoch": 1.501940533940854, + "grad_norm": 5.160669388715178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309690 + }, + { + "epoch": 1.5019890321336902, + "grad_norm": 3.1664314974477747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309700 + }, + { + "epoch": 1.5020375303265263, + "grad_norm": 3.62930518349458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309710 + }, + { + "epoch": 1.5020860285193622, + "grad_norm": 4.173979505139869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309720 + }, + { + "epoch": 1.5021345267121984, + "grad_norm": 3.6432825254451018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309730 + }, + { + "epoch": 1.5021830249050345, + "grad_norm": 5.317535851645516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309740 + }, + { + "epoch": 1.5022315230978704, + "grad_norm": 3.75018362319679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309750 + }, + { + "epoch": 1.5022800212907068, + "grad_norm": 3.7213660561974393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309760 + }, + { + "epoch": 1.5023285194835427, + "grad_norm": 3.179423629262601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309770 + }, + { + "epoch": 1.5023770176763789, + "grad_norm": 4.678281129599782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309780 + }, + { + "epoch": 1.502425515869215, + "grad_norm": 4.6896252570149954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309790 + }, + { + "epoch": 1.502474014062051, + "grad_norm": 3.57913472726068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309800 + }, + { + "epoch": 1.502522512254887, + "grad_norm": 3.135384531560703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309810 + }, + { + "epoch": 1.5025710104477232, + "grad_norm": 3.7453341974469367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309820 + }, + { + "epoch": 1.5026195086405592, + "grad_norm": 2.820567942762864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309830 + }, + { + "epoch": 1.5026680068333955, + "grad_norm": 4.14942860516021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309840 + }, + { + "epoch": 1.5027165050262314, + "grad_norm": 2.7541627787286416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309850 + }, + { + "epoch": 1.5027650032190676, + "grad_norm": 2.527405513319536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309860 + }, + { + "epoch": 1.5028135014119037, + "grad_norm": 2.8999627375014825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309870 + }, + { + "epoch": 1.5028619996047397, + "grad_norm": 2.9905738756497158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309880 + }, + { + "epoch": 1.5029104977975758, + "grad_norm": 5.691632850357564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309890 + }, + { + "epoch": 1.502958995990412, + "grad_norm": 2.9041293601039797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309900 + }, + { + "epoch": 1.5030074941832479, + "grad_norm": 2.8556464712892193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309910 + }, + { + "epoch": 1.5030559923760842, + "grad_norm": 2.479200702509843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309920 + }, + { + "epoch": 1.5031044905689201, + "grad_norm": 2.362359509788803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309930 + }, + { + "epoch": 1.5031529887617563, + "grad_norm": 6.064511580916587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309940 + }, + { + "epoch": 1.5032014869545924, + "grad_norm": 2.405085069767665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309950 + }, + { + "epoch": 1.5032499851474284, + "grad_norm": 6.940244929865003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309960 + }, + { + "epoch": 1.5032984833402645, + "grad_norm": 3.573616595531348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309970 + }, + { + "epoch": 1.5033469815331006, + "grad_norm": 2.1952960196358617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309980 + }, + { + "epoch": 1.5033954797259366, + "grad_norm": 4.642733983928338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 309990 + }, + { + "epoch": 1.503443977918773, + "grad_norm": 2.0387533368193544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310000 + }, + { + "epoch": 1.5034924761116089, + "grad_norm": 2.290891416123486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310010 + }, + { + "epoch": 1.503540974304445, + "grad_norm": 2.393948989265482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310020 + }, + { + "epoch": 1.5035894724972811, + "grad_norm": 2.6860236630454892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310030 + }, + { + "epoch": 1.503637970690117, + "grad_norm": 3.0796284136158647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310040 + }, + { + "epoch": 1.5036864688829532, + "grad_norm": 2.221692511739093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310050 + }, + { + "epoch": 1.5037349670757894, + "grad_norm": 2.4164273781934753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310060 + }, + { + "epoch": 1.5037834652686253, + "grad_norm": 2.0584900539688533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310070 + }, + { + "epoch": 1.5038319634614616, + "grad_norm": 1.9810620415228186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310080 + }, + { + "epoch": 1.5038804616542976, + "grad_norm": 2.839440412572003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310090 + }, + { + "epoch": 1.5039289598471337, + "grad_norm": 2.2484923647425603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310100 + }, + { + "epoch": 1.5039774580399699, + "grad_norm": 2.3718389456917066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310110 + }, + { + "epoch": 1.5040259562328058, + "grad_norm": 1.8191568642578204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310120 + }, + { + "epoch": 1.504074454425642, + "grad_norm": 2.0886022866761778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310130 + }, + { + "epoch": 1.504122952618478, + "grad_norm": 3.6726607959280955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310140 + }, + { + "epoch": 1.504171450811314, + "grad_norm": 2.0120075987506425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310150 + }, + { + "epoch": 1.5042199490041503, + "grad_norm": 2.1270652723615058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310160 + }, + { + "epoch": 1.5042684471969863, + "grad_norm": 1.8034941149380757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310170 + }, + { + "epoch": 1.5043169453898224, + "grad_norm": 3.583003262974671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310180 + }, + { + "epoch": 1.5043654435826586, + "grad_norm": 4.351706593297422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310190 + }, + { + "epoch": 1.5044139417754945, + "grad_norm": 1.937088200065773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310200 + }, + { + "epoch": 1.5044624399683308, + "grad_norm": 2.0665204374381574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310210 + }, + { + "epoch": 1.5045109381611668, + "grad_norm": 1.6411739807153936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310220 + }, + { + "epoch": 1.504559436354003, + "grad_norm": 1.881939169834368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310230 + }, + { + "epoch": 1.504607934546839, + "grad_norm": 2.711523848120123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310240 + }, + { + "epoch": 1.504656432739675, + "grad_norm": 1.7432918184567825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310250 + }, + { + "epoch": 1.5047049309325111, + "grad_norm": 1.8397360008748365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310260 + }, + { + "epoch": 1.5047534291253473, + "grad_norm": 1.7864741721496102e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310270 + }, + { + "epoch": 1.5048019273181832, + "grad_norm": 1.6207875432883156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310280 + }, + { + "epoch": 1.5048504255110196, + "grad_norm": 2.768526655927417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310290 + }, + { + "epoch": 1.5048989237038555, + "grad_norm": 0.0007575141498818994, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310300 + }, + { + "epoch": 1.5049474218966916, + "grad_norm": 1.5827224615350133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310310 + }, + { + "epoch": 1.5049959200895278, + "grad_norm": 1.3991456171424943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310320 + }, + { + "epoch": 1.5050444182823637, + "grad_norm": 1.5731654912087834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310330 + }, + { + "epoch": 1.5050929164751998, + "grad_norm": 2.3772595341142733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310340 + }, + { + "epoch": 1.505141414668036, + "grad_norm": 3.0065739338169806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310350 + }, + { + "epoch": 1.505189912860872, + "grad_norm": 1.358303961751517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310360 + }, + { + "epoch": 1.5052384110537083, + "grad_norm": 1.30374132822908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310370 + }, + { + "epoch": 1.5052869092465442, + "grad_norm": 1.7638982399148517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310380 + }, + { + "epoch": 1.5053354074393803, + "grad_norm": 2.3743750716676004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310390 + }, + { + "epoch": 1.5053839056322165, + "grad_norm": 1.8432240267429734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310400 + }, + { + "epoch": 1.5054324038250524, + "grad_norm": 1.68602548455965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310410 + }, + { + "epoch": 1.5054809020178885, + "grad_norm": 1.2454341913326061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310420 + }, + { + "epoch": 1.5055294002107247, + "grad_norm": 1.6440257013528026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310430 + }, + { + "epoch": 1.5055778984035606, + "grad_norm": 1.620368379917636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310440 + }, + { + "epoch": 1.505626396596397, + "grad_norm": 2.7687094643624732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310450 + }, + { + "epoch": 1.5056748947892329, + "grad_norm": 1.4488343822449679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310460 + }, + { + "epoch": 1.505723392982069, + "grad_norm": 1.3371374052439933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310470 + }, + { + "epoch": 1.5057718911749052, + "grad_norm": 3.203503865734092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310480 + }, + { + "epoch": 1.505820389367741, + "grad_norm": 2.842647063516779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310490 + }, + { + "epoch": 1.5058688875605772, + "grad_norm": 1.1935500197068905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310500 + }, + { + "epoch": 1.5059173857534134, + "grad_norm": 1.198347035824554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310510 + }, + { + "epoch": 1.5059658839462493, + "grad_norm": 1.5316588815039722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310520 + }, + { + "epoch": 1.5060143821390857, + "grad_norm": 1.5916854181341478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310530 + }, + { + "epoch": 1.5060628803319216, + "grad_norm": 1.7012681610140135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310540 + }, + { + "epoch": 1.5061113785247577, + "grad_norm": 1.23173549582134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310550 + }, + { + "epoch": 1.5061598767175939, + "grad_norm": 1.1115394045191351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310560 + }, + { + "epoch": 1.5062083749104298, + "grad_norm": 1.1174359997312422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310570 + }, + { + "epoch": 1.506256873103266, + "grad_norm": 1.3711753581446828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310580 + }, + { + "epoch": 1.506305371296102, + "grad_norm": 1.772145424183691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310590 + }, + { + "epoch": 1.506353869488938, + "grad_norm": 1.2316752417973476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310600 + }, + { + "epoch": 1.5064023676817744, + "grad_norm": 1.1591990869419533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310610 + }, + { + "epoch": 1.5064508658746103, + "grad_norm": 1.3414116892818129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310620 + }, + { + "epoch": 1.5064993640674464, + "grad_norm": 3.0875494303472806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310630 + }, + { + "epoch": 1.5065478622602826, + "grad_norm": 1.4902924476700719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310640 + }, + { + "epoch": 1.5065963604531185, + "grad_norm": 1.1255676781729562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310650 + }, + { + "epoch": 1.5066448586459547, + "grad_norm": 1.1538033959368477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310660 + }, + { + "epoch": 1.5066933568387908, + "grad_norm": 1.0267822290188633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310670 + }, + { + "epoch": 1.5067418550316267, + "grad_norm": 2.7082614906248637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310680 + }, + { + "epoch": 1.506790353224463, + "grad_norm": 1.6950949657257297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310690 + }, + { + "epoch": 1.506838851417299, + "grad_norm": 1.144877273873135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310700 + }, + { + "epoch": 1.5068873496101352, + "grad_norm": 1.085091071217903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310710 + }, + { + "epoch": 1.5069358478029713, + "grad_norm": 1.1464384215287282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310720 + }, + { + "epoch": 1.5069843459958072, + "grad_norm": 9.496613984083524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310730 + }, + { + "epoch": 1.5070328441886436, + "grad_norm": 1.434187311133428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310740 + }, + { + "epoch": 1.5070813423814795, + "grad_norm": 1.0436458524054615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310750 + }, + { + "epoch": 1.5071298405743156, + "grad_norm": 1.1646828852462932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310760 + }, + { + "epoch": 1.5071783387671518, + "grad_norm": 1.1314773473714013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310770 + }, + { + "epoch": 1.5072268369599877, + "grad_norm": 9.454797122998571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310780 + }, + { + "epoch": 1.5072753351528239, + "grad_norm": 1.4769830158911645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310790 + }, + { + "epoch": 1.50732383334566, + "grad_norm": 9.572607950758538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310800 + }, + { + "epoch": 1.507372331538496, + "grad_norm": 1.278281160921324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310810 + }, + { + "epoch": 1.5074208297313323, + "grad_norm": 1.2162553275629762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310820 + }, + { + "epoch": 1.5074693279241682, + "grad_norm": 1.058474367710005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310830 + }, + { + "epoch": 1.5075178261170044, + "grad_norm": 1.3533325500247884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310840 + }, + { + "epoch": 1.5075663243098405, + "grad_norm": 9.882469385047443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310850 + }, + { + "epoch": 1.5076148225026764, + "grad_norm": 9.583885685060523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310860 + }, + { + "epoch": 1.5076633206955126, + "grad_norm": 9.351397238788195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310870 + }, + { + "epoch": 1.5077118188883487, + "grad_norm": 1.1627181493167882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310880 + }, + { + "epoch": 1.5077603170811846, + "grad_norm": 1.1052845820813673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310890 + }, + { + "epoch": 1.507808815274021, + "grad_norm": 9.098836244447739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310900 + }, + { + "epoch": 1.507857313466857, + "grad_norm": 1.1051020010199863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310910 + }, + { + "epoch": 1.507905811659693, + "grad_norm": 9.571934924679226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310920 + }, + { + "epoch": 1.5079543098525292, + "grad_norm": 1.0526829328227905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310930 + }, + { + "epoch": 1.5080028080453651, + "grad_norm": 1.4113816177996341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310940 + }, + { + "epoch": 1.5080513062382013, + "grad_norm": 8.661523338560073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310950 + }, + { + "epoch": 1.5080998044310374, + "grad_norm": 1.2601288972291513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310960 + }, + { + "epoch": 1.5081483026238733, + "grad_norm": 8.362410426343558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310970 + }, + { + "epoch": 1.5081968008167097, + "grad_norm": 7.69970085912064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310980 + }, + { + "epoch": 1.5082452990095456, + "grad_norm": 1.0534878356338595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 310990 + }, + { + "epoch": 1.5082937972023818, + "grad_norm": 8.537823532606126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311000 + }, + { + "epoch": 1.508342295395218, + "grad_norm": 7.335065674851649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311010 + }, + { + "epoch": 1.5083907935880538, + "grad_norm": 8.560663786738587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311020 + }, + { + "epoch": 1.50843929178089, + "grad_norm": 8.392849508709332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311030 + }, + { + "epoch": 1.5084877899737261, + "grad_norm": 1.040854385792045e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311040 + }, + { + "epoch": 1.508536288166562, + "grad_norm": 6.945508062017325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311050 + }, + { + "epoch": 1.5085847863593984, + "grad_norm": 9.42217980082205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311060 + }, + { + "epoch": 1.5086332845522343, + "grad_norm": 8.353399039151554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311070 + }, + { + "epoch": 1.5086817827450705, + "grad_norm": 7.810837132637971e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311080 + }, + { + "epoch": 1.5087302809379066, + "grad_norm": 1.0325869652660913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311090 + }, + { + "epoch": 1.5087787791307425, + "grad_norm": 1.1572886933208792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311100 + }, + { + "epoch": 1.5088272773235787, + "grad_norm": 9.003339300761581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311110 + }, + { + "epoch": 1.5088757755164148, + "grad_norm": 7.475259167222248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311120 + }, + { + "epoch": 1.5089242737092508, + "grad_norm": 7.437162707901734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311130 + }, + { + "epoch": 1.5089727719020871, + "grad_norm": 1.226587073688279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311140 + }, + { + "epoch": 1.509021270094923, + "grad_norm": 6.923520459167776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311150 + }, + { + "epoch": 1.5090697682877592, + "grad_norm": 7.196319984359434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311160 + }, + { + "epoch": 1.5091182664805953, + "grad_norm": 5.308075742505025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311170 + }, + { + "epoch": 1.5091667646734312, + "grad_norm": 7.914005095699395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311180 + }, + { + "epoch": 1.5092152628662674, + "grad_norm": 1.0657676057235221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311190 + }, + { + "epoch": 1.5092637610591035, + "grad_norm": 6.826705885032425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311200 + }, + { + "epoch": 1.5093122592519395, + "grad_norm": 8.549895369469596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311210 + }, + { + "epoch": 1.5093607574447758, + "grad_norm": 8.844800163387845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311220 + }, + { + "epoch": 1.5094092556376117, + "grad_norm": 7.016537892923225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311230 + }, + { + "epoch": 1.509457753830448, + "grad_norm": 9.761314458955894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311240 + }, + { + "epoch": 1.509506252023284, + "grad_norm": 5.364458388612547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311250 + }, + { + "epoch": 1.50955475021612, + "grad_norm": 6.021205081196968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311260 + }, + { + "epoch": 1.5096032484089563, + "grad_norm": 1.2807422535843216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311270 + }, + { + "epoch": 1.5096517466017922, + "grad_norm": 8.671270279592136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311280 + }, + { + "epoch": 1.5097002447946284, + "grad_norm": 1.0587091310299002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311290 + }, + { + "epoch": 1.5097487429874645, + "grad_norm": 7.693009251852345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311300 + }, + { + "epoch": 1.5097972411803005, + "grad_norm": 1.9227970824431395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311310 + }, + { + "epoch": 1.5098457393731366, + "grad_norm": 7.506913561883266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311320 + }, + { + "epoch": 1.5098942375659727, + "grad_norm": 6.715720815009263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311330 + }, + { + "epoch": 1.5099427357588087, + "grad_norm": 9.735716730574495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311340 + }, + { + "epoch": 1.509991233951645, + "grad_norm": 7.076474162204249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311350 + }, + { + "epoch": 1.510039732144481, + "grad_norm": 6.190975341269223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311360 + }, + { + "epoch": 1.510088230337317, + "grad_norm": 7.509555643991916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311370 + }, + { + "epoch": 1.5101367285301532, + "grad_norm": 6.366300340232556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311380 + }, + { + "epoch": 1.5101852267229892, + "grad_norm": 8.706975904715364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311390 + }, + { + "epoch": 1.5102337249158253, + "grad_norm": 5.589678835349332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311400 + }, + { + "epoch": 1.5102822231086614, + "grad_norm": 1.7202107756020268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311410 + }, + { + "epoch": 1.5103307213014974, + "grad_norm": 1.5731541225250112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311420 + }, + { + "epoch": 1.5103792194943337, + "grad_norm": 7.149195653255447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311430 + }, + { + "epoch": 1.5104277176871697, + "grad_norm": 4.287375304556917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311440 + }, + { + "epoch": 1.5104762158800058, + "grad_norm": 6.01263195676438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311450 + }, + { + "epoch": 1.510524714072842, + "grad_norm": 7.83961638717301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311460 + }, + { + "epoch": 1.5105732122656779, + "grad_norm": 6.01998294769146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311470 + }, + { + "epoch": 1.510621710458514, + "grad_norm": 7.167103035499167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311480 + }, + { + "epoch": 1.5106702086513502, + "grad_norm": 6.687897666779463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311490 + }, + { + "epoch": 1.510718706844186, + "grad_norm": 5.834843932461808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311500 + }, + { + "epoch": 1.5107672050370224, + "grad_norm": 7.606836902596115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311510 + }, + { + "epoch": 1.5108157032298584, + "grad_norm": 5.656559665112582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311520 + }, + { + "epoch": 1.5108642014226945, + "grad_norm": 6.085683708079159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311530 + }, + { + "epoch": 1.5109126996155307, + "grad_norm": 8.464074312541925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311540 + }, + { + "epoch": 1.5109611978083666, + "grad_norm": 5.829369911225513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311550 + }, + { + "epoch": 1.5110096960012027, + "grad_norm": 5.66591211281775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311560 + }, + { + "epoch": 1.5110581941940389, + "grad_norm": 5.461338332679588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311570 + }, + { + "epoch": 1.5111066923868748, + "grad_norm": 6.010243396303849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311580 + }, + { + "epoch": 1.5111551905797111, + "grad_norm": 6.267346179811284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311590 + }, + { + "epoch": 1.511203688772547, + "grad_norm": 5.955089932285773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311600 + }, + { + "epoch": 1.5112521869653832, + "grad_norm": 6.49872845315258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311610 + }, + { + "epoch": 1.5113006851582194, + "grad_norm": 7.161909252317855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311620 + }, + { + "epoch": 1.5113491833510553, + "grad_norm": 4.750996254188067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311630 + }, + { + "epoch": 1.5113976815438914, + "grad_norm": 7.02801116858609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311640 + }, + { + "epoch": 1.5114461797367276, + "grad_norm": 4.5512337010222836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311650 + }, + { + "epoch": 1.5114946779295635, + "grad_norm": 4.930440127282054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311660 + }, + { + "epoch": 1.5115431761223999, + "grad_norm": 4.5570908469017013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311670 + }, + { + "epoch": 1.5115916743152358, + "grad_norm": 5.492306058840768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311680 + }, + { + "epoch": 1.511640172508072, + "grad_norm": 1.2099681043764576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311690 + }, + { + "epoch": 1.511688670700908, + "grad_norm": 7.120673330973659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311700 + }, + { + "epoch": 1.511737168893744, + "grad_norm": 5.67736321954726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311710 + }, + { + "epoch": 1.5117856670865801, + "grad_norm": 5.353672918317898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311720 + }, + { + "epoch": 1.5118341652794163, + "grad_norm": 5.626272923109354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311730 + }, + { + "epoch": 1.5118826634722524, + "grad_norm": 1.2631426216103137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311740 + }, + { + "epoch": 1.5119311616650886, + "grad_norm": 4.778783591063984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311750 + }, + { + "epoch": 1.5119796598579245, + "grad_norm": 4.840157998842187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311760 + }, + { + "epoch": 1.5120281580507606, + "grad_norm": 4.244920148721576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311770 + }, + { + "epoch": 1.5120766562435968, + "grad_norm": 6.824480465184024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311780 + }, + { + "epoch": 1.5121251544364327, + "grad_norm": 7.721272936578316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311790 + }, + { + "epoch": 1.512173652629269, + "grad_norm": 4.777384106091631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311800 + }, + { + "epoch": 1.512222150822105, + "grad_norm": 4.108847520001291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311810 + }, + { + "epoch": 1.5122706490149411, + "grad_norm": 4.2202822214676416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311820 + }, + { + "epoch": 1.5123191472077773, + "grad_norm": 5.125017423779354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311830 + }, + { + "epoch": 1.5123676454006132, + "grad_norm": 6.950431838959048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311840 + }, + { + "epoch": 1.5124161435934493, + "grad_norm": 4.65763292822885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311850 + }, + { + "epoch": 1.5124646417862855, + "grad_norm": 4.199064278509468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311860 + }, + { + "epoch": 1.5125131399791214, + "grad_norm": 4.585407680224307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311870 + }, + { + "epoch": 1.5125616381719578, + "grad_norm": 4.3342791400391434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311880 + }, + { + "epoch": 1.5126101363647937, + "grad_norm": 7.377139468189853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311890 + }, + { + "epoch": 1.5126586345576298, + "grad_norm": 4.79044047096977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311900 + }, + { + "epoch": 1.512707132750466, + "grad_norm": 5.152856488166435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311910 + }, + { + "epoch": 1.512755630943302, + "grad_norm": 5.615452209895011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311920 + }, + { + "epoch": 1.512804129136138, + "grad_norm": 4.964077220392937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311930 + }, + { + "epoch": 1.5128526273289742, + "grad_norm": 5.539341145777144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311940 + }, + { + "epoch": 1.51290112552181, + "grad_norm": 5.611893243440136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311950 + }, + { + "epoch": 1.5129496237146465, + "grad_norm": 5.022248501518334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311960 + }, + { + "epoch": 1.5129981219074824, + "grad_norm": 6.227506901268498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311970 + }, + { + "epoch": 1.5130466201003185, + "grad_norm": 5.537229412766465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311980 + }, + { + "epoch": 1.5130951182931547, + "grad_norm": 5.858648250978149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 311990 + }, + { + "epoch": 1.5131436164859906, + "grad_norm": 5.5421276101697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312000 + }, + { + "epoch": 1.5131921146788267, + "grad_norm": 4.098548913589184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312010 + }, + { + "epoch": 1.513240612871663, + "grad_norm": 3.9243667515620473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312020 + }, + { + "epoch": 1.5132891110644988, + "grad_norm": 3.7405504826892866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312030 + }, + { + "epoch": 1.5133376092573352, + "grad_norm": 4.813292093786004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312040 + }, + { + "epoch": 1.513386107450171, + "grad_norm": 4.215798128370807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312050 + }, + { + "epoch": 1.5134346056430072, + "grad_norm": 4.185339435025526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312060 + }, + { + "epoch": 1.5134831038358434, + "grad_norm": 4.029884053124988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312070 + }, + { + "epoch": 1.5135316020286793, + "grad_norm": 5.37594928573526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312080 + }, + { + "epoch": 1.5135801002215155, + "grad_norm": 5.632156785395637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312090 + }, + { + "epoch": 1.5136285984143516, + "grad_norm": 9.965620847651735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312100 + }, + { + "epoch": 1.5136770966071875, + "grad_norm": 3.4627302625267475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312110 + }, + { + "epoch": 1.5137255948000239, + "grad_norm": 3.873373088936205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312120 + }, + { + "epoch": 1.5137740929928598, + "grad_norm": 3.762013136565656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312130 + }, + { + "epoch": 1.513822591185696, + "grad_norm": 6.914390269230353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312140 + }, + { + "epoch": 1.513871089378532, + "grad_norm": 4.643538318305218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312150 + }, + { + "epoch": 1.513919587571368, + "grad_norm": 3.375182018316991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312160 + }, + { + "epoch": 1.5139680857642042, + "grad_norm": 2.9876323992539255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312170 + }, + { + "epoch": 1.5140165839570403, + "grad_norm": 5.142548502590216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312180 + }, + { + "epoch": 1.5140650821498762, + "grad_norm": 5.253987183095887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312190 + }, + { + "epoch": 1.5141135803427126, + "grad_norm": 3.94047219742788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312200 + }, + { + "epoch": 1.5141620785355485, + "grad_norm": 3.690636276587611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312210 + }, + { + "epoch": 1.5142105767283847, + "grad_norm": 4.7255787194444565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312220 + }, + { + "epoch": 1.5142590749212208, + "grad_norm": 3.5760720606958785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312230 + }, + { + "epoch": 1.5143075731140567, + "grad_norm": 6.24125618742255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312240 + }, + { + "epoch": 1.514356071306893, + "grad_norm": 4.754191706979327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312250 + }, + { + "epoch": 1.514404569499729, + "grad_norm": 7.630031291228079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312260 + }, + { + "epoch": 1.5144530676925652, + "grad_norm": 3.599148215016612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312270 + }, + { + "epoch": 1.5145015658854013, + "grad_norm": 3.84624428306779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312280 + }, + { + "epoch": 1.5145500640782372, + "grad_norm": 5.922418608861335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312290 + }, + { + "epoch": 1.5145985622710734, + "grad_norm": 1.3287919500726275e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312300 + }, + { + "epoch": 1.5146470604639095, + "grad_norm": 3.9528259776488994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312310 + }, + { + "epoch": 1.5146955586567454, + "grad_norm": 3.334076836836175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312320 + }, + { + "epoch": 1.5147440568495818, + "grad_norm": 6.70255985824042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312330 + }, + { + "epoch": 1.5147925550424177, + "grad_norm": 5.073204079053539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312340 + }, + { + "epoch": 1.5148410532352539, + "grad_norm": 3.8332902363436006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312350 + }, + { + "epoch": 1.51488955142809, + "grad_norm": 4.386533305478224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312360 + }, + { + "epoch": 1.514938049620926, + "grad_norm": 3.3397964216419496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312370 + }, + { + "epoch": 1.514986547813762, + "grad_norm": 3.0609854206886666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312380 + }, + { + "epoch": 1.5150350460065982, + "grad_norm": 8.766554628891754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312390 + }, + { + "epoch": 1.5150835441994341, + "grad_norm": 4.917720275443571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312400 + }, + { + "epoch": 1.5151320423922705, + "grad_norm": 5.462227363750571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312410 + }, + { + "epoch": 1.5151805405851064, + "grad_norm": 2.862392705083039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312420 + }, + { + "epoch": 1.5152290387779426, + "grad_norm": 3.938906445455359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312430 + }, + { + "epoch": 1.5152775369707787, + "grad_norm": 4.108943016944977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312440 + }, + { + "epoch": 1.5153260351636146, + "grad_norm": 4.786443241755478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312450 + }, + { + "epoch": 1.5153745333564508, + "grad_norm": 2.7841244332194037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312460 + }, + { + "epoch": 1.515423031549287, + "grad_norm": 7.160136874517775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312470 + }, + { + "epoch": 1.5154715297421228, + "grad_norm": 3.3857727999020426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312480 + }, + { + "epoch": 1.5155200279349592, + "grad_norm": 4.5603030685015256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312490 + }, + { + "epoch": 1.5155685261277951, + "grad_norm": 2.788291908473184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312500 + }, + { + "epoch": 1.5156170243206313, + "grad_norm": 3.0576146059502207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312510 + }, + { + "epoch": 1.5156655225134674, + "grad_norm": 5.379795879889571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312520 + }, + { + "epoch": 1.5157140207063033, + "grad_norm": 3.6407791981218907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312530 + }, + { + "epoch": 1.5157625188991395, + "grad_norm": 4.693220034823753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312540 + }, + { + "epoch": 1.5158110170919756, + "grad_norm": 3.0990878485681606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312550 + }, + { + "epoch": 1.5158595152848116, + "grad_norm": 3.0423760222220153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312560 + }, + { + "epoch": 1.515908013477648, + "grad_norm": 3.460123707554885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312570 + }, + { + "epoch": 1.5159565116704838, + "grad_norm": 2.79757813359538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312580 + }, + { + "epoch": 1.51600500986332, + "grad_norm": 3.4752133615256753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312590 + }, + { + "epoch": 1.5160535080561561, + "grad_norm": 3.1436854897037847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312600 + }, + { + "epoch": 1.516102006248992, + "grad_norm": 2.946426036487537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312610 + }, + { + "epoch": 1.5161505044418282, + "grad_norm": 3.0592747179980506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312620 + }, + { + "epoch": 1.5161990026346643, + "grad_norm": 3.238141061956412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312630 + }, + { + "epoch": 1.5162475008275003, + "grad_norm": 8.665476229907654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312640 + }, + { + "epoch": 1.5162959990203366, + "grad_norm": 3.3282302069892467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312650 + }, + { + "epoch": 1.5163444972131725, + "grad_norm": 2.7848068384628277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312660 + }, + { + "epoch": 1.5163929954060087, + "grad_norm": 6.626269168918952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312670 + }, + { + "epoch": 1.5164414935988448, + "grad_norm": 3.13483582203844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312680 + }, + { + "epoch": 1.5164899917916808, + "grad_norm": 3.458697221958573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312690 + }, + { + "epoch": 1.516538489984517, + "grad_norm": 2.8881274261038925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312700 + }, + { + "epoch": 1.516586988177353, + "grad_norm": 2.830233540862537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312710 + }, + { + "epoch": 1.516635486370189, + "grad_norm": 1.747505734783772e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312720 + }, + { + "epoch": 1.5166839845630253, + "grad_norm": 4.254322334418248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312730 + }, + { + "epoch": 1.5167324827558613, + "grad_norm": 3.1892585639070603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312740 + }, + { + "epoch": 1.5167809809486974, + "grad_norm": 2.500293589946523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312750 + }, + { + "epoch": 1.5168294791415335, + "grad_norm": 2.1656306614659115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312760 + }, + { + "epoch": 1.5168779773343695, + "grad_norm": 3.4258698633493623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312770 + }, + { + "epoch": 1.5169264755272058, + "grad_norm": 2.922227508861397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312780 + }, + { + "epoch": 1.5169749737200418, + "grad_norm": 4.331694754000637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312790 + }, + { + "epoch": 1.517023471912878, + "grad_norm": 2.863797021745995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312800 + }, + { + "epoch": 1.517071970105714, + "grad_norm": 3.532482537593751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312810 + }, + { + "epoch": 1.51712046829855, + "grad_norm": 2.549140845076181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312820 + }, + { + "epoch": 1.517168966491386, + "grad_norm": 4.150107031364314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312830 + }, + { + "epoch": 1.5172174646842223, + "grad_norm": 4.1368079450876394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312840 + }, + { + "epoch": 1.5172659628770582, + "grad_norm": 2.4881848048607935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312850 + }, + { + "epoch": 1.5173144610698945, + "grad_norm": 3.001814832259697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312860 + }, + { + "epoch": 1.5173629592627305, + "grad_norm": 3.4317164931962907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312870 + }, + { + "epoch": 1.5174114574555666, + "grad_norm": 2.487791732619371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312880 + }, + { + "epoch": 1.5174599556484027, + "grad_norm": 2.9811408808200213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312890 + }, + { + "epoch": 1.5175084538412387, + "grad_norm": 2.8931472684234905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312900 + }, + { + "epoch": 1.5175569520340748, + "grad_norm": 2.6934299057757016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312910 + }, + { + "epoch": 1.517605450226911, + "grad_norm": 3.950584641643218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312920 + }, + { + "epoch": 1.5176539484197469, + "grad_norm": 5.043335704613128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312930 + }, + { + "epoch": 1.5177024466125832, + "grad_norm": 2.3736345156066818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312940 + }, + { + "epoch": 1.5177509448054192, + "grad_norm": 3.5677561527336366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312950 + }, + { + "epoch": 1.5177994429982553, + "grad_norm": 2.455600736084307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312960 + }, + { + "epoch": 1.5178479411910915, + "grad_norm": 2.742897038388037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312970 + }, + { + "epoch": 1.5178964393839274, + "grad_norm": 4.1660476313154504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312980 + }, + { + "epoch": 1.5179449375767635, + "grad_norm": 3.4673257687245496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 312990 + }, + { + "epoch": 1.5179934357695997, + "grad_norm": 3.1187224180939666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313000 + }, + { + "epoch": 1.5180419339624356, + "grad_norm": 2.5131120651167294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313010 + }, + { + "epoch": 1.518090432155272, + "grad_norm": 2.836811177076015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313020 + }, + { + "epoch": 1.5181389303481079, + "grad_norm": 3.718812138231442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313030 + }, + { + "epoch": 1.518187428540944, + "grad_norm": 4.589419972944597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313040 + }, + { + "epoch": 1.5182359267337802, + "grad_norm": 1.561527597004897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313050 + }, + { + "epoch": 1.518284424926616, + "grad_norm": 5.055540555076732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313060 + }, + { + "epoch": 1.5183329231194522, + "grad_norm": 2.3664225068387168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313070 + }, + { + "epoch": 1.5183814213122884, + "grad_norm": 3.9139027308010554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313080 + }, + { + "epoch": 1.5184299195051243, + "grad_norm": 4.47977043904757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313090 + }, + { + "epoch": 1.5184784176979607, + "grad_norm": 3.3423054901504656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313100 + }, + { + "epoch": 1.5185269158907966, + "grad_norm": 3.1776514219927776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313110 + }, + { + "epoch": 1.5185754140836327, + "grad_norm": 2.3419023875703715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313120 + }, + { + "epoch": 1.5186239122764689, + "grad_norm": 3.694883048410702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313130 + }, + { + "epoch": 1.5186724104693048, + "grad_norm": 3.153927536914125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313140 + }, + { + "epoch": 1.518720908662141, + "grad_norm": 2.52946961154521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313150 + }, + { + "epoch": 1.518769406854977, + "grad_norm": 2.083791201812346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313160 + }, + { + "epoch": 1.518817905047813, + "grad_norm": 2.4028142320275947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313170 + }, + { + "epoch": 1.5188664032406494, + "grad_norm": 2.485392940343445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313180 + }, + { + "epoch": 1.5189149014334853, + "grad_norm": 3.240525643377623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313190 + }, + { + "epoch": 1.5189633996263214, + "grad_norm": 2.4519454200344626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313200 + }, + { + "epoch": 1.5190118978191576, + "grad_norm": 2.3141726046560507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313210 + }, + { + "epoch": 1.5190603960119935, + "grad_norm": 2.4498569928255165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313220 + }, + { + "epoch": 1.5191088942048296, + "grad_norm": 1.8805856427661638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313230 + }, + { + "epoch": 1.5191573923976658, + "grad_norm": 3.3204759120053495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313240 + }, + { + "epoch": 1.5192058905905017, + "grad_norm": 2.21166814640128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313250 + }, + { + "epoch": 1.519254388783338, + "grad_norm": 1.9697741038271488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313260 + }, + { + "epoch": 1.519302886976174, + "grad_norm": 2.472409619258542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313270 + }, + { + "epoch": 1.5193513851690101, + "grad_norm": 2.4763437522778986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313280 + }, + { + "epoch": 1.5193998833618463, + "grad_norm": 3.3475356531198486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313290 + }, + { + "epoch": 1.5194483815546822, + "grad_norm": 2.1815193917973374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313300 + }, + { + "epoch": 1.5194968797475186, + "grad_norm": 1.803611553441442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313310 + }, + { + "epoch": 1.5195453779403545, + "grad_norm": 3.114972173534625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313320 + }, + { + "epoch": 1.5195938761331906, + "grad_norm": 2.2368234908753948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313330 + }, + { + "epoch": 1.5196423743260268, + "grad_norm": 3.93843237134206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313340 + }, + { + "epoch": 1.5196908725188627, + "grad_norm": 2.738127591328521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313350 + }, + { + "epoch": 1.5197393707116988, + "grad_norm": 1.6535919939997257e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313360 + }, + { + "epoch": 1.519787868904535, + "grad_norm": 1.817429762240863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313370 + }, + { + "epoch": 1.519836367097371, + "grad_norm": 1.9828966912882606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313380 + }, + { + "epoch": 1.5198848652902073, + "grad_norm": 2.2932769638828177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313390 + }, + { + "epoch": 1.5199333634830432, + "grad_norm": 2.409824446658604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313400 + }, + { + "epoch": 1.5199818616758793, + "grad_norm": 2.190224819287323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313410 + }, + { + "epoch": 1.5200303598687155, + "grad_norm": 1.5496650007662538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313420 + }, + { + "epoch": 1.5200788580615514, + "grad_norm": 2.836742680756288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313430 + }, + { + "epoch": 1.5201273562543876, + "grad_norm": 2.727658170442737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313440 + }, + { + "epoch": 1.5201758544472237, + "grad_norm": 2.3334452237122605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313450 + }, + { + "epoch": 1.5202243526400596, + "grad_norm": 1.8927019596048922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313460 + }, + { + "epoch": 1.520272850832896, + "grad_norm": 3.122544285361073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313470 + }, + { + "epoch": 1.520321349025732, + "grad_norm": 1.658964947637287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313480 + }, + { + "epoch": 1.520369847218568, + "grad_norm": 3.143624098811415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313490 + }, + { + "epoch": 1.5204183454114042, + "grad_norm": 1.7917733430294902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313500 + }, + { + "epoch": 1.5204668436042401, + "grad_norm": 1.6971219451988873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313510 + }, + { + "epoch": 1.5205153417970763, + "grad_norm": 1.6348155895684613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313520 + }, + { + "epoch": 1.5205638399899124, + "grad_norm": 2.0053440152878466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313530 + }, + { + "epoch": 1.5206123381827483, + "grad_norm": 3.142939704048331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313540 + }, + { + "epoch": 1.5206608363755847, + "grad_norm": 3.498479372865404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313550 + }, + { + "epoch": 1.5207093345684206, + "grad_norm": 1.8627868314524676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313560 + }, + { + "epoch": 1.5207578327612568, + "grad_norm": 1.9802583040018362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313570 + }, + { + "epoch": 1.520806330954093, + "grad_norm": 1.7505635696579702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313580 + }, + { + "epoch": 1.5208548291469288, + "grad_norm": 2.4827693323459243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313590 + }, + { + "epoch": 1.520903327339765, + "grad_norm": 1.6646245626361633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313600 + }, + { + "epoch": 1.520951825532601, + "grad_norm": 1.6717515904929314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313610 + }, + { + "epoch": 1.521000323725437, + "grad_norm": 1.7303506183452555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313620 + }, + { + "epoch": 1.5210488219182734, + "grad_norm": 1.5369627703876176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313630 + }, + { + "epoch": 1.5210973201111093, + "grad_norm": 2.1355945989398606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313640 + }, + { + "epoch": 1.5211458183039455, + "grad_norm": 1.4702119699450122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313650 + }, + { + "epoch": 1.5211943164967816, + "grad_norm": 4.101634658582043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313660 + }, + { + "epoch": 1.5212428146896175, + "grad_norm": 1.5206504144771316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313670 + }, + { + "epoch": 1.5212913128824537, + "grad_norm": 2.0276807788377482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313680 + }, + { + "epoch": 1.5213398110752898, + "grad_norm": 2.282562689970291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313690 + }, + { + "epoch": 1.5213883092681257, + "grad_norm": 2.187284877663842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313700 + }, + { + "epoch": 1.521436807460962, + "grad_norm": 1.8692631442718266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313710 + }, + { + "epoch": 1.521485305653798, + "grad_norm": 0.5761693716049194, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 313720 + }, + { + "epoch": 1.5215338038466342, + "grad_norm": 0.08234293013811111, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 313730 + }, + { + "epoch": 1.5215823020394703, + "grad_norm": 9.044873877428472e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 313740 + }, + { + "epoch": 1.5216308002323062, + "grad_norm": 6.505568308057263e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313750 + }, + { + "epoch": 1.5216792984251424, + "grad_norm": 0.0001235373638337478, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 313760 + }, + { + "epoch": 1.5217277966179785, + "grad_norm": 0.00019523102673701942, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313770 + }, + { + "epoch": 1.5217762948108147, + "grad_norm": 4.995486506231828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313780 + }, + { + "epoch": 1.5218247930036508, + "grad_norm": 1.568062543810811e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313790 + }, + { + "epoch": 1.5218732911964867, + "grad_norm": 1.0791515023811371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313800 + }, + { + "epoch": 1.5219217893893229, + "grad_norm": 1.2068640899087768e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 313810 + }, + { + "epoch": 1.521970287582159, + "grad_norm": 5.3591102187056094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313820 + }, + { + "epoch": 1.522018785774995, + "grad_norm": 7.140478146538953e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 313830 + }, + { + "epoch": 1.5220672839678313, + "grad_norm": 2.2286474631982855e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313840 + }, + { + "epoch": 1.5221157821606672, + "grad_norm": 4.3568368823798664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313850 + }, + { + "epoch": 1.5221642803535034, + "grad_norm": 6.01061310590012e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313860 + }, + { + "epoch": 1.5222127785463395, + "grad_norm": 1.8922639810625697e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313870 + }, + { + "epoch": 1.5222612767391754, + "grad_norm": 2.5024739898071857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313880 + }, + { + "epoch": 1.5223097749320116, + "grad_norm": 4.365144377516117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313890 + }, + { + "epoch": 1.5223582731248477, + "grad_norm": 1.6294284250761848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313900 + }, + { + "epoch": 1.5224067713176836, + "grad_norm": 1.7602162643015618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313910 + }, + { + "epoch": 1.52245526951052, + "grad_norm": 1.6382359717681538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313920 + }, + { + "epoch": 1.522503767703356, + "grad_norm": 1.246574265678646e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 313930 + }, + { + "epoch": 1.522552265896192, + "grad_norm": 0.0001049886632245034, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313940 + }, + { + "epoch": 1.5226007640890282, + "grad_norm": 3.327567901578732e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313950 + }, + { + "epoch": 1.5226492622818641, + "grad_norm": 1.1315349183860235e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 313960 + }, + { + "epoch": 1.5226977604747003, + "grad_norm": 0.04592354968190193, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 313970 + }, + { + "epoch": 1.5227462586675364, + "grad_norm": 0.001049036392942071, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 313980 + }, + { + "epoch": 1.5227947568603724, + "grad_norm": 4.856355008087121e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 313990 + }, + { + "epoch": 1.5228432550532087, + "grad_norm": 1.0222760465694591e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314000 + }, + { + "epoch": 1.5228917532460446, + "grad_norm": 8.909834832593333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314010 + }, + { + "epoch": 1.5229402514388808, + "grad_norm": 3.201344588887878e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314020 + }, + { + "epoch": 1.522988749631717, + "grad_norm": 2.599163053673692e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314030 + }, + { + "epoch": 1.5230372478245529, + "grad_norm": 1.3575123375630938e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314040 + }, + { + "epoch": 1.523085746017389, + "grad_norm": 2.3543077986687422e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 314050 + }, + { + "epoch": 1.5231342442102251, + "grad_norm": 0.0006737990188412368, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314060 + }, + { + "epoch": 1.523182742403061, + "grad_norm": 0.017077088356018066, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314070 + }, + { + "epoch": 1.5232312405958974, + "grad_norm": 7.82151473686099e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 314080 + }, + { + "epoch": 1.5232797387887334, + "grad_norm": 6.843879964435473e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314090 + }, + { + "epoch": 1.5233282369815695, + "grad_norm": 9.849187335930765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314100 + }, + { + "epoch": 1.5233767351744056, + "grad_norm": 2.8921267585246824e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314110 + }, + { + "epoch": 1.5234252333672416, + "grad_norm": 1.500582311564358e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314120 + }, + { + "epoch": 1.5234737315600777, + "grad_norm": 9.291459718951955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314130 + }, + { + "epoch": 1.5235222297529138, + "grad_norm": 8.279605026473291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314140 + }, + { + "epoch": 1.5235707279457498, + "grad_norm": 3.974297669628868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314150 + }, + { + "epoch": 1.5236192261385861, + "grad_norm": 3.6550868571794126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314160 + }, + { + "epoch": 1.523667724331422, + "grad_norm": 5.011444955016486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314170 + }, + { + "epoch": 1.5237162225242582, + "grad_norm": 6.219638180482434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314180 + }, + { + "epoch": 1.5237647207170943, + "grad_norm": 7.205534075183095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314190 + }, + { + "epoch": 1.5238132189099303, + "grad_norm": 3.7687464100599755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314200 + }, + { + "epoch": 1.5238617171027664, + "grad_norm": 3.37338474309945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314210 + }, + { + "epoch": 1.5239102152956026, + "grad_norm": 2.1649295831593918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314220 + }, + { + "epoch": 1.5239587134884385, + "grad_norm": 2.8568699690367794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314230 + }, + { + "epoch": 1.5240072116812748, + "grad_norm": 4.20904825659818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314240 + }, + { + "epoch": 1.5240557098741108, + "grad_norm": 2.082500941469334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314250 + }, + { + "epoch": 1.524104208066947, + "grad_norm": 2.3048035018291557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314260 + }, + { + "epoch": 1.524152706259783, + "grad_norm": 1.6873526647032122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314270 + }, + { + "epoch": 1.524201204452619, + "grad_norm": 2.5074882614717353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314280 + }, + { + "epoch": 1.5242497026454553, + "grad_norm": 2.9526370326493634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314290 + }, + { + "epoch": 1.5242982008382913, + "grad_norm": 2.4020960154302884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314300 + }, + { + "epoch": 1.5243466990311274, + "grad_norm": 4.652902134694159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314310 + }, + { + "epoch": 1.5243951972239635, + "grad_norm": 1.4558935390596162e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314320 + }, + { + "epoch": 1.5244436954167995, + "grad_norm": 3.899701823684154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314330 + }, + { + "epoch": 1.5244921936096356, + "grad_norm": 3.5495565953169717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314340 + }, + { + "epoch": 1.5245406918024718, + "grad_norm": 1.861288524196425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314350 + }, + { + "epoch": 1.5245891899953077, + "grad_norm": 2.277751718793297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314360 + }, + { + "epoch": 1.524637688188144, + "grad_norm": 3.3005608202074654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314370 + }, + { + "epoch": 1.52468618638098, + "grad_norm": 3.4523468457337003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314380 + }, + { + "epoch": 1.5247346845738161, + "grad_norm": 4.333830020186724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314390 + }, + { + "epoch": 1.5247831827666523, + "grad_norm": 1.6267630371658015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314400 + }, + { + "epoch": 1.5248316809594882, + "grad_norm": 5.2208802117093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314410 + }, + { + "epoch": 1.5248801791523243, + "grad_norm": 2.3360630621027667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314420 + }, + { + "epoch": 1.5249286773451605, + "grad_norm": 3.1182016755337827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314430 + }, + { + "epoch": 1.5249771755379964, + "grad_norm": 0.0016735929530113935, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 314440 + }, + { + "epoch": 1.5250256737308328, + "grad_norm": 0.0002016878133872524, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314450 + }, + { + "epoch": 1.5250741719236687, + "grad_norm": 0.00010830460087163374, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314460 + }, + { + "epoch": 1.5251226701165048, + "grad_norm": 7.884895603638142e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314470 + }, + { + "epoch": 1.525171168309341, + "grad_norm": 3.000280776177533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314480 + }, + { + "epoch": 1.5252196665021769, + "grad_norm": 3.685175033751875e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314490 + }, + { + "epoch": 1.525268164695013, + "grad_norm": 1.566665741847828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314500 + }, + { + "epoch": 1.5253166628878492, + "grad_norm": 1.3059197044640314e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314510 + }, + { + "epoch": 1.525365161080685, + "grad_norm": 1.11333738459507e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314520 + }, + { + "epoch": 1.5254136592735215, + "grad_norm": 1.3298927115101833e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314530 + }, + { + "epoch": 1.5254621574663574, + "grad_norm": 1.7036047211149707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314540 + }, + { + "epoch": 1.5255106556591935, + "grad_norm": 7.704215022386052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314550 + }, + { + "epoch": 1.5255591538520297, + "grad_norm": 0.08475937694311142, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 314560 + }, + { + "epoch": 1.5256076520448656, + "grad_norm": 0.0005811837618239224, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314570 + }, + { + "epoch": 1.5256561502377017, + "grad_norm": 0.0001276377879548818, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314580 + }, + { + "epoch": 1.5257046484305379, + "grad_norm": 3.736687358468771e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314590 + }, + { + "epoch": 1.5257531466233738, + "grad_norm": 1.4897449545969721e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314600 + }, + { + "epoch": 1.5258016448162102, + "grad_norm": 1.0978439604514278e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314610 + }, + { + "epoch": 1.525850143009046, + "grad_norm": 8.684207386977505e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314620 + }, + { + "epoch": 1.5258986412018822, + "grad_norm": 1.0084782843478024e-05, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 314630 + }, + { + "epoch": 1.5259471393947184, + "grad_norm": 0.08902926743030548, + "learning_rate": 0.0002, + "loss": 0.0056, + "step": 314640 + }, + { + "epoch": 1.5259956375875543, + "grad_norm": 0.037484098225831985, + "learning_rate": 0.0002, + "loss": 0.0021, + "step": 314650 + }, + { + "epoch": 1.5260441357803904, + "grad_norm": 0.05059441179037094, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 314660 + }, + { + "epoch": 1.5260926339732266, + "grad_norm": 0.00043909341911785305, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 314670 + }, + { + "epoch": 1.5261411321660625, + "grad_norm": 0.02306508645415306, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 314680 + }, + { + "epoch": 1.5261896303588989, + "grad_norm": 0.10054275393486023, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 314690 + }, + { + "epoch": 1.5262381285517348, + "grad_norm": 0.28696683049201965, + "learning_rate": 0.0002, + "loss": 0.0289, + "step": 314700 + }, + { + "epoch": 1.526286626744571, + "grad_norm": 0.015011902898550034, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 314710 + }, + { + "epoch": 1.526335124937407, + "grad_norm": 0.0826103463768959, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 314720 + }, + { + "epoch": 1.526383623130243, + "grad_norm": 0.0014864722033962607, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314730 + }, + { + "epoch": 1.5264321213230791, + "grad_norm": 4.968307985109277e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314740 + }, + { + "epoch": 1.5264806195159153, + "grad_norm": 2.8690355975413695e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314750 + }, + { + "epoch": 1.5265291177087512, + "grad_norm": 2.3919028535601683e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 314760 + }, + { + "epoch": 1.5265776159015876, + "grad_norm": 5.958323527011089e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 314770 + }, + { + "epoch": 1.5266261140944235, + "grad_norm": 0.00012219793279655278, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314780 + }, + { + "epoch": 1.5266746122872596, + "grad_norm": 0.0001272188383154571, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314790 + }, + { + "epoch": 1.5267231104800958, + "grad_norm": 0.0003646512923296541, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314800 + }, + { + "epoch": 1.5267716086729317, + "grad_norm": 7.063159864628688e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 314810 + }, + { + "epoch": 1.526820106865768, + "grad_norm": 0.0002727190440054983, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 314820 + }, + { + "epoch": 1.526868605058604, + "grad_norm": 0.00010108916467288509, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 314830 + }, + { + "epoch": 1.5269171032514401, + "grad_norm": 0.0005603375029750168, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 314840 + }, + { + "epoch": 1.5269656014442763, + "grad_norm": 0.00029471024754457176, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314850 + }, + { + "epoch": 1.5270140996371122, + "grad_norm": 0.00030038997647352517, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314860 + }, + { + "epoch": 1.5270625978299484, + "grad_norm": 9.451628284296021e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314870 + }, + { + "epoch": 1.5271110960227845, + "grad_norm": 0.003220484359189868, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 314880 + }, + { + "epoch": 1.5271595942156204, + "grad_norm": 0.00042643211781978607, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 314890 + }, + { + "epoch": 1.5272080924084568, + "grad_norm": 0.00011734791041817516, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314900 + }, + { + "epoch": 1.5272565906012927, + "grad_norm": 5.378275454859249e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314910 + }, + { + "epoch": 1.5273050887941289, + "grad_norm": 3.6075311072636396e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314920 + }, + { + "epoch": 1.527353586986965, + "grad_norm": 7.314215326914564e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 314930 + }, + { + "epoch": 1.527402085179801, + "grad_norm": 0.0001577414950588718, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 314940 + }, + { + "epoch": 1.527450583372637, + "grad_norm": 0.00010599029337754473, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314950 + }, + { + "epoch": 1.5274990815654732, + "grad_norm": 9.323774429503828e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314960 + }, + { + "epoch": 1.5275475797583091, + "grad_norm": 3.976956577389501e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314970 + }, + { + "epoch": 1.5275960779511455, + "grad_norm": 3.5211025533499196e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314980 + }, + { + "epoch": 1.5276445761439814, + "grad_norm": 3.7864014302613214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 314990 + }, + { + "epoch": 1.5276930743368176, + "grad_norm": 1.7636824850342236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315000 + }, + { + "epoch": 1.5277415725296537, + "grad_norm": 3.1155545002548024e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315010 + }, + { + "epoch": 1.5277900707224896, + "grad_norm": 2.4590575776528567e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315020 + }, + { + "epoch": 1.5278385689153258, + "grad_norm": 6.520465103676543e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315030 + }, + { + "epoch": 1.527887067108162, + "grad_norm": 2.0482662876020186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315040 + }, + { + "epoch": 1.5279355653009978, + "grad_norm": 2.1344229025999084e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315050 + }, + { + "epoch": 1.5279840634938342, + "grad_norm": 1.067838184098946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315060 + }, + { + "epoch": 1.5280325616866701, + "grad_norm": 2.5024841306731105e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315070 + }, + { + "epoch": 1.5280810598795063, + "grad_norm": 1.7878815924632363e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315080 + }, + { + "epoch": 1.5281295580723424, + "grad_norm": 1.2711893759842496e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315090 + }, + { + "epoch": 1.5281780562651783, + "grad_norm": 9.616186616767664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315100 + }, + { + "epoch": 1.5282265544580145, + "grad_norm": 1.7514501450932585e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315110 + }, + { + "epoch": 1.5282750526508506, + "grad_norm": 1.185148812510306e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315120 + }, + { + "epoch": 1.5283235508436865, + "grad_norm": 1.0520830073801335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315130 + }, + { + "epoch": 1.528372049036523, + "grad_norm": 9.829145710682496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315140 + }, + { + "epoch": 1.5284205472293588, + "grad_norm": 1.4115805242909119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315150 + }, + { + "epoch": 1.528469045422195, + "grad_norm": 5.589670763583854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315160 + }, + { + "epoch": 1.5285175436150311, + "grad_norm": 6.454794402088737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315170 + }, + { + "epoch": 1.528566041807867, + "grad_norm": 1.0294756066286936e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315180 + }, + { + "epoch": 1.5286145400007032, + "grad_norm": 9.110285645874683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315190 + }, + { + "epoch": 1.5286630381935393, + "grad_norm": 6.9561824602715205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315200 + }, + { + "epoch": 1.5287115363863752, + "grad_norm": 5.777491878689034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315210 + }, + { + "epoch": 1.5287600345792116, + "grad_norm": 6.768498678866308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315220 + }, + { + "epoch": 1.5288085327720475, + "grad_norm": 7.282909791683778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315230 + }, + { + "epoch": 1.5288570309648837, + "grad_norm": 7.376474059128668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315240 + }, + { + "epoch": 1.5289055291577198, + "grad_norm": 7.003022346907528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315250 + }, + { + "epoch": 1.5289540273505557, + "grad_norm": 4.635808181774337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315260 + }, + { + "epoch": 1.5290025255433919, + "grad_norm": 7.808403097442351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315270 + }, + { + "epoch": 1.529051023736228, + "grad_norm": 4.708857886726037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315280 + }, + { + "epoch": 1.529099521929064, + "grad_norm": 5.049652372690616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315290 + }, + { + "epoch": 1.5291480201219003, + "grad_norm": 4.477902166399872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315300 + }, + { + "epoch": 1.5291965183147362, + "grad_norm": 9.90990793070523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315310 + }, + { + "epoch": 1.5292450165075724, + "grad_norm": 1.5662766600144096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315320 + }, + { + "epoch": 1.5292935147004085, + "grad_norm": 3.695633722600178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315330 + }, + { + "epoch": 1.5293420128932445, + "grad_norm": 9.257811143470462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315340 + }, + { + "epoch": 1.5293905110860808, + "grad_norm": 1.0793277397169732e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315350 + }, + { + "epoch": 1.5294390092789167, + "grad_norm": 0.001424482325091958, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315360 + }, + { + "epoch": 1.5294875074717529, + "grad_norm": 4.340391569712665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315370 + }, + { + "epoch": 1.529536005664589, + "grad_norm": 2.9713310141232796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315380 + }, + { + "epoch": 1.529584503857425, + "grad_norm": 3.564038479453302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315390 + }, + { + "epoch": 1.529633002050261, + "grad_norm": 3.3533456189616118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315400 + }, + { + "epoch": 1.5296815002430972, + "grad_norm": 2.841285549948225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315410 + }, + { + "epoch": 1.5297299984359332, + "grad_norm": 2.4268015295092482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315420 + }, + { + "epoch": 1.5297784966287695, + "grad_norm": 6.11770010436885e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315430 + }, + { + "epoch": 1.5298269948216054, + "grad_norm": 5.195806807023473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315440 + }, + { + "epoch": 1.5298754930144416, + "grad_norm": 3.3983433240791783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315450 + }, + { + "epoch": 1.5299239912072777, + "grad_norm": 0.004357027821242809, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 315460 + }, + { + "epoch": 1.5299724894001137, + "grad_norm": 3.8008165574865416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315470 + }, + { + "epoch": 1.5300209875929498, + "grad_norm": 6.069560186006129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315480 + }, + { + "epoch": 1.530069485785786, + "grad_norm": 4.558631644613342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315490 + }, + { + "epoch": 1.5301179839786219, + "grad_norm": 4.460846867004875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315500 + }, + { + "epoch": 1.5301664821714582, + "grad_norm": 3.2824395930219907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315510 + }, + { + "epoch": 1.5302149803642942, + "grad_norm": 3.588630761441891e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315520 + }, + { + "epoch": 1.5302634785571303, + "grad_norm": 3.6710173390019918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315530 + }, + { + "epoch": 1.5303119767499664, + "grad_norm": 5.919447175983805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315540 + }, + { + "epoch": 1.5303604749428024, + "grad_norm": 2.4862522423063638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315550 + }, + { + "epoch": 1.5304089731356385, + "grad_norm": 2.6684513159125345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315560 + }, + { + "epoch": 1.5304574713284747, + "grad_norm": 3.904911409335909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315570 + }, + { + "epoch": 1.5305059695213106, + "grad_norm": 4.194402663415531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315580 + }, + { + "epoch": 1.530554467714147, + "grad_norm": 4.273438207746949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315590 + }, + { + "epoch": 1.5306029659069829, + "grad_norm": 5.422282356448704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315600 + }, + { + "epoch": 1.530651464099819, + "grad_norm": 4.765351150126662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315610 + }, + { + "epoch": 1.5306999622926551, + "grad_norm": 2.3242841962201055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315620 + }, + { + "epoch": 1.530748460485491, + "grad_norm": 2.9031257327005733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315630 + }, + { + "epoch": 1.5307969586783272, + "grad_norm": 4.0415366129309405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315640 + }, + { + "epoch": 1.5308454568711634, + "grad_norm": 2.1228206605883315e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315650 + }, + { + "epoch": 1.5308939550639993, + "grad_norm": 2.0099910216231365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315660 + }, + { + "epoch": 1.5309424532568356, + "grad_norm": 1.8091535594066954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315670 + }, + { + "epoch": 1.5309909514496716, + "grad_norm": 1.9113122107228264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315680 + }, + { + "epoch": 1.5310394496425077, + "grad_norm": 4.104502295376733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315690 + }, + { + "epoch": 1.5310879478353439, + "grad_norm": 4.109360816073604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315700 + }, + { + "epoch": 1.5311364460281798, + "grad_norm": 4.145229468122125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315710 + }, + { + "epoch": 1.531184944221016, + "grad_norm": 2.7434950879978715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315720 + }, + { + "epoch": 1.531233442413852, + "grad_norm": 1.8626178643899038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315730 + }, + { + "epoch": 1.531281940606688, + "grad_norm": 5.400900590757374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315740 + }, + { + "epoch": 1.5313304387995244, + "grad_norm": 2.6439215616846923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315750 + }, + { + "epoch": 1.5313789369923603, + "grad_norm": 2.1461191863636486e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 315760 + }, + { + "epoch": 1.5314274351851964, + "grad_norm": 2.65312337432988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315770 + }, + { + "epoch": 1.5314759333780326, + "grad_norm": 2.123696503986139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315780 + }, + { + "epoch": 1.5315244315708685, + "grad_norm": 3.966014901379822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315790 + }, + { + "epoch": 1.5315729297637046, + "grad_norm": 2.231786083939369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315800 + }, + { + "epoch": 1.5316214279565408, + "grad_norm": 9.028033673530445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315810 + }, + { + "epoch": 1.5316699261493767, + "grad_norm": 1.4054717212275136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315820 + }, + { + "epoch": 1.531718424342213, + "grad_norm": 1.3586878594651353e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315830 + }, + { + "epoch": 1.531766922535049, + "grad_norm": 4.54671862826217e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315840 + }, + { + "epoch": 1.5318154207278851, + "grad_norm": 1.9835090370179387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315850 + }, + { + "epoch": 1.5318639189207213, + "grad_norm": 3.84107534046052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315860 + }, + { + "epoch": 1.5319124171135572, + "grad_norm": 1.402652515025693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315870 + }, + { + "epoch": 1.5319609153063936, + "grad_norm": 1.836535716392973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315880 + }, + { + "epoch": 1.5320094134992295, + "grad_norm": 2.6269037789461436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315890 + }, + { + "epoch": 1.5320579116920656, + "grad_norm": 2.013145149248885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315900 + }, + { + "epoch": 1.5321064098849018, + "grad_norm": 2.7768116979132174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315910 + }, + { + "epoch": 1.5321549080777377, + "grad_norm": 2.8355752874631435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315920 + }, + { + "epoch": 1.5322034062705738, + "grad_norm": 3.2217922125710174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315930 + }, + { + "epoch": 1.53225190446341, + "grad_norm": 2.340488208574243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315940 + }, + { + "epoch": 1.532300402656246, + "grad_norm": 1.9908134163415525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315950 + }, + { + "epoch": 1.5323489008490823, + "grad_norm": 1.5232125178954448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315960 + }, + { + "epoch": 1.5323973990419182, + "grad_norm": 3.013287141584442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315970 + }, + { + "epoch": 1.5324458972347543, + "grad_norm": 1.346104340882448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315980 + }, + { + "epoch": 1.5324943954275905, + "grad_norm": 5.741303084505489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 315990 + }, + { + "epoch": 1.5325428936204264, + "grad_norm": 1.9780457023443887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316000 + }, + { + "epoch": 1.5325913918132625, + "grad_norm": 1.063102558873652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316010 + }, + { + "epoch": 1.5326398900060987, + "grad_norm": 1.321483409810753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316020 + }, + { + "epoch": 1.5326883881989346, + "grad_norm": 1.6138847058755346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316030 + }, + { + "epoch": 1.532736886391771, + "grad_norm": 1.662523573031649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316040 + }, + { + "epoch": 1.532785384584607, + "grad_norm": 1.8606274352350738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316050 + }, + { + "epoch": 1.532833882777443, + "grad_norm": 1.256542191185872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316060 + }, + { + "epoch": 1.5328823809702792, + "grad_norm": 4.839483608520823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316070 + }, + { + "epoch": 1.532930879163115, + "grad_norm": 1.7976211665882147e-06, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 316080 + }, + { + "epoch": 1.5329793773559512, + "grad_norm": 4.085793534613913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316090 + }, + { + "epoch": 1.5330278755487874, + "grad_norm": 1.7535632650833577e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 316100 + }, + { + "epoch": 1.5330763737416233, + "grad_norm": 7.283163995452924e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 316110 + }, + { + "epoch": 1.5331248719344597, + "grad_norm": 1.6435351426480338e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316120 + }, + { + "epoch": 1.5331733701272956, + "grad_norm": 1.4954630387364887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316130 + }, + { + "epoch": 1.5332218683201317, + "grad_norm": 1.7081505575333722e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316140 + }, + { + "epoch": 1.5332703665129679, + "grad_norm": 1.142620931204874e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316150 + }, + { + "epoch": 1.5333188647058038, + "grad_norm": 1.141165012086276e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316160 + }, + { + "epoch": 1.53336736289864, + "grad_norm": 5.488372698891908e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316170 + }, + { + "epoch": 1.533415861091476, + "grad_norm": 8.381749466934707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316180 + }, + { + "epoch": 1.533464359284312, + "grad_norm": 8.754083864914719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316190 + }, + { + "epoch": 1.5335128574771484, + "grad_norm": 8.059406354732346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316200 + }, + { + "epoch": 1.5335613556699843, + "grad_norm": 7.240221748361364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316210 + }, + { + "epoch": 1.5336098538628204, + "grad_norm": 9.250708899344318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316220 + }, + { + "epoch": 1.5336583520556566, + "grad_norm": 7.130774974939413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316230 + }, + { + "epoch": 1.5337068502484925, + "grad_norm": 1.3187530385039281e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316240 + }, + { + "epoch": 1.5337553484413287, + "grad_norm": 8.227802027249709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316250 + }, + { + "epoch": 1.5338038466341648, + "grad_norm": 3.880280473822495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316260 + }, + { + "epoch": 1.5338523448270007, + "grad_norm": 5.915660040045623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316270 + }, + { + "epoch": 1.533900843019837, + "grad_norm": 4.3794739212899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316280 + }, + { + "epoch": 1.533949341212673, + "grad_norm": 5.3010330702818464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316290 + }, + { + "epoch": 1.5339978394055092, + "grad_norm": 5.896921720704995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316300 + }, + { + "epoch": 1.5340463375983453, + "grad_norm": 3.406353471291368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316310 + }, + { + "epoch": 1.5340948357911812, + "grad_norm": 5.930211045779288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316320 + }, + { + "epoch": 1.5341433339840174, + "grad_norm": 1.4582487892766949e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316330 + }, + { + "epoch": 1.5341918321768535, + "grad_norm": 9.428580597159453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316340 + }, + { + "epoch": 1.5342403303696897, + "grad_norm": 2.9751490728813224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316350 + }, + { + "epoch": 1.5342888285625258, + "grad_norm": 3.3749402064131573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316360 + }, + { + "epoch": 1.5343373267553617, + "grad_norm": 2.272462097607786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316370 + }, + { + "epoch": 1.5343858249481979, + "grad_norm": 3.389757239347091e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316380 + }, + { + "epoch": 1.534434323141034, + "grad_norm": 2.569953949205228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316390 + }, + { + "epoch": 1.53448282133387, + "grad_norm": 5.66513472222141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316400 + }, + { + "epoch": 1.5345313195267063, + "grad_norm": 3.6601688861992443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316410 + }, + { + "epoch": 1.5345798177195422, + "grad_norm": 2.8283486699365312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316420 + }, + { + "epoch": 1.5346283159123784, + "grad_norm": 2.6378459097031737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316430 + }, + { + "epoch": 1.5346768141052145, + "grad_norm": 3.930605544155696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316440 + }, + { + "epoch": 1.5347253122980504, + "grad_norm": 4.273495505913161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316450 + }, + { + "epoch": 1.5347738104908866, + "grad_norm": 2.2006890958436998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316460 + }, + { + "epoch": 1.5348223086837227, + "grad_norm": 2.17450360651128e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316470 + }, + { + "epoch": 1.5348708068765586, + "grad_norm": 2.1452701730595436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316480 + }, + { + "epoch": 1.534919305069395, + "grad_norm": 2.0075808606634382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316490 + }, + { + "epoch": 1.534967803262231, + "grad_norm": 2.01229363483435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316500 + }, + { + "epoch": 1.535016301455067, + "grad_norm": 2.0121733541600406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316510 + }, + { + "epoch": 1.5350647996479032, + "grad_norm": 2.749743089225376e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316520 + }, + { + "epoch": 1.5351132978407391, + "grad_norm": 1.7216909782291623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316530 + }, + { + "epoch": 1.5351617960335753, + "grad_norm": 9.833430340222549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316540 + }, + { + "epoch": 1.5352102942264114, + "grad_norm": 1.8138671293854713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316550 + }, + { + "epoch": 1.5352587924192473, + "grad_norm": 2.0722445697174408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316560 + }, + { + "epoch": 1.5353072906120837, + "grad_norm": 2.9900472782173892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316570 + }, + { + "epoch": 1.5353557888049196, + "grad_norm": 2.130601387761999e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316580 + }, + { + "epoch": 1.5354042869977558, + "grad_norm": 3.3216197152796667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316590 + }, + { + "epoch": 1.535452785190592, + "grad_norm": 1.9951239664806053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316600 + }, + { + "epoch": 1.5355012833834278, + "grad_norm": 1.6635111705909367e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316610 + }, + { + "epoch": 1.535549781576264, + "grad_norm": 2.8944537007191684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316620 + }, + { + "epoch": 1.5355982797691001, + "grad_norm": 1.969646746147191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316630 + }, + { + "epoch": 1.535646777961936, + "grad_norm": 1.9991655335616088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316640 + }, + { + "epoch": 1.5356952761547724, + "grad_norm": 1.9185504243068863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316650 + }, + { + "epoch": 1.5357437743476083, + "grad_norm": 1.5463593854292412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316660 + }, + { + "epoch": 1.5357922725404445, + "grad_norm": 2.018014356508502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316670 + }, + { + "epoch": 1.5358407707332806, + "grad_norm": 1.8147219407183002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316680 + }, + { + "epoch": 1.5358892689261165, + "grad_norm": 1.7170210639960715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316690 + }, + { + "epoch": 1.5359377671189527, + "grad_norm": 1.6689475614839466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316700 + }, + { + "epoch": 1.5359862653117888, + "grad_norm": 3.074484311582637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316710 + }, + { + "epoch": 1.5360347635046248, + "grad_norm": 1.3488586318999296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316720 + }, + { + "epoch": 1.5360832616974611, + "grad_norm": 1.3461487924359972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316730 + }, + { + "epoch": 1.536131759890297, + "grad_norm": 1.7325870658169151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316740 + }, + { + "epoch": 1.5361802580831332, + "grad_norm": 1.2543646334961522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316750 + }, + { + "epoch": 1.5362287562759693, + "grad_norm": 1.6170663457160117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316760 + }, + { + "epoch": 1.5362772544688053, + "grad_norm": 1.4893089428369422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316770 + }, + { + "epoch": 1.5363257526616414, + "grad_norm": 1.5904962538115797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316780 + }, + { + "epoch": 1.5363742508544775, + "grad_norm": 1.5875427834544098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316790 + }, + { + "epoch": 1.5364227490473135, + "grad_norm": 2.034758381341817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316800 + }, + { + "epoch": 1.5364712472401498, + "grad_norm": 1.8286998511030106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316810 + }, + { + "epoch": 1.5365197454329858, + "grad_norm": 1.0181549896515207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316820 + }, + { + "epoch": 1.536568243625822, + "grad_norm": 1.4133534023130778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316830 + }, + { + "epoch": 1.536616741818658, + "grad_norm": 2.2780227482144255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316840 + }, + { + "epoch": 1.536665240011494, + "grad_norm": 1.486245764681371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316850 + }, + { + "epoch": 1.5367137382043303, + "grad_norm": 1.1672592563627404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316860 + }, + { + "epoch": 1.5367622363971662, + "grad_norm": 2.5203537461493397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316870 + }, + { + "epoch": 1.5368107345900024, + "grad_norm": 1.0591169257168076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316880 + }, + { + "epoch": 1.5368592327828385, + "grad_norm": 1.2566895293275593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316890 + }, + { + "epoch": 1.5369077309756745, + "grad_norm": 1.1584124877117574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316900 + }, + { + "epoch": 1.5369562291685106, + "grad_norm": 9.54854954215989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316910 + }, + { + "epoch": 1.5370047273613467, + "grad_norm": 1.081681034520443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316920 + }, + { + "epoch": 1.5370532255541827, + "grad_norm": 1.2005889402644243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316930 + }, + { + "epoch": 1.537101723747019, + "grad_norm": 7.71990671637468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316940 + }, + { + "epoch": 1.537150221939855, + "grad_norm": 1.0338508218410425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316950 + }, + { + "epoch": 1.537198720132691, + "grad_norm": 1.2938099871462327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316960 + }, + { + "epoch": 1.5372472183255272, + "grad_norm": 9.599627901479835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316970 + }, + { + "epoch": 1.5372957165183632, + "grad_norm": 1.2458020819394733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316980 + }, + { + "epoch": 1.5373442147111993, + "grad_norm": 1.336869217993808e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 316990 + }, + { + "epoch": 1.5373927129040355, + "grad_norm": 8.814325269668188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317000 + }, + { + "epoch": 1.5374412110968714, + "grad_norm": 1.1057055644414504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317010 + }, + { + "epoch": 1.5374897092897077, + "grad_norm": 9.880278639684548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317020 + }, + { + "epoch": 1.5375382074825437, + "grad_norm": 9.796995072974823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317030 + }, + { + "epoch": 1.5375867056753798, + "grad_norm": 1.0744256542238872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317040 + }, + { + "epoch": 1.537635203868216, + "grad_norm": 8.744368074076192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317050 + }, + { + "epoch": 1.5376837020610519, + "grad_norm": 1.1721094779204577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317060 + }, + { + "epoch": 1.537732200253888, + "grad_norm": 9.076610467673163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317070 + }, + { + "epoch": 1.5377806984467242, + "grad_norm": 8.787942533672322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317080 + }, + { + "epoch": 1.53782919663956, + "grad_norm": 1.0454059520270675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317090 + }, + { + "epoch": 1.5378776948323964, + "grad_norm": 1.2038900649713469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317100 + }, + { + "epoch": 1.5379261930252324, + "grad_norm": 8.464326128887478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317110 + }, + { + "epoch": 1.5379746912180685, + "grad_norm": 1.0292702654624009e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317120 + }, + { + "epoch": 1.5380231894109047, + "grad_norm": 1.1406077646824997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317130 + }, + { + "epoch": 1.5380716876037406, + "grad_norm": 8.397936994697375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317140 + }, + { + "epoch": 1.5381201857965767, + "grad_norm": 1.1086476661148481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317150 + }, + { + "epoch": 1.5381686839894129, + "grad_norm": 8.932080959311861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317160 + }, + { + "epoch": 1.5382171821822488, + "grad_norm": 8.206417874134786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317170 + }, + { + "epoch": 1.5382656803750852, + "grad_norm": 1.0468482969372417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317180 + }, + { + "epoch": 1.538314178567921, + "grad_norm": 7.866487976571079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317190 + }, + { + "epoch": 1.5383626767607572, + "grad_norm": 8.829307489577332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317200 + }, + { + "epoch": 1.5384111749535934, + "grad_norm": 1.00869635843992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317210 + }, + { + "epoch": 1.5384596731464293, + "grad_norm": 7.70256747273379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317220 + }, + { + "epoch": 1.5385081713392654, + "grad_norm": 7.783189630572451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317230 + }, + { + "epoch": 1.5385566695321016, + "grad_norm": 9.396805467076774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317240 + }, + { + "epoch": 1.5386051677249375, + "grad_norm": 1.1783381523855496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317250 + }, + { + "epoch": 1.5386536659177739, + "grad_norm": 8.247972118624602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317260 + }, + { + "epoch": 1.5387021641106098, + "grad_norm": 7.716153049841523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317270 + }, + { + "epoch": 1.538750662303446, + "grad_norm": 9.700511327537242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317280 + }, + { + "epoch": 1.538799160496282, + "grad_norm": 1.7205984477186576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317290 + }, + { + "epoch": 1.538847658689118, + "grad_norm": 7.912166211099247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317300 + }, + { + "epoch": 1.5388961568819541, + "grad_norm": 8.26313225843478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317310 + }, + { + "epoch": 1.5389446550747903, + "grad_norm": 7.572239724140672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317320 + }, + { + "epoch": 1.5389931532676262, + "grad_norm": 7.729290700808633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317330 + }, + { + "epoch": 1.5390416514604626, + "grad_norm": 1.1108201078968705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317340 + }, + { + "epoch": 1.5390901496532985, + "grad_norm": 9.071109161595814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317350 + }, + { + "epoch": 1.5391386478461346, + "grad_norm": 8.645953926134098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317360 + }, + { + "epoch": 1.5391871460389708, + "grad_norm": 6.781013439649541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317370 + }, + { + "epoch": 1.5392356442318067, + "grad_norm": 1.1022010539818439e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317380 + }, + { + "epoch": 1.539284142424643, + "grad_norm": 7.538966997344687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317390 + }, + { + "epoch": 1.539332640617479, + "grad_norm": 7.53353447180416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317400 + }, + { + "epoch": 1.5393811388103151, + "grad_norm": 7.861218591642682e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317410 + }, + { + "epoch": 1.5394296370031513, + "grad_norm": 9.097851147998881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317420 + }, + { + "epoch": 1.5394781351959872, + "grad_norm": 7.258502705553838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317430 + }, + { + "epoch": 1.5395266333888233, + "grad_norm": 1.6792445194369066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317440 + }, + { + "epoch": 1.5395751315816595, + "grad_norm": 8.25603876819514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317450 + }, + { + "epoch": 1.5396236297744954, + "grad_norm": 7.291608881132561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317460 + }, + { + "epoch": 1.5396721279673318, + "grad_norm": 6.881610374875891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317470 + }, + { + "epoch": 1.5397206261601677, + "grad_norm": 6.672381687167217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317480 + }, + { + "epoch": 1.5397691243530038, + "grad_norm": 7.066669240884949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317490 + }, + { + "epoch": 1.53981762254584, + "grad_norm": 6.350844046210113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317500 + }, + { + "epoch": 1.539866120738676, + "grad_norm": 7.28971031094261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317510 + }, + { + "epoch": 1.539914618931512, + "grad_norm": 6.536619707731006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317520 + }, + { + "epoch": 1.5399631171243482, + "grad_norm": 6.3747808098924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317530 + }, + { + "epoch": 1.5400116153171841, + "grad_norm": 7.538735644629924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317540 + }, + { + "epoch": 1.5400601135100205, + "grad_norm": 1.517111627435952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317550 + }, + { + "epoch": 1.5401086117028564, + "grad_norm": 8.824122801343037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317560 + }, + { + "epoch": 1.5401571098956925, + "grad_norm": 6.043720190973545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317570 + }, + { + "epoch": 1.5402056080885287, + "grad_norm": 1.0955784546240466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317580 + }, + { + "epoch": 1.5402541062813646, + "grad_norm": 6.303975510491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317590 + }, + { + "epoch": 1.5403026044742008, + "grad_norm": 5.999005452395068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317600 + }, + { + "epoch": 1.540351102667037, + "grad_norm": 6.645858547926764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317610 + }, + { + "epoch": 1.5403996008598728, + "grad_norm": 5.192775915929815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317620 + }, + { + "epoch": 1.5404480990527092, + "grad_norm": 7.242115316330455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317630 + }, + { + "epoch": 1.540496597245545, + "grad_norm": 5.128264319864684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317640 + }, + { + "epoch": 1.5405450954383813, + "grad_norm": 5.380020979828259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317650 + }, + { + "epoch": 1.5405935936312174, + "grad_norm": 1.33533126245311e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317660 + }, + { + "epoch": 1.5406420918240533, + "grad_norm": 6.538872980854649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317670 + }, + { + "epoch": 1.5406905900168895, + "grad_norm": 6.93648473770736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317680 + }, + { + "epoch": 1.5407390882097256, + "grad_norm": 7.183946877376002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317690 + }, + { + "epoch": 1.5407875864025615, + "grad_norm": 5.493824914992729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317700 + }, + { + "epoch": 1.540836084595398, + "grad_norm": 6.991583063609141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317710 + }, + { + "epoch": 1.5408845827882338, + "grad_norm": 6.687295694973727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317720 + }, + { + "epoch": 1.54093308098107, + "grad_norm": 4.964484787706169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317730 + }, + { + "epoch": 1.540981579173906, + "grad_norm": 5.270301244308939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317740 + }, + { + "epoch": 1.541030077366742, + "grad_norm": 4.924567065245355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317750 + }, + { + "epoch": 1.5410785755595782, + "grad_norm": 1.004507566904067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317760 + }, + { + "epoch": 1.5411270737524143, + "grad_norm": 4.721965183307475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317770 + }, + { + "epoch": 1.5411755719452502, + "grad_norm": 3.9637023974137264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317780 + }, + { + "epoch": 1.5412240701380866, + "grad_norm": 4.541858800166665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317790 + }, + { + "epoch": 1.5412725683309225, + "grad_norm": 5.349739922166918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317800 + }, + { + "epoch": 1.5413210665237587, + "grad_norm": 4.669524855671625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317810 + }, + { + "epoch": 1.5413695647165948, + "grad_norm": 4.961220270160993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317820 + }, + { + "epoch": 1.5414180629094307, + "grad_norm": 4.4090725737078174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317830 + }, + { + "epoch": 1.5414665611022669, + "grad_norm": 4.3814560513055767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317840 + }, + { + "epoch": 1.541515059295103, + "grad_norm": 1.0823500815604348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317850 + }, + { + "epoch": 1.541563557487939, + "grad_norm": 4.915514182357583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317860 + }, + { + "epoch": 1.5416120556807753, + "grad_norm": 6.42115651316999e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317870 + }, + { + "epoch": 1.5416605538736112, + "grad_norm": 4.827890052183648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317880 + }, + { + "epoch": 1.5417090520664474, + "grad_norm": 9.804867886487045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317890 + }, + { + "epoch": 1.5417575502592835, + "grad_norm": 3.9396641682287736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317900 + }, + { + "epoch": 1.5418060484521194, + "grad_norm": 5.165240395399451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317910 + }, + { + "epoch": 1.5418545466449558, + "grad_norm": 4.813078930965276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317920 + }, + { + "epoch": 1.5419030448377917, + "grad_norm": 4.848897106057848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317930 + }, + { + "epoch": 1.5419515430306279, + "grad_norm": 9.379698440170614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317940 + }, + { + "epoch": 1.542000041223464, + "grad_norm": 5.133967420078989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317950 + }, + { + "epoch": 1.5420485394163, + "grad_norm": 4.6286706378850795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317960 + }, + { + "epoch": 1.542097037609136, + "grad_norm": 3.832699917438731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317970 + }, + { + "epoch": 1.5421455358019722, + "grad_norm": 4.878386903328646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317980 + }, + { + "epoch": 1.5421940339948081, + "grad_norm": 3.332787628096412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 317990 + }, + { + "epoch": 1.5422425321876445, + "grad_norm": 4.107085260329768e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318000 + }, + { + "epoch": 1.5422910303804804, + "grad_norm": 3.4188104791610385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318010 + }, + { + "epoch": 1.5423395285733166, + "grad_norm": 3.793741996105382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318020 + }, + { + "epoch": 1.5423880267661527, + "grad_norm": 3.832206232345925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318030 + }, + { + "epoch": 1.5424365249589886, + "grad_norm": 3.875307186262944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318040 + }, + { + "epoch": 1.5424850231518248, + "grad_norm": 3.860429842461599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318050 + }, + { + "epoch": 1.542533521344661, + "grad_norm": 5.051488187746145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318060 + }, + { + "epoch": 1.5425820195374969, + "grad_norm": 3.510777730753034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318070 + }, + { + "epoch": 1.5426305177303332, + "grad_norm": 4.3589366782725847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318080 + }, + { + "epoch": 1.5426790159231691, + "grad_norm": 4.4955990574635507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318090 + }, + { + "epoch": 1.5427275141160053, + "grad_norm": 1.6702259017620236e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318100 + }, + { + "epoch": 1.5427760123088414, + "grad_norm": 4.509184918788378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318110 + }, + { + "epoch": 1.5428245105016773, + "grad_norm": 3.401196693175734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318120 + }, + { + "epoch": 1.5428730086945135, + "grad_norm": 3.9010603813949274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318130 + }, + { + "epoch": 1.5429215068873496, + "grad_norm": 3.550555049969262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318140 + }, + { + "epoch": 1.5429700050801856, + "grad_norm": 3.3643328833932173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318150 + }, + { + "epoch": 1.543018503273022, + "grad_norm": 6.94489358465944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318160 + }, + { + "epoch": 1.5430670014658578, + "grad_norm": 9.95819277704868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318170 + }, + { + "epoch": 1.543115499658694, + "grad_norm": 3.0008607154741185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318180 + }, + { + "epoch": 1.5431639978515301, + "grad_norm": 4.799584871761908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318190 + }, + { + "epoch": 1.543212496044366, + "grad_norm": 3.6273235082262545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318200 + }, + { + "epoch": 1.5432609942372022, + "grad_norm": 3.778569066525961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318210 + }, + { + "epoch": 1.5433094924300383, + "grad_norm": 7.37074117296288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318220 + }, + { + "epoch": 1.5433579906228743, + "grad_norm": 4.434207312442595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318230 + }, + { + "epoch": 1.5434064888157106, + "grad_norm": 3.9220270764417364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318240 + }, + { + "epoch": 1.5434549870085466, + "grad_norm": 3.18174045332853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318250 + }, + { + "epoch": 1.5435034852013827, + "grad_norm": 4.325497116042243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318260 + }, + { + "epoch": 1.5435519833942188, + "grad_norm": 3.343572245739779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318270 + }, + { + "epoch": 1.5436004815870548, + "grad_norm": 3.1531726563116536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318280 + }, + { + "epoch": 1.543648979779891, + "grad_norm": 4.556563339974673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318290 + }, + { + "epoch": 1.543697477972727, + "grad_norm": 4.002883997600293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318300 + }, + { + "epoch": 1.543745976165563, + "grad_norm": 5.61858882974775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318310 + }, + { + "epoch": 1.5437944743583993, + "grad_norm": 3.948604216930107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318320 + }, + { + "epoch": 1.5438429725512353, + "grad_norm": 3.310831289127236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318330 + }, + { + "epoch": 1.5438914707440714, + "grad_norm": 3.8708054717062623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318340 + }, + { + "epoch": 1.5439399689369075, + "grad_norm": 3.066487295200204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318350 + }, + { + "epoch": 1.5439884671297435, + "grad_norm": 3.113731850135082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318360 + }, + { + "epoch": 1.5440369653225796, + "grad_norm": 2.973307857701002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318370 + }, + { + "epoch": 1.5440854635154158, + "grad_norm": 2.3815321981146553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318380 + }, + { + "epoch": 1.544133961708252, + "grad_norm": 2.928847777639021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318390 + }, + { + "epoch": 1.544182459901088, + "grad_norm": 1.2252800161149935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318400 + }, + { + "epoch": 1.544230958093924, + "grad_norm": 4.4144272237645055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318410 + }, + { + "epoch": 1.54427945628676, + "grad_norm": 2.9392040801212715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318420 + }, + { + "epoch": 1.5443279544795963, + "grad_norm": 2.642566698796145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318430 + }, + { + "epoch": 1.5443764526724322, + "grad_norm": 3.36335375550334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318440 + }, + { + "epoch": 1.5444249508652685, + "grad_norm": 7.536314114986453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318450 + }, + { + "epoch": 1.5444734490581045, + "grad_norm": 3.057459707633825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318460 + }, + { + "epoch": 1.5445219472509406, + "grad_norm": 3.246578330617922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318470 + }, + { + "epoch": 1.5445704454437768, + "grad_norm": 3.4902640777545457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318480 + }, + { + "epoch": 1.5446189436366127, + "grad_norm": 2.4703311396478966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318490 + }, + { + "epoch": 1.5446674418294488, + "grad_norm": 3.6769782241208304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318500 + }, + { + "epoch": 1.544715940022285, + "grad_norm": 6.979605586820981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318510 + }, + { + "epoch": 1.5447644382151209, + "grad_norm": 3.666582699679566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318520 + }, + { + "epoch": 1.5448129364079572, + "grad_norm": 5.999919494570349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318530 + }, + { + "epoch": 1.5448614346007932, + "grad_norm": 2.899072626405541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318540 + }, + { + "epoch": 1.5449099327936293, + "grad_norm": 2.7242154487794323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318550 + }, + { + "epoch": 1.5449584309864655, + "grad_norm": 3.935191443815711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318560 + }, + { + "epoch": 1.5450069291793014, + "grad_norm": 5.470357109516044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318570 + }, + { + "epoch": 1.5450554273721375, + "grad_norm": 2.5350408350277576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318580 + }, + { + "epoch": 1.5451039255649737, + "grad_norm": 3.090580662501452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318590 + }, + { + "epoch": 1.5451524237578096, + "grad_norm": 2.676417807379039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318600 + }, + { + "epoch": 1.545200921950646, + "grad_norm": 2.8455247047531884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318610 + }, + { + "epoch": 1.5452494201434819, + "grad_norm": 3.1552769996778807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318620 + }, + { + "epoch": 1.545297918336318, + "grad_norm": 2.7212590225644817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318630 + }, + { + "epoch": 1.5453464165291542, + "grad_norm": 2.684307673916919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318640 + }, + { + "epoch": 1.54539491472199, + "grad_norm": 2.857188690086332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318650 + }, + { + "epoch": 1.5454434129148262, + "grad_norm": 2.4528043240934494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318660 + }, + { + "epoch": 1.5454919111076624, + "grad_norm": 3.077612973356736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318670 + }, + { + "epoch": 1.5455404093004983, + "grad_norm": 2.1797788463118195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318680 + }, + { + "epoch": 1.5455889074933347, + "grad_norm": 2.9346441010602575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318690 + }, + { + "epoch": 1.5456374056861706, + "grad_norm": 2.15200159914275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318700 + }, + { + "epoch": 1.5456859038790067, + "grad_norm": 3.4431920425959106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318710 + }, + { + "epoch": 1.5457344020718429, + "grad_norm": 2.0797236288672138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318720 + }, + { + "epoch": 1.5457829002646788, + "grad_norm": 2.431694099414017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318730 + }, + { + "epoch": 1.545831398457515, + "grad_norm": 4.030379727737454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318740 + }, + { + "epoch": 1.545879896650351, + "grad_norm": 4.869626195613819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318750 + }, + { + "epoch": 1.545928394843187, + "grad_norm": 2.180622544756261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318760 + }, + { + "epoch": 1.5459768930360234, + "grad_norm": 2.067845201736418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318770 + }, + { + "epoch": 1.5460253912288593, + "grad_norm": 2.1348213863348064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318780 + }, + { + "epoch": 1.5460738894216954, + "grad_norm": 2.8846329769294243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318790 + }, + { + "epoch": 1.5461223876145316, + "grad_norm": 2.6845930278796004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318800 + }, + { + "epoch": 1.5461708858073675, + "grad_norm": 2.3622204992079787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318810 + }, + { + "epoch": 1.5462193840002036, + "grad_norm": 2.0053998639468773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318820 + }, + { + "epoch": 1.5462678821930398, + "grad_norm": 2.3051718756050832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318830 + }, + { + "epoch": 1.5463163803858757, + "grad_norm": 2.5453084617765853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318840 + }, + { + "epoch": 1.546364878578712, + "grad_norm": 2.709999478156533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318850 + }, + { + "epoch": 1.546413376771548, + "grad_norm": 1.963733637921905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318860 + }, + { + "epoch": 1.5464618749643841, + "grad_norm": 4.6095240691101935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318870 + }, + { + "epoch": 1.5465103731572203, + "grad_norm": 2.1963452923046134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318880 + }, + { + "epoch": 1.5465588713500562, + "grad_norm": 4.0370122178501333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318890 + }, + { + "epoch": 1.5466073695428926, + "grad_norm": 1.8002495494329196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318900 + }, + { + "epoch": 1.5466558677357285, + "grad_norm": 2.077221239460414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318910 + }, + { + "epoch": 1.5467043659285646, + "grad_norm": 1.9441787912910513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318920 + }, + { + "epoch": 1.5467528641214008, + "grad_norm": 1.900684907241157e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318930 + }, + { + "epoch": 1.5468013623142367, + "grad_norm": 2.6489996685086226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318940 + }, + { + "epoch": 1.5468498605070728, + "grad_norm": 1.8183062877596967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318950 + }, + { + "epoch": 1.546898358699909, + "grad_norm": 2.453673175750737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318960 + }, + { + "epoch": 1.546946856892745, + "grad_norm": 1.8976380999902176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318970 + }, + { + "epoch": 1.5469953550855813, + "grad_norm": 2.2101279739672464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318980 + }, + { + "epoch": 1.5470438532784172, + "grad_norm": 2.0743790685173735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 318990 + }, + { + "epoch": 1.5470923514712533, + "grad_norm": 1.8078992525261128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319000 + }, + { + "epoch": 1.5471408496640895, + "grad_norm": 1.656189567711408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319010 + }, + { + "epoch": 1.5471893478569254, + "grad_norm": 2.1626559032483783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319020 + }, + { + "epoch": 1.5472378460497616, + "grad_norm": 3.17501019253541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319030 + }, + { + "epoch": 1.5472863442425977, + "grad_norm": 2.750186638422747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319040 + }, + { + "epoch": 1.5473348424354336, + "grad_norm": 2.0511645004717138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319050 + }, + { + "epoch": 1.54738334062827, + "grad_norm": 3.1788582077751926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319060 + }, + { + "epoch": 1.547431838821106, + "grad_norm": 2.763923703241744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319070 + }, + { + "epoch": 1.547480337013942, + "grad_norm": 2.0745208928474312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319080 + }, + { + "epoch": 1.5475288352067782, + "grad_norm": 3.4512433444433555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319090 + }, + { + "epoch": 1.5475773333996141, + "grad_norm": 1.840775354366997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319100 + }, + { + "epoch": 1.5476258315924503, + "grad_norm": 1.7078947678328404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319110 + }, + { + "epoch": 1.5476743297852864, + "grad_norm": 1.9389607075481763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319120 + }, + { + "epoch": 1.5477228279781223, + "grad_norm": 2.418458393549372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319130 + }, + { + "epoch": 1.5477713261709587, + "grad_norm": 1.8724598760400113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319140 + }, + { + "epoch": 1.5478198243637946, + "grad_norm": 1.6863263851973898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319150 + }, + { + "epoch": 1.5478683225566308, + "grad_norm": 1.5176480872014508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319160 + }, + { + "epoch": 1.547916820749467, + "grad_norm": 1.4748654564300523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319170 + }, + { + "epoch": 1.5479653189423028, + "grad_norm": 1.6431214078238554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319180 + }, + { + "epoch": 1.548013817135139, + "grad_norm": 1.905980724359324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319190 + }, + { + "epoch": 1.5480623153279751, + "grad_norm": 1.806774747592499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319200 + }, + { + "epoch": 1.548110813520811, + "grad_norm": 1.9361367264991713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319210 + }, + { + "epoch": 1.5481593117136474, + "grad_norm": 2.590840608718281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319220 + }, + { + "epoch": 1.5482078099064833, + "grad_norm": 1.7527334250644344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319230 + }, + { + "epoch": 1.5482563080993195, + "grad_norm": 1.9142385099257808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319240 + }, + { + "epoch": 1.5483048062921556, + "grad_norm": 1.708876880002208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319250 + }, + { + "epoch": 1.5483533044849915, + "grad_norm": 2.5411523552065773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319260 + }, + { + "epoch": 1.5484018026778277, + "grad_norm": 2.525159459310089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319270 + }, + { + "epoch": 1.5484503008706638, + "grad_norm": 1.4877754495046247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319280 + }, + { + "epoch": 1.5484987990634997, + "grad_norm": 2.2236376651108003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319290 + }, + { + "epoch": 1.548547297256336, + "grad_norm": 1.48440008729267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319300 + }, + { + "epoch": 1.548595795449172, + "grad_norm": 1.6459296148241265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319310 + }, + { + "epoch": 1.5486442936420082, + "grad_norm": 1.65888096148592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319320 + }, + { + "epoch": 1.5486927918348443, + "grad_norm": 1.8764973219731473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319330 + }, + { + "epoch": 1.5487412900276802, + "grad_norm": 1.833986829069545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319340 + }, + { + "epoch": 1.5487897882205164, + "grad_norm": 1.8129891543594567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319350 + }, + { + "epoch": 1.5488382864133525, + "grad_norm": 1.4650044022346265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319360 + }, + { + "epoch": 1.5488867846061884, + "grad_norm": 1.2935345239384333e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319370 + }, + { + "epoch": 1.5489352827990248, + "grad_norm": 1.476311268788777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319380 + }, + { + "epoch": 1.5489837809918607, + "grad_norm": 1.783457150850154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319390 + }, + { + "epoch": 1.5490322791846969, + "grad_norm": 1.466744521394503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319400 + }, + { + "epoch": 1.549080777377533, + "grad_norm": 2.736928479407652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319410 + }, + { + "epoch": 1.549129275570369, + "grad_norm": 1.7486605941030575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319420 + }, + { + "epoch": 1.5491777737632053, + "grad_norm": 1.483433464954942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319430 + }, + { + "epoch": 1.5492262719560412, + "grad_norm": 1.8252056577239273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319440 + }, + { + "epoch": 1.5492747701488774, + "grad_norm": 1.7835968435520044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319450 + }, + { + "epoch": 1.5493232683417135, + "grad_norm": 1.3270754095628945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319460 + }, + { + "epoch": 1.5493717665345494, + "grad_norm": 1.5109731066331733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319470 + }, + { + "epoch": 1.5494202647273856, + "grad_norm": 1.8501819454286306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319480 + }, + { + "epoch": 1.5494687629202217, + "grad_norm": 1.9995910349734913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319490 + }, + { + "epoch": 1.5495172611130577, + "grad_norm": 2.1286321327806945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319500 + }, + { + "epoch": 1.549565759305894, + "grad_norm": 1.3220422090398642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319510 + }, + { + "epoch": 1.54961425749873, + "grad_norm": 1.6195757268633315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319520 + }, + { + "epoch": 1.549662755691566, + "grad_norm": 1.1424324242170769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319530 + }, + { + "epoch": 1.5497112538844022, + "grad_norm": 1.8021995629169396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319540 + }, + { + "epoch": 1.5497597520772382, + "grad_norm": 1.1640113228850169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319550 + }, + { + "epoch": 1.5498082502700743, + "grad_norm": 1.6603515007318492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319560 + }, + { + "epoch": 1.5498567484629104, + "grad_norm": 1.2472969501686748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319570 + }, + { + "epoch": 1.5499052466557464, + "grad_norm": 2.1061845245640143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319580 + }, + { + "epoch": 1.5499537448485827, + "grad_norm": 1.608967181709886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319590 + }, + { + "epoch": 1.5500022430414186, + "grad_norm": 2.216103780483536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319600 + }, + { + "epoch": 1.5500507412342548, + "grad_norm": 1.3917018293341243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319610 + }, + { + "epoch": 1.550099239427091, + "grad_norm": 1.9088609803930012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319620 + }, + { + "epoch": 1.5501477376199269, + "grad_norm": 1.6518058032488625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319630 + }, + { + "epoch": 1.550196235812763, + "grad_norm": 2.5714101070661854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319640 + }, + { + "epoch": 1.5502447340055991, + "grad_norm": 1.3462162939958944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319650 + }, + { + "epoch": 1.550293232198435, + "grad_norm": 2.553186675413599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319660 + }, + { + "epoch": 1.5503417303912714, + "grad_norm": 1.534149447479649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319670 + }, + { + "epoch": 1.5503902285841074, + "grad_norm": 1.4405938486561354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319680 + }, + { + "epoch": 1.5504387267769435, + "grad_norm": 2.545596942127304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319690 + }, + { + "epoch": 1.5504872249697796, + "grad_norm": 1.2425410034211382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319700 + }, + { + "epoch": 1.5505357231626156, + "grad_norm": 1.209326541129485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319710 + }, + { + "epoch": 1.5505842213554517, + "grad_norm": 1.3999550674270722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319720 + }, + { + "epoch": 1.5506327195482879, + "grad_norm": 2.087209622914088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319730 + }, + { + "epoch": 1.5506812177411238, + "grad_norm": 1.2918155789520824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319740 + }, + { + "epoch": 1.5507297159339601, + "grad_norm": 1.2515175740190898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319750 + }, + { + "epoch": 1.550778214126796, + "grad_norm": 1.4949645787964982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319760 + }, + { + "epoch": 1.5508267123196322, + "grad_norm": 1.3601885484604281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319770 + }, + { + "epoch": 1.5508752105124683, + "grad_norm": 1.286307309555923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319780 + }, + { + "epoch": 1.5509237087053043, + "grad_norm": 1.6043739492488385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319790 + }, + { + "epoch": 1.5509722068981404, + "grad_norm": 1.220956278302765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319800 + }, + { + "epoch": 1.5510207050909766, + "grad_norm": 1.1141044353735197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319810 + }, + { + "epoch": 1.5510692032838125, + "grad_norm": 2.6141410103264207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319820 + }, + { + "epoch": 1.5511177014766488, + "grad_norm": 2.692593739084259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319830 + }, + { + "epoch": 1.5511661996694848, + "grad_norm": 4.361954495379905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319840 + }, + { + "epoch": 1.551214697862321, + "grad_norm": 1.5214965287668747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319850 + }, + { + "epoch": 1.551263196055157, + "grad_norm": 1.2366130874852388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319860 + }, + { + "epoch": 1.551311694247993, + "grad_norm": 1.246321232883929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319870 + }, + { + "epoch": 1.5513601924408291, + "grad_norm": 1.3228726913894207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319880 + }, + { + "epoch": 1.5514086906336653, + "grad_norm": 1.630587149747953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319890 + }, + { + "epoch": 1.5514571888265012, + "grad_norm": 2.4260197051262367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319900 + }, + { + "epoch": 1.5515056870193376, + "grad_norm": 2.6140483555536775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319910 + }, + { + "epoch": 1.5515541852121735, + "grad_norm": 1.4371322265560593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319920 + }, + { + "epoch": 1.5516026834050096, + "grad_norm": 1.1757023798963928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319930 + }, + { + "epoch": 1.5516511815978458, + "grad_norm": 2.3409927507600514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319940 + }, + { + "epoch": 1.5516996797906817, + "grad_norm": 1.1721563453193085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319950 + }, + { + "epoch": 1.551748177983518, + "grad_norm": 1.53440538497307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319960 + }, + { + "epoch": 1.551796676176354, + "grad_norm": 1.1602559624179776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319970 + }, + { + "epoch": 1.5518451743691901, + "grad_norm": 1.0351440238309806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319980 + }, + { + "epoch": 1.5518936725620263, + "grad_norm": 1.4535260106640635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 319990 + }, + { + "epoch": 1.5519421707548622, + "grad_norm": 2.555286471306317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320000 + }, + { + "epoch": 1.5519906689476983, + "grad_norm": 1.1696385371351425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320010 + }, + { + "epoch": 1.5520391671405345, + "grad_norm": 1.4952706806070637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320020 + }, + { + "epoch": 1.5520876653333704, + "grad_norm": 1.1594190141295257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320030 + }, + { + "epoch": 1.5521361635262068, + "grad_norm": 2.046014913048566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320040 + }, + { + "epoch": 1.5521846617190427, + "grad_norm": 1.453939688644823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320050 + }, + { + "epoch": 1.5522331599118788, + "grad_norm": 9.990104388180043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320060 + }, + { + "epoch": 1.552281658104715, + "grad_norm": 1.1860463189350412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320070 + }, + { + "epoch": 1.552330156297551, + "grad_norm": 1.1054815729494294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320080 + }, + { + "epoch": 1.552378654490387, + "grad_norm": 2.325837442640477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320090 + }, + { + "epoch": 1.5524271526832232, + "grad_norm": 1.3855910196980403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320100 + }, + { + "epoch": 1.552475650876059, + "grad_norm": 1.172284953554481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320110 + }, + { + "epoch": 1.5525241490688955, + "grad_norm": 1.1681719058742601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320120 + }, + { + "epoch": 1.5525726472617314, + "grad_norm": 1.2744622779337078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320130 + }, + { + "epoch": 1.5526211454545675, + "grad_norm": 1.7276357766604633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320140 + }, + { + "epoch": 1.5526696436474037, + "grad_norm": 9.911769893733435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320150 + }, + { + "epoch": 1.5527181418402396, + "grad_norm": 1.9618637736584787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320160 + }, + { + "epoch": 1.5527666400330757, + "grad_norm": 1.3939835241671972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320170 + }, + { + "epoch": 1.5528151382259119, + "grad_norm": 1.0265868155556745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320180 + }, + { + "epoch": 1.5528636364187478, + "grad_norm": 1.8318941386041843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320190 + }, + { + "epoch": 1.5529121346115842, + "grad_norm": 1.1180139836142189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320200 + }, + { + "epoch": 1.55296063280442, + "grad_norm": 1.048487732191461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320210 + }, + { + "epoch": 1.5530091309972562, + "grad_norm": 1.1197806770724128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320220 + }, + { + "epoch": 1.5530576291900924, + "grad_norm": 9.903016717771607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320230 + }, + { + "epoch": 1.5531061273829283, + "grad_norm": 1.2881427835509385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320240 + }, + { + "epoch": 1.5531546255757644, + "grad_norm": 2.3832367901377438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320250 + }, + { + "epoch": 1.5532031237686006, + "grad_norm": 1.1098862984226798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320260 + }, + { + "epoch": 1.5532516219614365, + "grad_norm": 1.0115588366943484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320270 + }, + { + "epoch": 1.5533001201542729, + "grad_norm": 1.1118323328673796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320280 + }, + { + "epoch": 1.5533486183471088, + "grad_norm": 1.2665225312957773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320290 + }, + { + "epoch": 1.553397116539945, + "grad_norm": 1.1243677278116593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320300 + }, + { + "epoch": 1.553445614732781, + "grad_norm": 1.0788956217311352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320310 + }, + { + "epoch": 1.553494112925617, + "grad_norm": 1.0268385608469544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320320 + }, + { + "epoch": 1.5535426111184532, + "grad_norm": 9.650115373460721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320330 + }, + { + "epoch": 1.5535911093112893, + "grad_norm": 1.209446907068923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320340 + }, + { + "epoch": 1.5536396075041252, + "grad_norm": 1.2564416351779073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320350 + }, + { + "epoch": 1.5536881056969616, + "grad_norm": 9.874485584759896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320360 + }, + { + "epoch": 1.5537366038897975, + "grad_norm": 1.3214915384196502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320370 + }, + { + "epoch": 1.5537851020826337, + "grad_norm": 1.729500525016192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320380 + }, + { + "epoch": 1.5538336002754698, + "grad_norm": 1.0407811856794069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320390 + }, + { + "epoch": 1.5538820984683057, + "grad_norm": 1.9034780507354299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320400 + }, + { + "epoch": 1.5539305966611419, + "grad_norm": 1.0597153732305742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320410 + }, + { + "epoch": 1.553979094853978, + "grad_norm": 1.0901320735001718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320420 + }, + { + "epoch": 1.5540275930468141, + "grad_norm": 1.102431923527547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320430 + }, + { + "epoch": 1.5540760912396503, + "grad_norm": 2.6824551468962454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320440 + }, + { + "epoch": 1.5541245894324862, + "grad_norm": 0.007311089430004358, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 320450 + }, + { + "epoch": 1.5541730876253224, + "grad_norm": 2.1578675841738004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320460 + }, + { + "epoch": 1.5542215858181585, + "grad_norm": 0.15347212553024292, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 320470 + }, + { + "epoch": 1.5542700840109944, + "grad_norm": 0.0004095789627172053, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320480 + }, + { + "epoch": 1.5543185822038308, + "grad_norm": 0.00026336859446018934, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 320490 + }, + { + "epoch": 1.5543670803966667, + "grad_norm": 0.42723560333251953, + "learning_rate": 0.0002, + "loss": 0.0057, + "step": 320500 + }, + { + "epoch": 1.5544155785895029, + "grad_norm": 0.0010176206706091762, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 320510 + }, + { + "epoch": 1.554464076782339, + "grad_norm": 0.0009524459019303322, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 320520 + }, + { + "epoch": 1.554512574975175, + "grad_norm": 0.00023025478003546596, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 320530 + }, + { + "epoch": 1.554561073168011, + "grad_norm": 0.014521819539368153, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 320540 + }, + { + "epoch": 1.5546095713608472, + "grad_norm": 0.14934584498405457, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 320550 + }, + { + "epoch": 1.5546580695536831, + "grad_norm": 5.7050456234719604e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320560 + }, + { + "epoch": 1.5547065677465195, + "grad_norm": 0.08585260808467865, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 320570 + }, + { + "epoch": 1.5547550659393554, + "grad_norm": 0.0007559476071037352, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 320580 + }, + { + "epoch": 1.5548035641321916, + "grad_norm": 0.00014294513675849885, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 320590 + }, + { + "epoch": 1.5548520623250277, + "grad_norm": 4.098988574696705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320600 + }, + { + "epoch": 1.5549005605178636, + "grad_norm": 0.002007711911574006, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 320610 + }, + { + "epoch": 1.5549490587106998, + "grad_norm": 3.21051811624784e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 320620 + }, + { + "epoch": 1.554997556903536, + "grad_norm": 2.8850567105109803e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320630 + }, + { + "epoch": 1.5550460550963718, + "grad_norm": 3.3829921449068934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320640 + }, + { + "epoch": 1.5550945532892082, + "grad_norm": 2.1313097022357397e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320650 + }, + { + "epoch": 1.5551430514820441, + "grad_norm": 0.00015081724268384278, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320660 + }, + { + "epoch": 1.5551915496748803, + "grad_norm": 2.414787741145119e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320670 + }, + { + "epoch": 1.5552400478677164, + "grad_norm": 1.6752239389461465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320680 + }, + { + "epoch": 1.5552885460605523, + "grad_norm": 2.1928526621195488e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320690 + }, + { + "epoch": 1.5553370442533885, + "grad_norm": 1.5471952792722732e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320700 + }, + { + "epoch": 1.5553855424462246, + "grad_norm": 1.2311062164371833e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320710 + }, + { + "epoch": 1.5554340406390605, + "grad_norm": 1.9844605049001984e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320720 + }, + { + "epoch": 1.555482538831897, + "grad_norm": 1.207754758070223e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320730 + }, + { + "epoch": 1.5555310370247328, + "grad_norm": 1.8858947441913188e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320740 + }, + { + "epoch": 1.555579535217569, + "grad_norm": 1.0382394066255074e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320750 + }, + { + "epoch": 1.5556280334104051, + "grad_norm": 1.04558675957378e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320760 + }, + { + "epoch": 1.555676531603241, + "grad_norm": 1.0760201803350355e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320770 + }, + { + "epoch": 1.5557250297960772, + "grad_norm": 9.744334420247469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320780 + }, + { + "epoch": 1.5557735279889133, + "grad_norm": 1.3144289368938189e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320790 + }, + { + "epoch": 1.5558220261817493, + "grad_norm": 1.2651088582060765e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320800 + }, + { + "epoch": 1.5558705243745856, + "grad_norm": 1.3008676432946231e-05, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 320810 + }, + { + "epoch": 1.5559190225674215, + "grad_norm": 4.085618274984881e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320820 + }, + { + "epoch": 1.5559675207602577, + "grad_norm": 8.218656876124442e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320830 + }, + { + "epoch": 1.5560160189530938, + "grad_norm": 0.00010772854875540361, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320840 + }, + { + "epoch": 1.5560645171459297, + "grad_norm": 0.00014362932415679097, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320850 + }, + { + "epoch": 1.556113015338766, + "grad_norm": 2.9996530429343693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320860 + }, + { + "epoch": 1.556161513531602, + "grad_norm": 2.9202639780123718e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320870 + }, + { + "epoch": 1.556210011724438, + "grad_norm": 3.03709148283815e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320880 + }, + { + "epoch": 1.5562585099172743, + "grad_norm": 2.802126073220279e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320890 + }, + { + "epoch": 1.5563070081101102, + "grad_norm": 1.8926459233625792e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320900 + }, + { + "epoch": 1.5563555063029464, + "grad_norm": 1.4107360584603157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320910 + }, + { + "epoch": 1.5564040044957825, + "grad_norm": 1.148357114288956e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320920 + }, + { + "epoch": 1.5564525026886185, + "grad_norm": 1.2829774277633987e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320930 + }, + { + "epoch": 1.5565010008814546, + "grad_norm": 1.571803295519203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320940 + }, + { + "epoch": 1.5565494990742907, + "grad_norm": 1.0533015483815689e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320950 + }, + { + "epoch": 1.5565979972671269, + "grad_norm": 9.750322533363942e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320960 + }, + { + "epoch": 1.556646495459963, + "grad_norm": 8.563459232391324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320970 + }, + { + "epoch": 1.556694993652799, + "grad_norm": 1.59362443810096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320980 + }, + { + "epoch": 1.556743491845635, + "grad_norm": 1.0960524377878755e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 320990 + }, + { + "epoch": 1.5567919900384712, + "grad_norm": 8.749286280362867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321000 + }, + { + "epoch": 1.5568404882313072, + "grad_norm": 6.659947757725604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321010 + }, + { + "epoch": 1.5568889864241435, + "grad_norm": 1.0015628504334018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321020 + }, + { + "epoch": 1.5569374846169795, + "grad_norm": 8.876482752384618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321030 + }, + { + "epoch": 1.5569859828098156, + "grad_norm": 1.068752408173168e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321040 + }, + { + "epoch": 1.5570344810026517, + "grad_norm": 8.318359505210537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321050 + }, + { + "epoch": 1.5570829791954877, + "grad_norm": 5.485789642989403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321060 + }, + { + "epoch": 1.5571314773883238, + "grad_norm": 4.890241598332068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321070 + }, + { + "epoch": 1.55717997558116, + "grad_norm": 6.4237879087158944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321080 + }, + { + "epoch": 1.5572284737739959, + "grad_norm": 8.31419492897112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321090 + }, + { + "epoch": 1.5572769719668322, + "grad_norm": 5.173136742087081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321100 + }, + { + "epoch": 1.5573254701596682, + "grad_norm": 5.069077360531082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321110 + }, + { + "epoch": 1.5573739683525043, + "grad_norm": 5.1094461923639756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321120 + }, + { + "epoch": 1.5574224665453404, + "grad_norm": 4.558811724564293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321130 + }, + { + "epoch": 1.5574709647381764, + "grad_norm": 6.409994512068806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321140 + }, + { + "epoch": 1.5575194629310125, + "grad_norm": 4.833899311051937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321150 + }, + { + "epoch": 1.5575679611238487, + "grad_norm": 5.950212198513327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321160 + }, + { + "epoch": 1.5576164593166846, + "grad_norm": 4.21941558670369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321170 + }, + { + "epoch": 1.557664957509521, + "grad_norm": 4.941729457641486e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321180 + }, + { + "epoch": 1.5577134557023569, + "grad_norm": 6.7742771534540225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321190 + }, + { + "epoch": 1.557761953895193, + "grad_norm": 4.052101758134086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321200 + }, + { + "epoch": 1.5578104520880292, + "grad_norm": 3.5733967251871945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321210 + }, + { + "epoch": 1.557858950280865, + "grad_norm": 3.7318577597034164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321220 + }, + { + "epoch": 1.5579074484737012, + "grad_norm": 4.819991772819776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321230 + }, + { + "epoch": 1.5579559466665374, + "grad_norm": 5.943162250332534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321240 + }, + { + "epoch": 1.5580044448593733, + "grad_norm": 3.95419465348823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321250 + }, + { + "epoch": 1.5580529430522096, + "grad_norm": 4.008235009678174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321260 + }, + { + "epoch": 1.5581014412450456, + "grad_norm": 4.560655725072138e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321270 + }, + { + "epoch": 1.5581499394378817, + "grad_norm": 4.696707492257701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321280 + }, + { + "epoch": 1.5581984376307179, + "grad_norm": 5.015494025428779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321290 + }, + { + "epoch": 1.5582469358235538, + "grad_norm": 6.223873697308591e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321300 + }, + { + "epoch": 1.55829543401639, + "grad_norm": 3.236605152778793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321310 + }, + { + "epoch": 1.558343932209226, + "grad_norm": 4.319410436437465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321320 + }, + { + "epoch": 1.558392430402062, + "grad_norm": 3.331014795548981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321330 + }, + { + "epoch": 1.5584409285948984, + "grad_norm": 4.215489298076136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321340 + }, + { + "epoch": 1.5584894267877343, + "grad_norm": 2.827208618327859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321350 + }, + { + "epoch": 1.5585379249805704, + "grad_norm": 2.7659846182359615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321360 + }, + { + "epoch": 1.5585864231734066, + "grad_norm": 2.6113571038877126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321370 + }, + { + "epoch": 1.5586349213662425, + "grad_norm": 3.0288756533991545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321380 + }, + { + "epoch": 1.5586834195590786, + "grad_norm": 3.981791451224126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321390 + }, + { + "epoch": 1.5587319177519148, + "grad_norm": 2.6964842163579306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321400 + }, + { + "epoch": 1.5587804159447507, + "grad_norm": 3.276796860518516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321410 + }, + { + "epoch": 1.558828914137587, + "grad_norm": 2.523464672776754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321420 + }, + { + "epoch": 1.558877412330423, + "grad_norm": 2.560110715421615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321430 + }, + { + "epoch": 1.5589259105232591, + "grad_norm": 3.2668187941453652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321440 + }, + { + "epoch": 1.5589744087160953, + "grad_norm": 2.301218955835793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321450 + }, + { + "epoch": 1.5590229069089312, + "grad_norm": 2.810337946357322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321460 + }, + { + "epoch": 1.5590714051017676, + "grad_norm": 2.6311265628464753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321470 + }, + { + "epoch": 1.5591199032946035, + "grad_norm": 2.3584645987284603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321480 + }, + { + "epoch": 1.5591684014874396, + "grad_norm": 3.044945287911105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321490 + }, + { + "epoch": 1.5592168996802758, + "grad_norm": 2.40681629293249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321500 + }, + { + "epoch": 1.5592653978731117, + "grad_norm": 2.233668737972039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321510 + }, + { + "epoch": 1.5593138960659478, + "grad_norm": 3.339525164847146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321520 + }, + { + "epoch": 1.559362394258784, + "grad_norm": 2.168645778510836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321530 + }, + { + "epoch": 1.55941089245162, + "grad_norm": 3.0270775823737495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321540 + }, + { + "epoch": 1.5594593906444563, + "grad_norm": 2.1080609258206096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321550 + }, + { + "epoch": 1.5595078888372922, + "grad_norm": 2.107147111019003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321560 + }, + { + "epoch": 1.5595563870301283, + "grad_norm": 1.972397740246379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321570 + }, + { + "epoch": 1.5596048852229645, + "grad_norm": 1.920558588608401e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321580 + }, + { + "epoch": 1.5596533834158004, + "grad_norm": 2.82990754385537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321590 + }, + { + "epoch": 1.5597018816086365, + "grad_norm": 2.1100354388181586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321600 + }, + { + "epoch": 1.5597503798014727, + "grad_norm": 1.6877471580301062e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321610 + }, + { + "epoch": 1.5597988779943086, + "grad_norm": 2.335583985768608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321620 + }, + { + "epoch": 1.559847376187145, + "grad_norm": 1.954040953933145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321630 + }, + { + "epoch": 1.559895874379981, + "grad_norm": 2.6172206162300427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321640 + }, + { + "epoch": 1.559944372572817, + "grad_norm": 1.8185533008363564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321650 + }, + { + "epoch": 1.5599928707656532, + "grad_norm": 1.6693036286596907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321660 + }, + { + "epoch": 1.560041368958489, + "grad_norm": 1.946762267834856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321670 + }, + { + "epoch": 1.5600898671513252, + "grad_norm": 3.154370688207564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321680 + }, + { + "epoch": 1.5601383653441614, + "grad_norm": 2.537094360377523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321690 + }, + { + "epoch": 1.5601868635369973, + "grad_norm": 1.48110132158763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321700 + }, + { + "epoch": 1.5602353617298337, + "grad_norm": 1.9344013253430603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321710 + }, + { + "epoch": 1.5602838599226696, + "grad_norm": 1.6341449509127415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321720 + }, + { + "epoch": 1.5603323581155057, + "grad_norm": 2.276400891787489e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321730 + }, + { + "epoch": 1.560380856308342, + "grad_norm": 2.4667701836733613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321740 + }, + { + "epoch": 1.5604293545011778, + "grad_norm": 1.7931707816387643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321750 + }, + { + "epoch": 1.560477852694014, + "grad_norm": 1.5071410643940908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321760 + }, + { + "epoch": 1.56052635088685, + "grad_norm": 1.459552663618524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321770 + }, + { + "epoch": 1.560574849079686, + "grad_norm": 1.5019642205515993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321780 + }, + { + "epoch": 1.5606233472725224, + "grad_norm": 1.9605172383307945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321790 + }, + { + "epoch": 1.5606718454653583, + "grad_norm": 1.3232585160949384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321800 + }, + { + "epoch": 1.5607203436581945, + "grad_norm": 1.3928475937063922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321810 + }, + { + "epoch": 1.5607688418510306, + "grad_norm": 1.5052145272420603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321820 + }, + { + "epoch": 1.5608173400438665, + "grad_norm": 1.3774484841633239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321830 + }, + { + "epoch": 1.5608658382367027, + "grad_norm": 3.093543000431964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321840 + }, + { + "epoch": 1.5609143364295388, + "grad_norm": 1.3771561953035416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321850 + }, + { + "epoch": 1.5609628346223747, + "grad_norm": 1.4632404372605379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321860 + }, + { + "epoch": 1.561011332815211, + "grad_norm": 1.1649507314359653e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321870 + }, + { + "epoch": 1.561059831008047, + "grad_norm": 1.271333417207643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321880 + }, + { + "epoch": 1.5611083292008832, + "grad_norm": 1.7575125639268663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321890 + }, + { + "epoch": 1.5611568273937193, + "grad_norm": 1.6838727106005535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321900 + }, + { + "epoch": 1.5612053255865552, + "grad_norm": 1.2504774531407747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321910 + }, + { + "epoch": 1.5612538237793914, + "grad_norm": 1.1010704383807024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321920 + }, + { + "epoch": 1.5613023219722275, + "grad_norm": 1.1815939160442213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321930 + }, + { + "epoch": 1.5613508201650634, + "grad_norm": 1.5871471532591386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321940 + }, + { + "epoch": 1.5613993183578998, + "grad_norm": 1.1317628150209202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321950 + }, + { + "epoch": 1.5614478165507357, + "grad_norm": 1.1213576271984493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321960 + }, + { + "epoch": 1.5614963147435719, + "grad_norm": 1.2741171531160944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321970 + }, + { + "epoch": 1.561544812936408, + "grad_norm": 1.162317630587495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321980 + }, + { + "epoch": 1.561593311129244, + "grad_norm": 1.7775089418137213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 321990 + }, + { + "epoch": 1.5616418093220803, + "grad_norm": 1.070529378921492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322000 + }, + { + "epoch": 1.5616903075149162, + "grad_norm": 1.0465693094374728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322010 + }, + { + "epoch": 1.5617388057077524, + "grad_norm": 1.282068524233182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322020 + }, + { + "epoch": 1.5617873039005885, + "grad_norm": 1.190504917758517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322030 + }, + { + "epoch": 1.5618358020934244, + "grad_norm": 1.5649500255676685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322040 + }, + { + "epoch": 1.5618843002862606, + "grad_norm": 1.0945763051495305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322050 + }, + { + "epoch": 1.5619327984790967, + "grad_norm": 1.0084788755193586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322060 + }, + { + "epoch": 1.5619812966719326, + "grad_norm": 1.1029715096810833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322070 + }, + { + "epoch": 1.562029794864769, + "grad_norm": 1.3390480262387428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322080 + }, + { + "epoch": 1.562078293057605, + "grad_norm": 1.5326494349210407e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322090 + }, + { + "epoch": 1.562126791250441, + "grad_norm": 1.0564216381681035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322100 + }, + { + "epoch": 1.5621752894432772, + "grad_norm": 9.558916644891724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322110 + }, + { + "epoch": 1.5622237876361131, + "grad_norm": 1.1270913091721013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322120 + }, + { + "epoch": 1.5622722858289493, + "grad_norm": 1.0375077863500337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322130 + }, + { + "epoch": 1.5623207840217854, + "grad_norm": 1.4321103662950918e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322140 + }, + { + "epoch": 1.5623692822146213, + "grad_norm": 1.1191981457159272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322150 + }, + { + "epoch": 1.5624177804074577, + "grad_norm": 9.527992119728879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322160 + }, + { + "epoch": 1.5624662786002936, + "grad_norm": 8.814004672785813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322170 + }, + { + "epoch": 1.5625147767931298, + "grad_norm": 1.0961051657432108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322180 + }, + { + "epoch": 1.562563274985966, + "grad_norm": 1.3104948948239326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322190 + }, + { + "epoch": 1.5626117731788018, + "grad_norm": 1.0080269703394151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322200 + }, + { + "epoch": 1.562660271371638, + "grad_norm": 8.087179139693035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322210 + }, + { + "epoch": 1.5627087695644741, + "grad_norm": 9.548436992190545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322220 + }, + { + "epoch": 1.56275726775731, + "grad_norm": 1.0074314786834293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322230 + }, + { + "epoch": 1.5628057659501464, + "grad_norm": 2.6838251869776286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322240 + }, + { + "epoch": 1.5628542641429823, + "grad_norm": 1.0062559567813878e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322250 + }, + { + "epoch": 1.5629027623358185, + "grad_norm": 9.587938620825298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322260 + }, + { + "epoch": 1.5629512605286546, + "grad_norm": 8.512226372658915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322270 + }, + { + "epoch": 1.5629997587214906, + "grad_norm": 6.915626045156387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322280 + }, + { + "epoch": 1.5630482569143267, + "grad_norm": 1.2195581575724646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322290 + }, + { + "epoch": 1.5630967551071628, + "grad_norm": 8.130593300847977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322300 + }, + { + "epoch": 1.5631452532999988, + "grad_norm": 8.231980359596491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322310 + }, + { + "epoch": 1.5631937514928351, + "grad_norm": 1.7080720908779767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322320 + }, + { + "epoch": 1.563242249685671, + "grad_norm": 9.581078757037176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322330 + }, + { + "epoch": 1.5632907478785072, + "grad_norm": 1.135207185143372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322340 + }, + { + "epoch": 1.5633392460713433, + "grad_norm": 7.278894145201775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322350 + }, + { + "epoch": 1.5633877442641793, + "grad_norm": 9.023673896990658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322360 + }, + { + "epoch": 1.5634362424570154, + "grad_norm": 8.242761850851821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322370 + }, + { + "epoch": 1.5634847406498515, + "grad_norm": 7.51284574107558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322380 + }, + { + "epoch": 1.5635332388426875, + "grad_norm": 1.1562219697225373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322390 + }, + { + "epoch": 1.5635817370355238, + "grad_norm": 5.394274921854958e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322400 + }, + { + "epoch": 1.5636302352283598, + "grad_norm": 9.960174338630168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322410 + }, + { + "epoch": 1.563678733421196, + "grad_norm": 8.174073968802986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322420 + }, + { + "epoch": 1.563727231614032, + "grad_norm": 2.424673994028126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322430 + }, + { + "epoch": 1.563775729806868, + "grad_norm": 1.3240479574960773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322440 + }, + { + "epoch": 1.563824227999704, + "grad_norm": 7.421344321301149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322450 + }, + { + "epoch": 1.5638727261925403, + "grad_norm": 7.020314001238148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322460 + }, + { + "epoch": 1.5639212243853762, + "grad_norm": 7.109692319318128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322470 + }, + { + "epoch": 1.5639697225782125, + "grad_norm": 7.588578228023835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322480 + }, + { + "epoch": 1.5640182207710485, + "grad_norm": 1.0459360737513634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322490 + }, + { + "epoch": 1.5640667189638846, + "grad_norm": 5.948404577793553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322500 + }, + { + "epoch": 1.5641152171567207, + "grad_norm": 7.327777780119504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322510 + }, + { + "epoch": 1.5641637153495567, + "grad_norm": 7.204968142104917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322520 + }, + { + "epoch": 1.564212213542393, + "grad_norm": 7.344094683503499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322530 + }, + { + "epoch": 1.564260711735229, + "grad_norm": 9.949445711754379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322540 + }, + { + "epoch": 1.564309209928065, + "grad_norm": 6.508593060061685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322550 + }, + { + "epoch": 1.5643577081209012, + "grad_norm": 6.136341426099534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322560 + }, + { + "epoch": 1.5644062063137372, + "grad_norm": 8.971662168733019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322570 + }, + { + "epoch": 1.5644547045065733, + "grad_norm": 7.298116315723746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322580 + }, + { + "epoch": 1.5645032026994095, + "grad_norm": 9.127779208029096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322590 + }, + { + "epoch": 1.5645517008922454, + "grad_norm": 5.433103638097236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322600 + }, + { + "epoch": 1.5646001990850817, + "grad_norm": 6.36991273950116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322610 + }, + { + "epoch": 1.5646486972779177, + "grad_norm": 6.813895652157953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322620 + }, + { + "epoch": 1.5646971954707538, + "grad_norm": 6.799514835620357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322630 + }, + { + "epoch": 1.56474569366359, + "grad_norm": 9.05450576738076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322640 + }, + { + "epoch": 1.5647941918564259, + "grad_norm": 6.43597275029606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322650 + }, + { + "epoch": 1.564842690049262, + "grad_norm": 6.35785738722916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322660 + }, + { + "epoch": 1.5648911882420982, + "grad_norm": 4.885263251708238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322670 + }, + { + "epoch": 1.564939686434934, + "grad_norm": 5.562984597418108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322680 + }, + { + "epoch": 1.5649881846277705, + "grad_norm": 8.595908980169042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322690 + }, + { + "epoch": 1.5650366828206064, + "grad_norm": 6.507201533167972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322700 + }, + { + "epoch": 1.5650851810134425, + "grad_norm": 5.377561933528341e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322710 + }, + { + "epoch": 1.5651336792062787, + "grad_norm": 5.729918939323397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322720 + }, + { + "epoch": 1.5651821773991146, + "grad_norm": 4.903557169200212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322730 + }, + { + "epoch": 1.5652306755919507, + "grad_norm": 8.318232289639127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322740 + }, + { + "epoch": 1.5652791737847869, + "grad_norm": 4.890385412181786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322750 + }, + { + "epoch": 1.5653276719776228, + "grad_norm": 7.781558224451146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322760 + }, + { + "epoch": 1.5653761701704592, + "grad_norm": 4.698254087998066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322770 + }, + { + "epoch": 1.565424668363295, + "grad_norm": 5.500641577782517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322780 + }, + { + "epoch": 1.5654731665561312, + "grad_norm": 8.144829166667478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322790 + }, + { + "epoch": 1.5655216647489674, + "grad_norm": 6.299335382209392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322800 + }, + { + "epoch": 1.5655701629418033, + "grad_norm": 5.601087309514696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322810 + }, + { + "epoch": 1.5656186611346394, + "grad_norm": 5.318407261256652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322820 + }, + { + "epoch": 1.5656671593274756, + "grad_norm": 5.546889951801859e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322830 + }, + { + "epoch": 1.5657156575203115, + "grad_norm": 6.863066914775118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322840 + }, + { + "epoch": 1.5657641557131479, + "grad_norm": 5.371721272240393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322850 + }, + { + "epoch": 1.5658126539059838, + "grad_norm": 6.969532364564657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322860 + }, + { + "epoch": 1.56586115209882, + "grad_norm": 5.280742811919481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322870 + }, + { + "epoch": 1.565909650291656, + "grad_norm": 5.662615762958012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322880 + }, + { + "epoch": 1.565958148484492, + "grad_norm": 6.821223905717488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322890 + }, + { + "epoch": 1.5660066466773281, + "grad_norm": 3.8844339655952353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322900 + }, + { + "epoch": 1.5660551448701643, + "grad_norm": 4.5377575474958576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322910 + }, + { + "epoch": 1.5661036430630002, + "grad_norm": 4.793317884832504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322920 + }, + { + "epoch": 1.5661521412558366, + "grad_norm": 5.154618065716932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322930 + }, + { + "epoch": 1.5662006394486725, + "grad_norm": 6.62423644826049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322940 + }, + { + "epoch": 1.5662491376415086, + "grad_norm": 5.454226652545913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322950 + }, + { + "epoch": 1.5662976358343448, + "grad_norm": 4.5027425699117885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322960 + }, + { + "epoch": 1.5663461340271807, + "grad_norm": 4.4295373413660855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322970 + }, + { + "epoch": 1.5663946322200168, + "grad_norm": 5.240000859885185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322980 + }, + { + "epoch": 1.566443130412853, + "grad_norm": 5.938209142186679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 322990 + }, + { + "epoch": 1.5664916286056891, + "grad_norm": 3.7896020899097493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323000 + }, + { + "epoch": 1.5665401267985253, + "grad_norm": 5.643681788569666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323010 + }, + { + "epoch": 1.5665886249913612, + "grad_norm": 0.08153633028268814, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323020 + }, + { + "epoch": 1.5666371231841973, + "grad_norm": 1.093194259738084e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323030 + }, + { + "epoch": 1.5666856213770335, + "grad_norm": 9.009787390823476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323040 + }, + { + "epoch": 1.5667341195698694, + "grad_norm": 6.081812534830533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323050 + }, + { + "epoch": 1.5667826177627058, + "grad_norm": 1.0655719961505383e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323060 + }, + { + "epoch": 1.5668311159555417, + "grad_norm": 4.007117695437046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323070 + }, + { + "epoch": 1.5668796141483778, + "grad_norm": 3.4519935070420615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323080 + }, + { + "epoch": 1.566928112341214, + "grad_norm": 1.067268499355123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323090 + }, + { + "epoch": 1.56697661053405, + "grad_norm": 1.6924774399740272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323100 + }, + { + "epoch": 1.567025108726886, + "grad_norm": 2.1491564439202193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323110 + }, + { + "epoch": 1.5670736069197222, + "grad_norm": 1.7634747564443387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323120 + }, + { + "epoch": 1.5671221051125581, + "grad_norm": 0.00021336508507374674, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323130 + }, + { + "epoch": 1.5671706033053945, + "grad_norm": 1.0710022024795762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323140 + }, + { + "epoch": 1.5672191014982304, + "grad_norm": 1.5311593415390234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323150 + }, + { + "epoch": 1.5672675996910665, + "grad_norm": 2.4457606286887312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323160 + }, + { + "epoch": 1.5673160978839027, + "grad_norm": 9.278116408495407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323170 + }, + { + "epoch": 1.5673645960767386, + "grad_norm": 8.040428838285152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323180 + }, + { + "epoch": 1.5674130942695748, + "grad_norm": 9.386980082126684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323190 + }, + { + "epoch": 1.567461592462411, + "grad_norm": 2.891780241043307e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323200 + }, + { + "epoch": 1.5675100906552468, + "grad_norm": 7.907255508143862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323210 + }, + { + "epoch": 1.5675585888480832, + "grad_norm": 7.589836741317413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323220 + }, + { + "epoch": 1.5676070870409191, + "grad_norm": 9.708198831503978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323230 + }, + { + "epoch": 1.5676555852337553, + "grad_norm": 1.7955943576453137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323240 + }, + { + "epoch": 1.5677040834265914, + "grad_norm": 1.890992507469491e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323250 + }, + { + "epoch": 1.5677525816194273, + "grad_norm": 7.084804565238301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323260 + }, + { + "epoch": 1.5678010798122635, + "grad_norm": 1.5961728649926954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323270 + }, + { + "epoch": 1.5678495780050996, + "grad_norm": 1.0739098570411443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323280 + }, + { + "epoch": 1.5678980761979355, + "grad_norm": 8.982447639027669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323290 + }, + { + "epoch": 1.567946574390772, + "grad_norm": 8.390476864406082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323300 + }, + { + "epoch": 1.5679950725836078, + "grad_norm": 2.514291281840997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323310 + }, + { + "epoch": 1.568043570776444, + "grad_norm": 7.904255312496389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323320 + }, + { + "epoch": 1.56809206896928, + "grad_norm": 6.117959401308326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323330 + }, + { + "epoch": 1.568140567162116, + "grad_norm": 7.668404577998444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323340 + }, + { + "epoch": 1.5681890653549522, + "grad_norm": 8.059686251726816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323350 + }, + { + "epoch": 1.5682375635477883, + "grad_norm": 7.578225336146716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323360 + }, + { + "epoch": 1.5682860617406242, + "grad_norm": 6.195930950525508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323370 + }, + { + "epoch": 1.5683345599334606, + "grad_norm": 8.803643254395865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323380 + }, + { + "epoch": 1.5683830581262965, + "grad_norm": 8.17260229268868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323390 + }, + { + "epoch": 1.5684315563191327, + "grad_norm": 5.702645466953982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323400 + }, + { + "epoch": 1.5684800545119688, + "grad_norm": 9.988955298467772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323410 + }, + { + "epoch": 1.5685285527048047, + "grad_norm": 5.954517519057845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323420 + }, + { + "epoch": 1.5685770508976409, + "grad_norm": 4.942510827277147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323430 + }, + { + "epoch": 1.568625549090477, + "grad_norm": 5.988828775116417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323440 + }, + { + "epoch": 1.568674047283313, + "grad_norm": 5.498332598108391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323450 + }, + { + "epoch": 1.5687225454761493, + "grad_norm": 5.449016384773131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323460 + }, + { + "epoch": 1.5687710436689852, + "grad_norm": 6.991184022808739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323470 + }, + { + "epoch": 1.5688195418618214, + "grad_norm": 1.1008567071257858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323480 + }, + { + "epoch": 1.5688680400546575, + "grad_norm": 7.30033207219094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323490 + }, + { + "epoch": 1.5689165382474934, + "grad_norm": 5.986013320580241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323500 + }, + { + "epoch": 1.5689650364403298, + "grad_norm": 5.945499310655578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323510 + }, + { + "epoch": 1.5690135346331657, + "grad_norm": 5.272135581435577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323520 + }, + { + "epoch": 1.5690620328260019, + "grad_norm": 7.103291136445478e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323530 + }, + { + "epoch": 1.569110531018838, + "grad_norm": 7.335993927881646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323540 + }, + { + "epoch": 1.569159029211674, + "grad_norm": 6.049188527867955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323550 + }, + { + "epoch": 1.56920752740451, + "grad_norm": 5.67156405395508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323560 + }, + { + "epoch": 1.5692560255973462, + "grad_norm": 5.761627335232333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323570 + }, + { + "epoch": 1.5693045237901821, + "grad_norm": 5.251773700365447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323580 + }, + { + "epoch": 1.5693530219830185, + "grad_norm": 5.911852554163488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323590 + }, + { + "epoch": 1.5694015201758544, + "grad_norm": 5.444932753562171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323600 + }, + { + "epoch": 1.5694500183686906, + "grad_norm": 4.801073600901873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323610 + }, + { + "epoch": 1.5694985165615267, + "grad_norm": 5.351799359232245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323620 + }, + { + "epoch": 1.5695470147543626, + "grad_norm": 4.6427112465607934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323630 + }, + { + "epoch": 1.5695955129471988, + "grad_norm": 1.1195423894605483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323640 + }, + { + "epoch": 1.569644011140035, + "grad_norm": 6.159022518659185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323650 + }, + { + "epoch": 1.5696925093328709, + "grad_norm": 5.656819439536775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323660 + }, + { + "epoch": 1.5697410075257072, + "grad_norm": 6.553840421474888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323670 + }, + { + "epoch": 1.5697895057185431, + "grad_norm": 5.214529323893657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323680 + }, + { + "epoch": 1.5698380039113793, + "grad_norm": 6.209572802617913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323690 + }, + { + "epoch": 1.5698865021042154, + "grad_norm": 1.0295353831679677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323700 + }, + { + "epoch": 1.5699350002970514, + "grad_norm": 1.3494801578417537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323710 + }, + { + "epoch": 1.5699834984898875, + "grad_norm": 6.211319600879506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323720 + }, + { + "epoch": 1.5700319966827236, + "grad_norm": 5.274063141769147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323730 + }, + { + "epoch": 1.5700804948755596, + "grad_norm": 5.28294833657128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323740 + }, + { + "epoch": 1.570128993068396, + "grad_norm": 4.788489604834467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323750 + }, + { + "epoch": 1.5701774912612319, + "grad_norm": 5.268997824714461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323760 + }, + { + "epoch": 1.570225989454068, + "grad_norm": 6.262125680223107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323770 + }, + { + "epoch": 1.5702744876469041, + "grad_norm": 4.187159845514543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323780 + }, + { + "epoch": 1.57032298583974, + "grad_norm": 5.231645445746835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323790 + }, + { + "epoch": 1.5703714840325762, + "grad_norm": 3.3206208627234446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323800 + }, + { + "epoch": 1.5704199822254123, + "grad_norm": 4.232077799315448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323810 + }, + { + "epoch": 1.5704684804182483, + "grad_norm": 4.139933480473701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323820 + }, + { + "epoch": 1.5705169786110846, + "grad_norm": 4.669324482620141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323830 + }, + { + "epoch": 1.5705654768039206, + "grad_norm": 4.857717499362479e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323840 + }, + { + "epoch": 1.5706139749967567, + "grad_norm": 3.392158589576866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323850 + }, + { + "epoch": 1.5706624731895928, + "grad_norm": 3.9712304555905575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323860 + }, + { + "epoch": 1.5707109713824288, + "grad_norm": 5.342913027561735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323870 + }, + { + "epoch": 1.570759469575265, + "grad_norm": 4.791389187630557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323880 + }, + { + "epoch": 1.570807967768101, + "grad_norm": 5.771216819994152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323890 + }, + { + "epoch": 1.570856465960937, + "grad_norm": 3.9119473171922436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323900 + }, + { + "epoch": 1.5709049641537733, + "grad_norm": 4.844329737352382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323910 + }, + { + "epoch": 1.5709534623466093, + "grad_norm": 4.646128672902705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323920 + }, + { + "epoch": 1.5710019605394454, + "grad_norm": 3.609055170272768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323930 + }, + { + "epoch": 1.5710504587322816, + "grad_norm": 4.457360205378791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323940 + }, + { + "epoch": 1.5710989569251175, + "grad_norm": 3.9901655668472813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323950 + }, + { + "epoch": 1.5711474551179536, + "grad_norm": 3.6018110449731466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323960 + }, + { + "epoch": 1.5711959533107898, + "grad_norm": 3.333697975449468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323970 + }, + { + "epoch": 1.5712444515036257, + "grad_norm": 4.523931238509249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323980 + }, + { + "epoch": 1.571292949696462, + "grad_norm": 3.862820960875979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 323990 + }, + { + "epoch": 1.571341447889298, + "grad_norm": 7.256261937982345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324000 + }, + { + "epoch": 1.5713899460821341, + "grad_norm": 3.477571510757116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324010 + }, + { + "epoch": 1.5714384442749703, + "grad_norm": 4.267524502665765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324020 + }, + { + "epoch": 1.5714869424678062, + "grad_norm": 3.6609728226721927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324030 + }, + { + "epoch": 1.5715354406606425, + "grad_norm": 3.8558150095013843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324040 + }, + { + "epoch": 1.5715839388534785, + "grad_norm": 3.2508913250239857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324050 + }, + { + "epoch": 1.5716324370463146, + "grad_norm": 2.9899925380050263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324060 + }, + { + "epoch": 1.5716809352391508, + "grad_norm": 4.1360470959261875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324070 + }, + { + "epoch": 1.5717294334319867, + "grad_norm": 7.110739943527733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324080 + }, + { + "epoch": 1.5717779316248228, + "grad_norm": 4.715776640296099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324090 + }, + { + "epoch": 1.571826429817659, + "grad_norm": 2.9502152187887987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324100 + }, + { + "epoch": 1.5718749280104949, + "grad_norm": 4.0805505818752863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324110 + }, + { + "epoch": 1.5719234262033313, + "grad_norm": 6.331919166768785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324120 + }, + { + "epoch": 1.5719719243961672, + "grad_norm": 3.738624343441188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324130 + }, + { + "epoch": 1.5720204225890033, + "grad_norm": 3.724927353232488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324140 + }, + { + "epoch": 1.5720689207818395, + "grad_norm": 3.610448970903235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324150 + }, + { + "epoch": 1.5721174189746754, + "grad_norm": 6.780056196475925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324160 + }, + { + "epoch": 1.5721659171675115, + "grad_norm": 3.420359178107901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324170 + }, + { + "epoch": 1.5722144153603477, + "grad_norm": 3.206520204912522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324180 + }, + { + "epoch": 1.5722629135531836, + "grad_norm": 3.2421181117570086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324190 + }, + { + "epoch": 1.57231141174602, + "grad_norm": 3.011031992627977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324200 + }, + { + "epoch": 1.5723599099388559, + "grad_norm": 3.661124594600551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324210 + }, + { + "epoch": 1.572408408131692, + "grad_norm": 4.1879894752128166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324220 + }, + { + "epoch": 1.5724569063245282, + "grad_norm": 2.7188966100766265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324230 + }, + { + "epoch": 1.572505404517364, + "grad_norm": 3.1025155067254673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324240 + }, + { + "epoch": 1.5725539027102002, + "grad_norm": 3.7826762877557485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324250 + }, + { + "epoch": 1.5726024009030364, + "grad_norm": 3.1511345355283993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324260 + }, + { + "epoch": 1.5726508990958723, + "grad_norm": 1.2518063385869027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324270 + }, + { + "epoch": 1.5726993972887087, + "grad_norm": 3.064211568926112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324280 + }, + { + "epoch": 1.5727478954815446, + "grad_norm": 4.1382870108463976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324290 + }, + { + "epoch": 1.5727963936743807, + "grad_norm": 3.3271635402343236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324300 + }, + { + "epoch": 1.5728448918672169, + "grad_norm": 2.8315943723100645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324310 + }, + { + "epoch": 1.5728933900600528, + "grad_norm": 2.856426704056503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324320 + }, + { + "epoch": 1.572941888252889, + "grad_norm": 2.535044245632889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324330 + }, + { + "epoch": 1.572990386445725, + "grad_norm": 3.233935217394901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324340 + }, + { + "epoch": 1.573038884638561, + "grad_norm": 2.447169151764683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324350 + }, + { + "epoch": 1.5730873828313974, + "grad_norm": 3.049889869544131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324360 + }, + { + "epoch": 1.5731358810242333, + "grad_norm": 2.3205751631394378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324370 + }, + { + "epoch": 1.5731843792170694, + "grad_norm": 2.396626825884596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324380 + }, + { + "epoch": 1.5732328774099056, + "grad_norm": 3.18792132247836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324390 + }, + { + "epoch": 1.5732813756027415, + "grad_norm": 2.2471657246114773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324400 + }, + { + "epoch": 1.5733298737955776, + "grad_norm": 2.851591318631108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324410 + }, + { + "epoch": 1.5733783719884138, + "grad_norm": 2.2349580319769302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324420 + }, + { + "epoch": 1.5734268701812497, + "grad_norm": 9.76041405920114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324430 + }, + { + "epoch": 1.573475368374086, + "grad_norm": 3.3109549235632585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324440 + }, + { + "epoch": 1.573523866566922, + "grad_norm": 2.7186658257960516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324450 + }, + { + "epoch": 1.5735723647597581, + "grad_norm": 2.9825918090864434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324460 + }, + { + "epoch": 1.5736208629525943, + "grad_norm": 2.0821229895773286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324470 + }, + { + "epoch": 1.5736693611454302, + "grad_norm": 2.486460743966745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324480 + }, + { + "epoch": 1.5737178593382664, + "grad_norm": 2.5843863227237307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324490 + }, + { + "epoch": 1.5737663575311025, + "grad_norm": 2.8694134357465373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324500 + }, + { + "epoch": 1.5738148557239384, + "grad_norm": 2.807739747368032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324510 + }, + { + "epoch": 1.5738633539167748, + "grad_norm": 2.3415788064085064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324520 + }, + { + "epoch": 1.5739118521096107, + "grad_norm": 2.1554583895522228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324530 + }, + { + "epoch": 1.5739603503024469, + "grad_norm": 2.713225626393978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324540 + }, + { + "epoch": 1.574008848495283, + "grad_norm": 2.5790288304960995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324550 + }, + { + "epoch": 1.574057346688119, + "grad_norm": 2.2865341975375486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324560 + }, + { + "epoch": 1.5741058448809553, + "grad_norm": 1.96655861373074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324570 + }, + { + "epoch": 1.5741543430737912, + "grad_norm": 3.37926422844248e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324580 + }, + { + "epoch": 1.5742028412666274, + "grad_norm": 2.7365820187696954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324590 + }, + { + "epoch": 1.5742513394594635, + "grad_norm": 3.014279457147495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324600 + }, + { + "epoch": 1.5742998376522994, + "grad_norm": 1.9543315943337802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324610 + }, + { + "epoch": 1.5743483358451356, + "grad_norm": 2.026828127554836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324620 + }, + { + "epoch": 1.5743968340379717, + "grad_norm": 2.1431389995996142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324630 + }, + { + "epoch": 1.5744453322308076, + "grad_norm": 2.3994536491045437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324640 + }, + { + "epoch": 1.574493830423644, + "grad_norm": 3.2942733696472715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324650 + }, + { + "epoch": 1.57454232861648, + "grad_norm": 2.913816388172563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324660 + }, + { + "epoch": 1.574590826809316, + "grad_norm": 2.352235242142342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324670 + }, + { + "epoch": 1.5746393250021522, + "grad_norm": 1.999702021748817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324680 + }, + { + "epoch": 1.5746878231949881, + "grad_norm": 2.409495607480494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324690 + }, + { + "epoch": 1.5747363213878243, + "grad_norm": 2.357112975914788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324700 + }, + { + "epoch": 1.5747848195806604, + "grad_norm": 2.2887283535055758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324710 + }, + { + "epoch": 1.5748333177734963, + "grad_norm": 2.1533435301535064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324720 + }, + { + "epoch": 1.5748818159663327, + "grad_norm": 0.0001273373345611617, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324730 + }, + { + "epoch": 1.5749303141591686, + "grad_norm": 2.489701387276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324740 + }, + { + "epoch": 1.5749788123520048, + "grad_norm": 2.2857149417632172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324750 + }, + { + "epoch": 1.575027310544841, + "grad_norm": 1.78988571519767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324760 + }, + { + "epoch": 1.5750758087376768, + "grad_norm": 2.2360408991062286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324770 + }, + { + "epoch": 1.575124306930513, + "grad_norm": 2.71416411123937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324780 + }, + { + "epoch": 1.5751728051233491, + "grad_norm": 2.2689339118642238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324790 + }, + { + "epoch": 1.575221303316185, + "grad_norm": 1.8052050165806577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324800 + }, + { + "epoch": 1.5752698015090214, + "grad_norm": 1.9903627901385335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324810 + }, + { + "epoch": 1.5753182997018573, + "grad_norm": 1.741981776604007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324820 + }, + { + "epoch": 1.5753667978946935, + "grad_norm": 2.983033766668086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324830 + }, + { + "epoch": 1.5754152960875296, + "grad_norm": 2.3453890207747463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324840 + }, + { + "epoch": 1.5754637942803655, + "grad_norm": 2.3091429568466992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324850 + }, + { + "epoch": 1.5755122924732017, + "grad_norm": 1.8650884214821417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324860 + }, + { + "epoch": 1.5755607906660378, + "grad_norm": 2.3946159899423947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324870 + }, + { + "epoch": 1.5756092888588737, + "grad_norm": 1.6533064695067878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324880 + }, + { + "epoch": 1.5756577870517101, + "grad_norm": 2.666309626420116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324890 + }, + { + "epoch": 1.575706285244546, + "grad_norm": 1.74745892422834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324900 + }, + { + "epoch": 1.5757547834373822, + "grad_norm": 2.105368110960626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324910 + }, + { + "epoch": 1.5758032816302183, + "grad_norm": 2.114918231654883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324920 + }, + { + "epoch": 1.5758517798230542, + "grad_norm": 2.065495152692165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324930 + }, + { + "epoch": 1.5759002780158904, + "grad_norm": 2.4487135874551313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324940 + }, + { + "epoch": 1.5759487762087265, + "grad_norm": 2.430739129977155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324950 + }, + { + "epoch": 1.5759972744015625, + "grad_norm": 4.954031851411855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324960 + }, + { + "epoch": 1.5760457725943988, + "grad_norm": 2.3141129190662468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324970 + }, + { + "epoch": 1.5760942707872347, + "grad_norm": 2.3819112016099098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324980 + }, + { + "epoch": 1.5761427689800709, + "grad_norm": 2.0633682140669407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 324990 + }, + { + "epoch": 1.576191267172907, + "grad_norm": 2.1225733348728681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325000 + }, + { + "epoch": 1.576239765365743, + "grad_norm": 2.0138419642989902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325010 + }, + { + "epoch": 1.576288263558579, + "grad_norm": 1.569110850141442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325020 + }, + { + "epoch": 1.5763367617514152, + "grad_norm": 1.6350934117781435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325030 + }, + { + "epoch": 1.5763852599442514, + "grad_norm": 2.4370902451664733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325040 + }, + { + "epoch": 1.5764337581370875, + "grad_norm": 2.4229760242633347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325050 + }, + { + "epoch": 1.5764822563299234, + "grad_norm": 1.5875646397489618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325060 + }, + { + "epoch": 1.5765307545227596, + "grad_norm": 2.079133878396533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325070 + }, + { + "epoch": 1.5765792527155957, + "grad_norm": 2.4965811462607235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325080 + }, + { + "epoch": 1.5766277509084317, + "grad_norm": 2.0791564736555301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325090 + }, + { + "epoch": 1.576676249101268, + "grad_norm": 1.8592467654343636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325100 + }, + { + "epoch": 1.576724747294104, + "grad_norm": 1.6441347838735965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325110 + }, + { + "epoch": 1.57677324548694, + "grad_norm": 1.5172682310549135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325120 + }, + { + "epoch": 1.5768217436797762, + "grad_norm": 1.9477619161989423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325130 + }, + { + "epoch": 1.5768702418726122, + "grad_norm": 2.0157877145265957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325140 + }, + { + "epoch": 1.5769187400654483, + "grad_norm": 1.5268867059603508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325150 + }, + { + "epoch": 1.5769672382582844, + "grad_norm": 1.5518324403274164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325160 + }, + { + "epoch": 1.5770157364511204, + "grad_norm": 2.045794218474839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325170 + }, + { + "epoch": 1.5770642346439567, + "grad_norm": 2.052636887128756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325180 + }, + { + "epoch": 1.5771127328367927, + "grad_norm": 1.9642921245122125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325190 + }, + { + "epoch": 1.5771612310296288, + "grad_norm": 1.4486214183762058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325200 + }, + { + "epoch": 1.577209729222465, + "grad_norm": 1.45258411521354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325210 + }, + { + "epoch": 1.5772582274153009, + "grad_norm": 2.082886254584082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325220 + }, + { + "epoch": 1.577306725608137, + "grad_norm": 1.4341924270411255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325230 + }, + { + "epoch": 1.5773552238009731, + "grad_norm": 1.707198009626154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325240 + }, + { + "epoch": 1.577403721993809, + "grad_norm": 1.9912152993128984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325250 + }, + { + "epoch": 1.5774522201866454, + "grad_norm": 1.7952666553355812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325260 + }, + { + "epoch": 1.5775007183794814, + "grad_norm": 1.5195082880836708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325270 + }, + { + "epoch": 1.5775492165723175, + "grad_norm": 3.176657230596902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325280 + }, + { + "epoch": 1.5775977147651536, + "grad_norm": 1.8081432529015729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325290 + }, + { + "epoch": 1.5776462129579896, + "grad_norm": 1.526667148255001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325300 + }, + { + "epoch": 1.5776947111508257, + "grad_norm": 1.5698012134635064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325310 + }, + { + "epoch": 1.5777432093436619, + "grad_norm": 1.610905968618681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325320 + }, + { + "epoch": 1.5777917075364978, + "grad_norm": 1.4476434273547056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325330 + }, + { + "epoch": 1.5778402057293341, + "grad_norm": 1.4440348650168744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325340 + }, + { + "epoch": 1.57788870392217, + "grad_norm": 1.4374884926837694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325350 + }, + { + "epoch": 1.5779372021150062, + "grad_norm": 1.626339383165032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325360 + }, + { + "epoch": 1.5779857003078424, + "grad_norm": 1.4009755489041709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325370 + }, + { + "epoch": 1.5780341985006783, + "grad_norm": 1.4539072878960724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325380 + }, + { + "epoch": 1.5780826966935144, + "grad_norm": 1.6616503728528187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325390 + }, + { + "epoch": 1.5781311948863506, + "grad_norm": 1.495253201255764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325400 + }, + { + "epoch": 1.5781796930791865, + "grad_norm": 1.3540886811824748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325410 + }, + { + "epoch": 1.5782281912720229, + "grad_norm": 1.439129562186281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325420 + }, + { + "epoch": 1.5782766894648588, + "grad_norm": 2.4252557295767474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325430 + }, + { + "epoch": 1.578325187657695, + "grad_norm": 1.4696671257752314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325440 + }, + { + "epoch": 1.578373685850531, + "grad_norm": 1.526040023236419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325450 + }, + { + "epoch": 1.578422184043367, + "grad_norm": 1.3287272793149896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325460 + }, + { + "epoch": 1.5784706822362031, + "grad_norm": 6.401025416380435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325470 + }, + { + "epoch": 1.5785191804290393, + "grad_norm": 1.5353671756201948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325480 + }, + { + "epoch": 1.5785676786218752, + "grad_norm": 5.54230325633398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325490 + }, + { + "epoch": 1.5786161768147116, + "grad_norm": 1.9942068263389956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325500 + }, + { + "epoch": 1.5786646750075475, + "grad_norm": 1.5895385274689033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325510 + }, + { + "epoch": 1.5787131732003836, + "grad_norm": 1.493653769557568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325520 + }, + { + "epoch": 1.5787616713932198, + "grad_norm": 1.452470996810007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325530 + }, + { + "epoch": 1.5788101695860557, + "grad_norm": 1.6339910757778853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325540 + }, + { + "epoch": 1.578858667778892, + "grad_norm": 1.4589086561045406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325550 + }, + { + "epoch": 1.578907165971728, + "grad_norm": 1.7115863215622085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325560 + }, + { + "epoch": 1.5789556641645641, + "grad_norm": 4.974829721504648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325570 + }, + { + "epoch": 1.5790041623574003, + "grad_norm": 1.6054528373388166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325580 + }, + { + "epoch": 1.5790526605502362, + "grad_norm": 1.6190951157568634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325590 + }, + { + "epoch": 1.5791011587430723, + "grad_norm": 1.5372857831152942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325600 + }, + { + "epoch": 1.5791496569359085, + "grad_norm": 1.3554445388308523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325610 + }, + { + "epoch": 1.5791981551287444, + "grad_norm": 1.4819430305124115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325620 + }, + { + "epoch": 1.5792466533215808, + "grad_norm": 1.8014962677170843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325630 + }, + { + "epoch": 1.5792951515144167, + "grad_norm": 1.4435835282711196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325640 + }, + { + "epoch": 1.5793436497072528, + "grad_norm": 1.3272776300254918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325650 + }, + { + "epoch": 1.579392147900089, + "grad_norm": 1.3060282810783974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325660 + }, + { + "epoch": 1.579440646092925, + "grad_norm": 1.531184210534775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325670 + }, + { + "epoch": 1.579489144285761, + "grad_norm": 1.122382400353672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325680 + }, + { + "epoch": 1.5795376424785972, + "grad_norm": 1.3902663908993418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325690 + }, + { + "epoch": 1.579586140671433, + "grad_norm": 1.256652524261881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325700 + }, + { + "epoch": 1.5796346388642695, + "grad_norm": 1.4826535732481716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325710 + }, + { + "epoch": 1.5796831370571054, + "grad_norm": 1.2916206060253899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325720 + }, + { + "epoch": 1.5797316352499415, + "grad_norm": 3.412995681628672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325730 + }, + { + "epoch": 1.5797801334427777, + "grad_norm": 1.4783412893848435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325740 + }, + { + "epoch": 1.5798286316356136, + "grad_norm": 1.558249351774066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325750 + }, + { + "epoch": 1.5798771298284497, + "grad_norm": 1.8997153006239387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325760 + }, + { + "epoch": 1.5799256280212859, + "grad_norm": 1.246577312485897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325770 + }, + { + "epoch": 1.5799741262141218, + "grad_norm": 1.2111283353988256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325780 + }, + { + "epoch": 1.5800226244069582, + "grad_norm": 1.5127127994674083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325790 + }, + { + "epoch": 1.580071122599794, + "grad_norm": 1.1078349615445404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325800 + }, + { + "epoch": 1.5801196207926302, + "grad_norm": 1.3321927383458387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325810 + }, + { + "epoch": 1.5801681189854664, + "grad_norm": 1.2247437553014606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325820 + }, + { + "epoch": 1.5802166171783023, + "grad_norm": 1.186127462915465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325830 + }, + { + "epoch": 1.5802651153711385, + "grad_norm": 1.4975552176110796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325840 + }, + { + "epoch": 1.5803136135639746, + "grad_norm": 1.214612694866446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325850 + }, + { + "epoch": 1.5803621117568105, + "grad_norm": 1.2685774208875955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325860 + }, + { + "epoch": 1.5804106099496469, + "grad_norm": 1.3552148914186546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325870 + }, + { + "epoch": 1.5804591081424828, + "grad_norm": 1.5001512565504527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325880 + }, + { + "epoch": 1.580507606335319, + "grad_norm": 1.329779450998103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325890 + }, + { + "epoch": 1.580556104528155, + "grad_norm": 1.097406894245978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325900 + }, + { + "epoch": 1.580604602720991, + "grad_norm": 1.3533497167372843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325910 + }, + { + "epoch": 1.5806531009138272, + "grad_norm": 1.1419379575272615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325920 + }, + { + "epoch": 1.5807015991066633, + "grad_norm": 1.161256193427107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325930 + }, + { + "epoch": 1.5807500972994992, + "grad_norm": 1.2173219943178992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325940 + }, + { + "epoch": 1.5807985954923356, + "grad_norm": 1.450363953381384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325950 + }, + { + "epoch": 1.5808470936851715, + "grad_norm": 1.1510260833347274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325960 + }, + { + "epoch": 1.5808955918780077, + "grad_norm": 1.1308121372621827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325970 + }, + { + "epoch": 1.5809440900708438, + "grad_norm": 1.1928482024359255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325980 + }, + { + "epoch": 1.5809925882636797, + "grad_norm": 1.3488870820310694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 325990 + }, + { + "epoch": 1.5810410864565159, + "grad_norm": 1.134380624989717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326000 + }, + { + "epoch": 1.581089584649352, + "grad_norm": 1.0999569610703475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326010 + }, + { + "epoch": 1.581138082842188, + "grad_norm": 1.2020544204460748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326020 + }, + { + "epoch": 1.5811865810350243, + "grad_norm": 1.0777836934039442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326030 + }, + { + "epoch": 1.5812350792278602, + "grad_norm": 1.3683330735148047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326040 + }, + { + "epoch": 1.5812835774206964, + "grad_norm": 1.0919631421302256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326050 + }, + { + "epoch": 1.5813320756135325, + "grad_norm": 1.6870777130861825e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326060 + }, + { + "epoch": 1.5813805738063684, + "grad_norm": 1.0682908424541893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326070 + }, + { + "epoch": 1.5814290719992048, + "grad_norm": 4.3863991550097126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326080 + }, + { + "epoch": 1.5814775701920407, + "grad_norm": 1.2950174266279646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326090 + }, + { + "epoch": 1.5815260683848769, + "grad_norm": 1.0414012052706312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326100 + }, + { + "epoch": 1.581574566577713, + "grad_norm": 1.1462513782589667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326110 + }, + { + "epoch": 1.581623064770549, + "grad_norm": 1.2816899186418595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326120 + }, + { + "epoch": 1.581671562963385, + "grad_norm": 9.893413022155073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326130 + }, + { + "epoch": 1.5817200611562212, + "grad_norm": 1.1319723114411318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326140 + }, + { + "epoch": 1.5817685593490571, + "grad_norm": 1.2226554702010617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326150 + }, + { + "epoch": 1.5818170575418935, + "grad_norm": 2.627795936405164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326160 + }, + { + "epoch": 1.5818655557347294, + "grad_norm": 1.1478986294832794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326170 + }, + { + "epoch": 1.5819140539275656, + "grad_norm": 1.8436379889408272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326180 + }, + { + "epoch": 1.5819625521204017, + "grad_norm": 1.406164784611974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326190 + }, + { + "epoch": 1.5820110503132376, + "grad_norm": 1.440810279973448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326200 + }, + { + "epoch": 1.5820595485060738, + "grad_norm": 1.0083905976898677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326210 + }, + { + "epoch": 1.58210804669891, + "grad_norm": 1.0645024417499371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326220 + }, + { + "epoch": 1.5821565448917458, + "grad_norm": 1.314857911438594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326230 + }, + { + "epoch": 1.5822050430845822, + "grad_norm": 1.2384812464460992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326240 + }, + { + "epoch": 1.5822535412774181, + "grad_norm": 1.3981252777739428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326250 + }, + { + "epoch": 1.5823020394702543, + "grad_norm": 1.4228878342237294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326260 + }, + { + "epoch": 1.5823505376630904, + "grad_norm": 3.6916688372912176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326270 + }, + { + "epoch": 1.5823990358559263, + "grad_norm": 1.0421420171269347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326280 + }, + { + "epoch": 1.5824475340487625, + "grad_norm": 1.2473704202875524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326290 + }, + { + "epoch": 1.5824960322415986, + "grad_norm": 1.0015174467525867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326300 + }, + { + "epoch": 1.5825445304344345, + "grad_norm": 1.1185072423813835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326310 + }, + { + "epoch": 1.582593028627271, + "grad_norm": 4.0487358887730807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326320 + }, + { + "epoch": 1.5826415268201068, + "grad_norm": 1.0658393279072698e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326330 + }, + { + "epoch": 1.582690025012943, + "grad_norm": 1.434886485185416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326340 + }, + { + "epoch": 1.5827385232057791, + "grad_norm": 1.0691370988524795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326350 + }, + { + "epoch": 1.582787021398615, + "grad_norm": 1.6727662455195969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326360 + }, + { + "epoch": 1.5828355195914512, + "grad_norm": 1.624814700562638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326370 + }, + { + "epoch": 1.5828840177842873, + "grad_norm": 1.4626222366587172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326380 + }, + { + "epoch": 1.5829325159771233, + "grad_norm": 1.0584981424699436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326390 + }, + { + "epoch": 1.5829810141699596, + "grad_norm": 9.941226863929842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326400 + }, + { + "epoch": 1.5830295123627955, + "grad_norm": 1.5663940189369896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326410 + }, + { + "epoch": 1.5830780105556317, + "grad_norm": 1.0413890549898497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326420 + }, + { + "epoch": 1.5831265087484678, + "grad_norm": 1.2785824310412863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326430 + }, + { + "epoch": 1.5831750069413038, + "grad_norm": 1.1372821262511934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326440 + }, + { + "epoch": 1.58322350513414, + "grad_norm": 1.0783091397570388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326450 + }, + { + "epoch": 1.583272003326976, + "grad_norm": 9.357233210494087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326460 + }, + { + "epoch": 1.583320501519812, + "grad_norm": 1.0484750845307644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326470 + }, + { + "epoch": 1.5833689997126483, + "grad_norm": 1.0102642278297935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326480 + }, + { + "epoch": 1.5834174979054843, + "grad_norm": 1.2997564624583902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326490 + }, + { + "epoch": 1.5834659960983204, + "grad_norm": 9.490217678376212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326500 + }, + { + "epoch": 1.5835144942911565, + "grad_norm": 9.854982607748752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326510 + }, + { + "epoch": 1.5835629924839925, + "grad_norm": 1.1194094895472517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326520 + }, + { + "epoch": 1.5836114906768286, + "grad_norm": 9.871702388863923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326530 + }, + { + "epoch": 1.5836599888696647, + "grad_norm": 1.235156048551289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326540 + }, + { + "epoch": 1.5837084870625007, + "grad_norm": 1.0334085942531601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326550 + }, + { + "epoch": 1.583756985255337, + "grad_norm": 1.170224663837871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326560 + }, + { + "epoch": 1.583805483448173, + "grad_norm": 1.5203397651930572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326570 + }, + { + "epoch": 1.583853981641009, + "grad_norm": 1.0016928086997723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326580 + }, + { + "epoch": 1.5839024798338452, + "grad_norm": 1.1128526011816575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326590 + }, + { + "epoch": 1.5839509780266812, + "grad_norm": 1.138589027505077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326600 + }, + { + "epoch": 1.5839994762195175, + "grad_norm": 1.0040099596153595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326610 + }, + { + "epoch": 1.5840479744123535, + "grad_norm": 9.562555192133004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326620 + }, + { + "epoch": 1.5840964726051896, + "grad_norm": 9.384813637325351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326630 + }, + { + "epoch": 1.5841449707980257, + "grad_norm": 1.1619821549402332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326640 + }, + { + "epoch": 1.5841934689908617, + "grad_norm": 1.2246952962868818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326650 + }, + { + "epoch": 1.5842419671836978, + "grad_norm": 1.1206278571762596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326660 + }, + { + "epoch": 1.584290465376534, + "grad_norm": 1.285748396639974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326670 + }, + { + "epoch": 1.5843389635693699, + "grad_norm": 9.863401828624774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326680 + }, + { + "epoch": 1.5843874617622062, + "grad_norm": 9.875174811213583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326690 + }, + { + "epoch": 1.5844359599550422, + "grad_norm": 1.2976994412383647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326700 + }, + { + "epoch": 1.5844844581478783, + "grad_norm": 8.820746444371252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326710 + }, + { + "epoch": 1.5845329563407144, + "grad_norm": 1.0231694602680363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326720 + }, + { + "epoch": 1.5845814545335504, + "grad_norm": 1.5696228672368306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326730 + }, + { + "epoch": 1.5846299527263865, + "grad_norm": 1.020277338170672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326740 + }, + { + "epoch": 1.5846784509192227, + "grad_norm": 1.0114147386275363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326750 + }, + { + "epoch": 1.5847269491120586, + "grad_norm": 9.482904772539769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326760 + }, + { + "epoch": 1.584775447304895, + "grad_norm": 8.534222928346935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326770 + }, + { + "epoch": 1.5848239454977309, + "grad_norm": 1.1381812470290242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326780 + }, + { + "epoch": 1.584872443690567, + "grad_norm": 9.040882531508032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326790 + }, + { + "epoch": 1.5849209418834032, + "grad_norm": 1.075433857522512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326800 + }, + { + "epoch": 1.584969440076239, + "grad_norm": 1.3738069526425534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326810 + }, + { + "epoch": 1.5850179382690752, + "grad_norm": 9.267396450240994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326820 + }, + { + "epoch": 1.5850664364619114, + "grad_norm": 9.686991120361199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326830 + }, + { + "epoch": 1.5851149346547473, + "grad_norm": 1.1588838333409512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326840 + }, + { + "epoch": 1.5851634328475837, + "grad_norm": 1.003226586249184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326850 + }, + { + "epoch": 1.5852119310404196, + "grad_norm": 1.2426379214502958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326860 + }, + { + "epoch": 1.5852604292332557, + "grad_norm": 8.588216360294609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326870 + }, + { + "epoch": 1.5853089274260919, + "grad_norm": 1.1411434996944081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326880 + }, + { + "epoch": 1.5853574256189278, + "grad_norm": 1.1549745693173463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326890 + }, + { + "epoch": 1.585405923811764, + "grad_norm": 8.605950796436446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326900 + }, + { + "epoch": 1.5854544220046, + "grad_norm": 1.0532989591638398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326910 + }, + { + "epoch": 1.585502920197436, + "grad_norm": 9.430637959439991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326920 + }, + { + "epoch": 1.5855514183902724, + "grad_norm": 8.576011367722458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326930 + }, + { + "epoch": 1.5855999165831083, + "grad_norm": 9.761181019030118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326940 + }, + { + "epoch": 1.5856484147759444, + "grad_norm": 1.1124544130325376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326950 + }, + { + "epoch": 1.5856969129687806, + "grad_norm": 9.271467149574164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326960 + }, + { + "epoch": 1.5857454111616165, + "grad_norm": 1.355212475573353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326970 + }, + { + "epoch": 1.5857939093544526, + "grad_norm": 1.0521750226644144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326980 + }, + { + "epoch": 1.5858424075472888, + "grad_norm": 8.76726886644974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 326990 + }, + { + "epoch": 1.5858909057401247, + "grad_norm": 9.005108125847983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327000 + }, + { + "epoch": 1.585939403932961, + "grad_norm": 9.255435173827209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327010 + }, + { + "epoch": 1.585987902125797, + "grad_norm": 9.670437606246196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327020 + }, + { + "epoch": 1.5860364003186331, + "grad_norm": 8.829977105051512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327030 + }, + { + "epoch": 1.5860848985114693, + "grad_norm": 8.875371548811017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327040 + }, + { + "epoch": 1.5861333967043052, + "grad_norm": 1.043968964609121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327050 + }, + { + "epoch": 1.5861818948971413, + "grad_norm": 9.021405134035376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327060 + }, + { + "epoch": 1.5862303930899775, + "grad_norm": 8.765756120965307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327070 + }, + { + "epoch": 1.5862788912828134, + "grad_norm": 8.851218353811419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327080 + }, + { + "epoch": 1.5863273894756498, + "grad_norm": 9.586067761802042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327090 + }, + { + "epoch": 1.5863758876684857, + "grad_norm": 8.796636308261441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327100 + }, + { + "epoch": 1.5864243858613218, + "grad_norm": 1.0476284728611063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327110 + }, + { + "epoch": 1.586472884054158, + "grad_norm": 1.0086052526503408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327120 + }, + { + "epoch": 1.586521382246994, + "grad_norm": 8.408436968920796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327130 + }, + { + "epoch": 1.5865698804398303, + "grad_norm": 1.2852615327574313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327140 + }, + { + "epoch": 1.5866183786326662, + "grad_norm": 1.1547437850367714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327150 + }, + { + "epoch": 1.5866668768255023, + "grad_norm": 9.802735689845576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327160 + }, + { + "epoch": 1.5867153750183385, + "grad_norm": 9.963953573333129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327170 + }, + { + "epoch": 1.5867638732111744, + "grad_norm": 8.233460846440721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327180 + }, + { + "epoch": 1.5868123714040105, + "grad_norm": 9.704154280143484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327190 + }, + { + "epoch": 1.5868608695968467, + "grad_norm": 9.399118283681673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327200 + }, + { + "epoch": 1.5869093677896826, + "grad_norm": 2.1822188500664197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327210 + }, + { + "epoch": 1.586957865982519, + "grad_norm": 8.510993865229466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327220 + }, + { + "epoch": 1.587006364175355, + "grad_norm": 8.353434566288342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327230 + }, + { + "epoch": 1.587054862368191, + "grad_norm": 8.952377328341754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327240 + }, + { + "epoch": 1.5871033605610272, + "grad_norm": 1.1838083224802176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327250 + }, + { + "epoch": 1.587151858753863, + "grad_norm": 8.636991566390861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327260 + }, + { + "epoch": 1.5872003569466993, + "grad_norm": 8.928977734967702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327270 + }, + { + "epoch": 1.5872488551395354, + "grad_norm": 1.0432339081489772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327280 + }, + { + "epoch": 1.5872973533323713, + "grad_norm": 1.3915224883476185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327290 + }, + { + "epoch": 1.5873458515252077, + "grad_norm": 8.534774309509885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327300 + }, + { + "epoch": 1.5873943497180436, + "grad_norm": 8.598184564334588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327310 + }, + { + "epoch": 1.5874428479108798, + "grad_norm": 9.36717725608105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327320 + }, + { + "epoch": 1.587491346103716, + "grad_norm": 1.0660802018946924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327330 + }, + { + "epoch": 1.5875398442965518, + "grad_norm": 8.397750406174964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327340 + }, + { + "epoch": 1.587588342489388, + "grad_norm": 9.567045822223008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327350 + }, + { + "epoch": 1.587636840682224, + "grad_norm": 8.331584666620984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327360 + }, + { + "epoch": 1.58768533887506, + "grad_norm": 8.085257974244087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327370 + }, + { + "epoch": 1.5877338370678964, + "grad_norm": 1.1480147321663026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327380 + }, + { + "epoch": 1.5877823352607323, + "grad_norm": 9.74973630718523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327390 + }, + { + "epoch": 1.5878308334535685, + "grad_norm": 1.3275146670821414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327400 + }, + { + "epoch": 1.5878793316464046, + "grad_norm": 8.990561894961502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327410 + }, + { + "epoch": 1.5879278298392405, + "grad_norm": 8.464348866255023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327420 + }, + { + "epoch": 1.5879763280320767, + "grad_norm": 8.711540999684075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327430 + }, + { + "epoch": 1.5880248262249128, + "grad_norm": 7.950721681027062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327440 + }, + { + "epoch": 1.5880733244177487, + "grad_norm": 8.537406870345876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327450 + }, + { + "epoch": 1.588121822610585, + "grad_norm": 8.607077717215361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327460 + }, + { + "epoch": 1.588170320803421, + "grad_norm": 8.415511132398024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327470 + }, + { + "epoch": 1.5882188189962572, + "grad_norm": 8.127833694970832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327480 + }, + { + "epoch": 1.5882673171890933, + "grad_norm": 8.383532446032405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327490 + }, + { + "epoch": 1.5883158153819292, + "grad_norm": 8.544841278990134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327500 + }, + { + "epoch": 1.5883643135747654, + "grad_norm": 1.0008859163690431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327510 + }, + { + "epoch": 1.5884128117676015, + "grad_norm": 8.58339817000342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327520 + }, + { + "epoch": 1.5884613099604374, + "grad_norm": 1.502160529298635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327530 + }, + { + "epoch": 1.5885098081532738, + "grad_norm": 9.162494762904316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327540 + }, + { + "epoch": 1.5885583063461097, + "grad_norm": 1.0353218016234678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327550 + }, + { + "epoch": 1.5886068045389459, + "grad_norm": 1.0291188345945557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327560 + }, + { + "epoch": 1.588655302731782, + "grad_norm": 1.011485508684018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327570 + }, + { + "epoch": 1.588703800924618, + "grad_norm": 8.467306855663992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327580 + }, + { + "epoch": 1.588752299117454, + "grad_norm": 8.484125402219433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327590 + }, + { + "epoch": 1.5888007973102902, + "grad_norm": 9.089851005228411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327600 + }, + { + "epoch": 1.5888492955031264, + "grad_norm": 9.767667563664872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327610 + }, + { + "epoch": 1.5888977936959625, + "grad_norm": 1.0460203014872604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327620 + }, + { + "epoch": 1.5889462918887984, + "grad_norm": 8.252131777908289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327630 + }, + { + "epoch": 1.5889947900816346, + "grad_norm": 8.184296973468008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327640 + }, + { + "epoch": 1.5890432882744707, + "grad_norm": 8.542817653278689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327650 + }, + { + "epoch": 1.5890917864673066, + "grad_norm": 9.710083759273402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327660 + }, + { + "epoch": 1.589140284660143, + "grad_norm": 8.05728745945089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327670 + }, + { + "epoch": 1.589188782852979, + "grad_norm": 7.953624958645378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327680 + }, + { + "epoch": 1.589237281045815, + "grad_norm": 8.123729600129082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327690 + }, + { + "epoch": 1.5892857792386512, + "grad_norm": 8.247261007454654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327700 + }, + { + "epoch": 1.5893342774314871, + "grad_norm": 3.604273501878197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327710 + }, + { + "epoch": 1.5893827756243233, + "grad_norm": 1.1830703527948572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327720 + }, + { + "epoch": 1.5894312738171594, + "grad_norm": 7.84505971296312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327730 + }, + { + "epoch": 1.5894797720099954, + "grad_norm": 6.867663842058391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327740 + }, + { + "epoch": 1.5895282702028317, + "grad_norm": 7.853881811570318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327750 + }, + { + "epoch": 1.5895767683956676, + "grad_norm": 7.740602825379028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327760 + }, + { + "epoch": 1.5896252665885038, + "grad_norm": 1.3580343249941507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327770 + }, + { + "epoch": 1.58967376478134, + "grad_norm": 8.671013063121791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327780 + }, + { + "epoch": 1.5897222629741758, + "grad_norm": 5.745415592173231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327790 + }, + { + "epoch": 1.589770761167012, + "grad_norm": 1.2467357635159715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327800 + }, + { + "epoch": 1.5898192593598481, + "grad_norm": 7.969626381054695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327810 + }, + { + "epoch": 1.589867757552684, + "grad_norm": 7.49110569131517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327820 + }, + { + "epoch": 1.5899162557455204, + "grad_norm": 1.237649058793977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327830 + }, + { + "epoch": 1.5899647539383563, + "grad_norm": 7.286774206249902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327840 + }, + { + "epoch": 1.5900132521311925, + "grad_norm": 8.548278174203006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327850 + }, + { + "epoch": 1.5900617503240286, + "grad_norm": 7.818181302354787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327860 + }, + { + "epoch": 1.5901102485168646, + "grad_norm": 8.123365802248372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327870 + }, + { + "epoch": 1.5901587467097007, + "grad_norm": 7.546267966063169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327880 + }, + { + "epoch": 1.5902072449025368, + "grad_norm": 7.870963258937991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327890 + }, + { + "epoch": 1.5902557430953728, + "grad_norm": 7.668707269203878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327900 + }, + { + "epoch": 1.5903042412882091, + "grad_norm": 1.0618537515938442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327910 + }, + { + "epoch": 1.590352739481045, + "grad_norm": 1.7737507107540296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327920 + }, + { + "epoch": 1.5904012376738812, + "grad_norm": 7.865660478501013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327930 + }, + { + "epoch": 1.5904497358667173, + "grad_norm": 7.726266204599597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327940 + }, + { + "epoch": 1.5904982340595533, + "grad_norm": 7.604391072391081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327950 + }, + { + "epoch": 1.5905467322523894, + "grad_norm": 7.240442556621929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327960 + }, + { + "epoch": 1.5905952304452255, + "grad_norm": 7.552554848189175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327970 + }, + { + "epoch": 1.5906437286380615, + "grad_norm": 8.108881388579903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327980 + }, + { + "epoch": 1.5906922268308978, + "grad_norm": 7.283674818836516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 327990 + }, + { + "epoch": 1.5907407250237338, + "grad_norm": 1.17551515188552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328000 + }, + { + "epoch": 1.59078922321657, + "grad_norm": 8.423732111850768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328010 + }, + { + "epoch": 1.590837721409406, + "grad_norm": 7.69567805036786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328020 + }, + { + "epoch": 1.590886219602242, + "grad_norm": 7.730341167189181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328030 + }, + { + "epoch": 1.5909347177950781, + "grad_norm": 7.421782299843471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328040 + }, + { + "epoch": 1.5909832159879143, + "grad_norm": 7.340859298210489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328050 + }, + { + "epoch": 1.5910317141807502, + "grad_norm": 7.64297425348559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328060 + }, + { + "epoch": 1.5910802123735865, + "grad_norm": 7.817006775212576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328070 + }, + { + "epoch": 1.5911287105664225, + "grad_norm": 6.775911742806784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328080 + }, + { + "epoch": 1.5911772087592586, + "grad_norm": 7.422770664788914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328090 + }, + { + "epoch": 1.5912257069520948, + "grad_norm": 7.346371688754516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328100 + }, + { + "epoch": 1.5912742051449307, + "grad_norm": 8.233988779693391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328110 + }, + { + "epoch": 1.591322703337767, + "grad_norm": 7.640054633384352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328120 + }, + { + "epoch": 1.591371201530603, + "grad_norm": 7.703985716034367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328130 + }, + { + "epoch": 1.591419699723439, + "grad_norm": 6.865145962819952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328140 + }, + { + "epoch": 1.5914681979162753, + "grad_norm": 7.20357036243513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328150 + }, + { + "epoch": 1.5915166961091112, + "grad_norm": 8.765746883909742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328160 + }, + { + "epoch": 1.5915651943019473, + "grad_norm": 7.150988068360675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328170 + }, + { + "epoch": 1.5916136924947835, + "grad_norm": 7.814035996034363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328180 + }, + { + "epoch": 1.5916621906876194, + "grad_norm": 7.52856408325897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328190 + }, + { + "epoch": 1.5917106888804557, + "grad_norm": 7.974777815888956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328200 + }, + { + "epoch": 1.5917591870732917, + "grad_norm": 7.854330874579318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328210 + }, + { + "epoch": 1.5918076852661278, + "grad_norm": 7.466675810974266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328220 + }, + { + "epoch": 1.591856183458964, + "grad_norm": 7.279268743332068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328230 + }, + { + "epoch": 1.5919046816517999, + "grad_norm": 7.109221655809961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328240 + }, + { + "epoch": 1.591953179844636, + "grad_norm": 7.240963384447241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328250 + }, + { + "epoch": 1.5920016780374722, + "grad_norm": 7.275984614807385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328260 + }, + { + "epoch": 1.592050176230308, + "grad_norm": 8.903153059236502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328270 + }, + { + "epoch": 1.5920986744231445, + "grad_norm": 7.303108162659555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328280 + }, + { + "epoch": 1.5921471726159804, + "grad_norm": 7.267088619755668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328290 + }, + { + "epoch": 1.5921956708088165, + "grad_norm": 9.573132331297529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328300 + }, + { + "epoch": 1.5922441690016527, + "grad_norm": 7.612776897758522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328310 + }, + { + "epoch": 1.5922926671944886, + "grad_norm": 8.789875494130683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328320 + }, + { + "epoch": 1.5923411653873247, + "grad_norm": 7.696587545069633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328330 + }, + { + "epoch": 1.5923896635801609, + "grad_norm": 8.458142275458158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328340 + }, + { + "epoch": 1.5924381617729968, + "grad_norm": 7.537368418297774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328350 + }, + { + "epoch": 1.5924866599658332, + "grad_norm": 7.852349170889283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328360 + }, + { + "epoch": 1.592535158158669, + "grad_norm": 9.303734316290502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328370 + }, + { + "epoch": 1.5925836563515052, + "grad_norm": 6.800404150908435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328380 + }, + { + "epoch": 1.5926321545443414, + "grad_norm": 1.0038123576805447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328390 + }, + { + "epoch": 1.5926806527371773, + "grad_norm": 6.977154498599702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328400 + }, + { + "epoch": 1.5927291509300134, + "grad_norm": 7.917784472510903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328410 + }, + { + "epoch": 1.5927776491228496, + "grad_norm": 7.183931671761457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328420 + }, + { + "epoch": 1.5928261473156855, + "grad_norm": 7.528050360861016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328430 + }, + { + "epoch": 1.5928746455085219, + "grad_norm": 1.9034692400055064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328440 + }, + { + "epoch": 1.5929231437013578, + "grad_norm": 6.909936445254061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328450 + }, + { + "epoch": 1.592971641894194, + "grad_norm": 9.410156565081707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328460 + }, + { + "epoch": 1.59302014008703, + "grad_norm": 6.877416325323793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328470 + }, + { + "epoch": 1.593068638279866, + "grad_norm": 6.681415243292577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328480 + }, + { + "epoch": 1.5931171364727021, + "grad_norm": 6.988733503021649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328490 + }, + { + "epoch": 1.5931656346655383, + "grad_norm": 7.150190128868417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328500 + }, + { + "epoch": 1.5932141328583742, + "grad_norm": 7.420130287982829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328510 + }, + { + "epoch": 1.5932626310512106, + "grad_norm": 8.806782858528095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328520 + }, + { + "epoch": 1.5933111292440465, + "grad_norm": 6.56096474926926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328530 + }, + { + "epoch": 1.5933596274368826, + "grad_norm": 6.775990613050453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328540 + }, + { + "epoch": 1.5934081256297188, + "grad_norm": 7.965657999875475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328550 + }, + { + "epoch": 1.5934566238225547, + "grad_norm": 7.368519305828158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328560 + }, + { + "epoch": 1.5935051220153909, + "grad_norm": 6.73337936518692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328570 + }, + { + "epoch": 1.593553620208227, + "grad_norm": 6.624492243645363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328580 + }, + { + "epoch": 1.593602118401063, + "grad_norm": 7.8580342233181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328590 + }, + { + "epoch": 1.5936506165938993, + "grad_norm": 6.762495985412897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328600 + }, + { + "epoch": 1.5936991147867352, + "grad_norm": 7.035167470803572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328610 + }, + { + "epoch": 1.5937476129795713, + "grad_norm": 7.055347595041894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328620 + }, + { + "epoch": 1.5937961111724075, + "grad_norm": 7.347778563371321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328630 + }, + { + "epoch": 1.5938446093652434, + "grad_norm": 6.842402200391007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328640 + }, + { + "epoch": 1.5938931075580798, + "grad_norm": 7.096336673839687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328650 + }, + { + "epoch": 1.5939416057509157, + "grad_norm": 6.983619016409648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328660 + }, + { + "epoch": 1.5939901039437518, + "grad_norm": 6.770608962369806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328670 + }, + { + "epoch": 1.594038602136588, + "grad_norm": 6.672735253232531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328680 + }, + { + "epoch": 1.594087100329424, + "grad_norm": 6.798062202051369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328690 + }, + { + "epoch": 1.59413559852226, + "grad_norm": 7.758626452414319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328700 + }, + { + "epoch": 1.5941840967150962, + "grad_norm": 8.213260827005797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328710 + }, + { + "epoch": 1.5942325949079321, + "grad_norm": 8.211529944901486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328720 + }, + { + "epoch": 1.5942810931007685, + "grad_norm": 6.987772849242901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328730 + }, + { + "epoch": 1.5943295912936044, + "grad_norm": 6.89655337282602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328740 + }, + { + "epoch": 1.5943780894864406, + "grad_norm": 6.985197842368507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328750 + }, + { + "epoch": 1.5944265876792767, + "grad_norm": 7.191386686145051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328760 + }, + { + "epoch": 1.5944750858721126, + "grad_norm": 1.0962226326682867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328770 + }, + { + "epoch": 1.5945235840649488, + "grad_norm": 6.469752378279736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328780 + }, + { + "epoch": 1.594572082257785, + "grad_norm": 6.986837775002641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328790 + }, + { + "epoch": 1.5946205804506208, + "grad_norm": 6.604106772556406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328800 + }, + { + "epoch": 1.5946690786434572, + "grad_norm": 7.021545656016315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328810 + }, + { + "epoch": 1.5947175768362931, + "grad_norm": 7.778653809964453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328820 + }, + { + "epoch": 1.5947660750291293, + "grad_norm": 7.552323211257317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328830 + }, + { + "epoch": 1.5948145732219654, + "grad_norm": 6.475244163084426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328840 + }, + { + "epoch": 1.5948630714148013, + "grad_norm": 6.557526432970917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328850 + }, + { + "epoch": 1.5949115696076375, + "grad_norm": 7.052359762838023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328860 + }, + { + "epoch": 1.5949600678004736, + "grad_norm": 8.248152738588033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328870 + }, + { + "epoch": 1.5950085659933095, + "grad_norm": 6.999977131272317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328880 + }, + { + "epoch": 1.595057064186146, + "grad_norm": 6.623903203717418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328890 + }, + { + "epoch": 1.5951055623789818, + "grad_norm": 6.411292474695074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328900 + }, + { + "epoch": 1.595154060571818, + "grad_norm": 7.543003022192352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328910 + }, + { + "epoch": 1.595202558764654, + "grad_norm": 6.588241774352355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328920 + }, + { + "epoch": 1.59525105695749, + "grad_norm": 6.502325078372451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328930 + }, + { + "epoch": 1.5952995551503262, + "grad_norm": 7.057793993681116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328940 + }, + { + "epoch": 1.5953480533431623, + "grad_norm": 6.328266266564242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328950 + }, + { + "epoch": 1.5953965515359982, + "grad_norm": 7.396098311573951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328960 + }, + { + "epoch": 1.5954450497288346, + "grad_norm": 8.26335977421877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328970 + }, + { + "epoch": 1.5954935479216705, + "grad_norm": 7.457775552666135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328980 + }, + { + "epoch": 1.5955420461145067, + "grad_norm": 6.390254014831953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 328990 + }, + { + "epoch": 1.5955905443073428, + "grad_norm": 6.566310162270383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329000 + }, + { + "epoch": 1.5956390425001787, + "grad_norm": 6.811568198372697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329010 + }, + { + "epoch": 1.5956875406930149, + "grad_norm": 6.404452790320647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329020 + }, + { + "epoch": 1.595736038885851, + "grad_norm": 7.189328243839554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329030 + }, + { + "epoch": 1.595784537078687, + "grad_norm": 6.23336831040433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329040 + }, + { + "epoch": 1.5958330352715233, + "grad_norm": 8.578740562370513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329050 + }, + { + "epoch": 1.5958815334643592, + "grad_norm": 6.355644188715814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329060 + }, + { + "epoch": 1.5959300316571954, + "grad_norm": 7.310127614346129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329070 + }, + { + "epoch": 1.5959785298500315, + "grad_norm": 6.998000401381432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329080 + }, + { + "epoch": 1.5960270280428674, + "grad_norm": 8.575593568593831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329090 + }, + { + "epoch": 1.5960755262357036, + "grad_norm": 6.151140041765757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329100 + }, + { + "epoch": 1.5961240244285397, + "grad_norm": 6.28883896069965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329110 + }, + { + "epoch": 1.5961725226213757, + "grad_norm": 6.411831776631516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329120 + }, + { + "epoch": 1.596221020814212, + "grad_norm": 6.134570185167831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329130 + }, + { + "epoch": 1.596269519007048, + "grad_norm": 6.263790197635899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329140 + }, + { + "epoch": 1.596318017199884, + "grad_norm": 8.285251595907539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329150 + }, + { + "epoch": 1.5963665153927202, + "grad_norm": 7.425802550642402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329160 + }, + { + "epoch": 1.5964150135855562, + "grad_norm": 7.451887995557627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329170 + }, + { + "epoch": 1.5964635117783925, + "grad_norm": 6.369688776430849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329180 + }, + { + "epoch": 1.5965120099712284, + "grad_norm": 6.463456969640902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329190 + }, + { + "epoch": 1.5965605081640646, + "grad_norm": 6.155454457257292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329200 + }, + { + "epoch": 1.5966090063569007, + "grad_norm": 6.228552962284084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329210 + }, + { + "epoch": 1.5966575045497367, + "grad_norm": 6.379801220646186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329220 + }, + { + "epoch": 1.5967060027425728, + "grad_norm": 6.249523210044572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329230 + }, + { + "epoch": 1.596754500935409, + "grad_norm": 6.092288629133691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329240 + }, + { + "epoch": 1.5968029991282449, + "grad_norm": 6.003848085356367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329250 + }, + { + "epoch": 1.5968514973210812, + "grad_norm": 6.554608944497886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329260 + }, + { + "epoch": 1.5968999955139171, + "grad_norm": 9.68964215530832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329270 + }, + { + "epoch": 1.5969484937067533, + "grad_norm": 6.64373160930154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329280 + }, + { + "epoch": 1.5969969918995894, + "grad_norm": 6.438914823547748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329290 + }, + { + "epoch": 1.5970454900924254, + "grad_norm": 5.9479887681845867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329300 + }, + { + "epoch": 1.5970939882852615, + "grad_norm": 6.117097228752755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329310 + }, + { + "epoch": 1.5971424864780976, + "grad_norm": 5.988410833879243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329320 + }, + { + "epoch": 1.5971909846709336, + "grad_norm": 6.875103508718894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329330 + }, + { + "epoch": 1.59723948286377, + "grad_norm": 6.290404286346529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329340 + }, + { + "epoch": 1.5972879810566059, + "grad_norm": 6.253453221916061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329350 + }, + { + "epoch": 1.597336479249442, + "grad_norm": 6.411355002455821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329360 + }, + { + "epoch": 1.5973849774422781, + "grad_norm": 6.3558644569639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329370 + }, + { + "epoch": 1.597433475635114, + "grad_norm": 5.8348444298417235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329380 + }, + { + "epoch": 1.5974819738279502, + "grad_norm": 6.02544076855338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329390 + }, + { + "epoch": 1.5975304720207864, + "grad_norm": 6.473651836813588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329400 + }, + { + "epoch": 1.5975789702136223, + "grad_norm": 6.696722465449056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329410 + }, + { + "epoch": 1.5976274684064586, + "grad_norm": 6.315355705055481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329420 + }, + { + "epoch": 1.5976759665992946, + "grad_norm": 6.089552329058279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329430 + }, + { + "epoch": 1.5977244647921307, + "grad_norm": 6.320396295222963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329440 + }, + { + "epoch": 1.5977729629849668, + "grad_norm": 7.197046869578116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329450 + }, + { + "epoch": 1.5978214611778028, + "grad_norm": 6.048951206594211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329460 + }, + { + "epoch": 1.597869959370639, + "grad_norm": 6.564602728076352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329470 + }, + { + "epoch": 1.597918457563475, + "grad_norm": 5.693981108834123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329480 + }, + { + "epoch": 1.597966955756311, + "grad_norm": 5.950965231704686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329490 + }, + { + "epoch": 1.5980154539491473, + "grad_norm": 6.171433852841801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329500 + }, + { + "epoch": 1.5980639521419833, + "grad_norm": 5.82404418025817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329510 + }, + { + "epoch": 1.5981124503348194, + "grad_norm": 6.646663308629286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329520 + }, + { + "epoch": 1.5981609485276556, + "grad_norm": 6.129522489572992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329530 + }, + { + "epoch": 1.5982094467204915, + "grad_norm": 5.8761497001569296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329540 + }, + { + "epoch": 1.5982579449133276, + "grad_norm": 5.8383928802641094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329550 + }, + { + "epoch": 1.5983064431061638, + "grad_norm": 1.5442850553881726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329560 + }, + { + "epoch": 1.5983549412989997, + "grad_norm": 5.831212490647886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329570 + }, + { + "epoch": 1.598403439491836, + "grad_norm": 5.783333634212795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329580 + }, + { + "epoch": 1.598451937684672, + "grad_norm": 5.8466802954626473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329590 + }, + { + "epoch": 1.5985004358775081, + "grad_norm": 7.28844042896526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329600 + }, + { + "epoch": 1.5985489340703443, + "grad_norm": 6.115087813896025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329610 + }, + { + "epoch": 1.5985974322631802, + "grad_norm": 5.996586338596899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329620 + }, + { + "epoch": 1.5986459304560163, + "grad_norm": 6.142312258816673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329630 + }, + { + "epoch": 1.5986944286488525, + "grad_norm": 5.8951322046141286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329640 + }, + { + "epoch": 1.5987429268416886, + "grad_norm": 6.124846407828954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329650 + }, + { + "epoch": 1.5987914250345248, + "grad_norm": 5.807011405067897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329660 + }, + { + "epoch": 1.5988399232273607, + "grad_norm": 6.146667885786883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329670 + }, + { + "epoch": 1.5988884214201968, + "grad_norm": 5.572537631337582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329680 + }, + { + "epoch": 1.598936919613033, + "grad_norm": 6.820292952625095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329690 + }, + { + "epoch": 1.598985417805869, + "grad_norm": 5.968468030914664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329700 + }, + { + "epoch": 1.5990339159987053, + "grad_norm": 5.7895849892020124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329710 + }, + { + "epoch": 1.5990824141915412, + "grad_norm": 6.597634438776367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329720 + }, + { + "epoch": 1.5991309123843773, + "grad_norm": 5.734780472721468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329730 + }, + { + "epoch": 1.5991794105772135, + "grad_norm": 5.895084242979465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329740 + }, + { + "epoch": 1.5992279087700494, + "grad_norm": 6.14586070923906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329750 + }, + { + "epoch": 1.5992764069628855, + "grad_norm": 7.068241814067733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329760 + }, + { + "epoch": 1.5993249051557217, + "grad_norm": 7.945650537521942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329770 + }, + { + "epoch": 1.5993734033485576, + "grad_norm": 5.599120456167839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329780 + }, + { + "epoch": 1.599421901541394, + "grad_norm": 5.916826140150988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329790 + }, + { + "epoch": 1.5994703997342299, + "grad_norm": 6.0576539340218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329800 + }, + { + "epoch": 1.599518897927066, + "grad_norm": 5.564264782265127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329810 + }, + { + "epoch": 1.5995673961199022, + "grad_norm": 7.770898946546367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329820 + }, + { + "epoch": 1.599615894312738, + "grad_norm": 6.189826251556951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329830 + }, + { + "epoch": 1.5996643925055742, + "grad_norm": 7.105697363840591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329840 + }, + { + "epoch": 1.5997128906984104, + "grad_norm": 1.5232787120567082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329850 + }, + { + "epoch": 1.5997613888912463, + "grad_norm": 7.594311313141588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329860 + }, + { + "epoch": 1.5998098870840827, + "grad_norm": 7.702343651772026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329870 + }, + { + "epoch": 1.5998583852769186, + "grad_norm": 5.536656644267168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329880 + }, + { + "epoch": 1.5999068834697547, + "grad_norm": 6.466739677080113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329890 + }, + { + "epoch": 1.5999553816625909, + "grad_norm": 6.008234265664214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329900 + }, + { + "epoch": 1.6000038798554268, + "grad_norm": 6.069370783734485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329910 + }, + { + "epoch": 1.600052378048263, + "grad_norm": 5.591483898115257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329920 + }, + { + "epoch": 1.600100876241099, + "grad_norm": 8.974967613539775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329930 + }, + { + "epoch": 1.600149374433935, + "grad_norm": 5.4167252017123246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329940 + }, + { + "epoch": 1.6001978726267714, + "grad_norm": 5.392620749944399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329950 + }, + { + "epoch": 1.6002463708196073, + "grad_norm": 5.5779384666720944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329960 + }, + { + "epoch": 1.6002948690124434, + "grad_norm": 5.536903202596477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329970 + }, + { + "epoch": 1.6003433672052796, + "grad_norm": 5.4979771846319636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329980 + }, + { + "epoch": 1.6003918653981155, + "grad_norm": 5.805739888842254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 329990 + }, + { + "epoch": 1.6004403635909517, + "grad_norm": 5.6051689512059966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330000 + }, + { + "epoch": 1.6004888617837878, + "grad_norm": 5.7174581513663725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330010 + }, + { + "epoch": 1.6005373599766237, + "grad_norm": 5.98680429675369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330020 + }, + { + "epoch": 1.60058585816946, + "grad_norm": 5.287559545763543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330030 + }, + { + "epoch": 1.600634356362296, + "grad_norm": 5.332232788646252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330040 + }, + { + "epoch": 1.6006828545551322, + "grad_norm": 5.386142731822474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330050 + }, + { + "epoch": 1.6007313527479683, + "grad_norm": 5.5133316578803715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330060 + }, + { + "epoch": 1.6007798509408042, + "grad_norm": 5.383041568052249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330070 + }, + { + "epoch": 1.6008283491336404, + "grad_norm": 5.6685461657934866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330080 + }, + { + "epoch": 1.6008768473264765, + "grad_norm": 6.993298029556172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330090 + }, + { + "epoch": 1.6009253455193124, + "grad_norm": 5.718350237771119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330100 + }, + { + "epoch": 1.6009738437121488, + "grad_norm": 6.345150183051373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330110 + }, + { + "epoch": 1.6010223419049847, + "grad_norm": 5.2850278819960295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330120 + }, + { + "epoch": 1.6010708400978209, + "grad_norm": 5.063196795163094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330130 + }, + { + "epoch": 1.601119338290657, + "grad_norm": 5.75470089358987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330140 + }, + { + "epoch": 1.601167836483493, + "grad_norm": 5.676973913182337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330150 + }, + { + "epoch": 1.6012163346763293, + "grad_norm": 7.651622269122527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330160 + }, + { + "epoch": 1.6012648328691652, + "grad_norm": 8.534407669458233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330170 + }, + { + "epoch": 1.6013133310620014, + "grad_norm": 4.909706774469669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330180 + }, + { + "epoch": 1.6013618292548375, + "grad_norm": 5.8586095974533237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330190 + }, + { + "epoch": 1.6014103274476734, + "grad_norm": 5.3697593926926857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330200 + }, + { + "epoch": 1.6014588256405096, + "grad_norm": 5.3414236589333086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330210 + }, + { + "epoch": 1.6015073238333457, + "grad_norm": 5.3822972745365405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330220 + }, + { + "epoch": 1.6015558220261816, + "grad_norm": 5.9533086016472225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330230 + }, + { + "epoch": 1.601604320219018, + "grad_norm": 5.4623910727968905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330240 + }, + { + "epoch": 1.601652818411854, + "grad_norm": 7.251163225419077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330250 + }, + { + "epoch": 1.60170131660469, + "grad_norm": 5.5636679263670885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330260 + }, + { + "epoch": 1.6017498147975262, + "grad_norm": 5.119148127619155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330270 + }, + { + "epoch": 1.6017983129903621, + "grad_norm": 5.4077599287438716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330280 + }, + { + "epoch": 1.6018468111831983, + "grad_norm": 5.3776208375211354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330290 + }, + { + "epoch": 1.6018953093760344, + "grad_norm": 5.2831317987056536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330300 + }, + { + "epoch": 1.6019438075688703, + "grad_norm": 5.137236769314768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330310 + }, + { + "epoch": 1.6019923057617067, + "grad_norm": 5.1774396325754424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330320 + }, + { + "epoch": 1.6020408039545426, + "grad_norm": 4.8925706153113424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330330 + }, + { + "epoch": 1.6020893021473788, + "grad_norm": 7.738643859056538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330340 + }, + { + "epoch": 1.602137800340215, + "grad_norm": 5.3354412443695765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330350 + }, + { + "epoch": 1.6021862985330508, + "grad_norm": 5.3048577086656223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330360 + }, + { + "epoch": 1.602234796725887, + "grad_norm": 5.591074625499459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330370 + }, + { + "epoch": 1.6022832949187231, + "grad_norm": 5.534759139891321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330380 + }, + { + "epoch": 1.602331793111559, + "grad_norm": 5.101095723603066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330390 + }, + { + "epoch": 1.6023802913043954, + "grad_norm": 5.071443354154326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330400 + }, + { + "epoch": 1.6024287894972313, + "grad_norm": 5.538946368233155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330410 + }, + { + "epoch": 1.6024772876900675, + "grad_norm": 5.2148383389294395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330420 + }, + { + "epoch": 1.6025257858829036, + "grad_norm": 4.698611277831333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330430 + }, + { + "epoch": 1.6025742840757395, + "grad_norm": 5.707573080826478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330440 + }, + { + "epoch": 1.6026227822685757, + "grad_norm": 5.0042874732980636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330450 + }, + { + "epoch": 1.6026712804614118, + "grad_norm": 5.454558760220607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330460 + }, + { + "epoch": 1.6027197786542478, + "grad_norm": 4.9762778786544004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330470 + }, + { + "epoch": 1.6027682768470841, + "grad_norm": 5.7004925224646286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330480 + }, + { + "epoch": 1.60281677503992, + "grad_norm": 5.4994668374774847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330490 + }, + { + "epoch": 1.6028652732327562, + "grad_norm": 5.246879908327173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330500 + }, + { + "epoch": 1.6029137714255923, + "grad_norm": 5.7315713064554075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330510 + }, + { + "epoch": 1.6029622696184282, + "grad_norm": 5.1462102135246823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330520 + }, + { + "epoch": 1.6030107678112644, + "grad_norm": 5.321341589592521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330530 + }, + { + "epoch": 1.6030592660041005, + "grad_norm": 5.2749840051546926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330540 + }, + { + "epoch": 1.6031077641969365, + "grad_norm": 4.977979983777914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330550 + }, + { + "epoch": 1.6031562623897728, + "grad_norm": 5.258301882804517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330560 + }, + { + "epoch": 1.6032047605826087, + "grad_norm": 5.408279335483712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330570 + }, + { + "epoch": 1.603253258775445, + "grad_norm": 4.7975344585893254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330580 + }, + { + "epoch": 1.603301756968281, + "grad_norm": 5.040097406094901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330590 + }, + { + "epoch": 1.603350255161117, + "grad_norm": 5.13496729581675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330600 + }, + { + "epoch": 1.603398753353953, + "grad_norm": 5.109346190579345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330610 + }, + { + "epoch": 1.6034472515467892, + "grad_norm": 5.09983983931761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330620 + }, + { + "epoch": 1.6034957497396252, + "grad_norm": 5.6801688685936824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330630 + }, + { + "epoch": 1.6035442479324615, + "grad_norm": 5.132850233735553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330640 + }, + { + "epoch": 1.6035927461252975, + "grad_norm": 5.314052842209094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330650 + }, + { + "epoch": 1.6036412443181336, + "grad_norm": 9.806252876387589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330660 + }, + { + "epoch": 1.6036897425109697, + "grad_norm": 5.1656567023883326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330670 + }, + { + "epoch": 1.6037382407038057, + "grad_norm": 4.6560028721387425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330680 + }, + { + "epoch": 1.603786738896642, + "grad_norm": 4.7893863097669964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330690 + }, + { + "epoch": 1.603835237089478, + "grad_norm": 5.6835538941868435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330700 + }, + { + "epoch": 1.603883735282314, + "grad_norm": 4.9024301063127496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330710 + }, + { + "epoch": 1.6039322334751502, + "grad_norm": 4.805633579962887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330720 + }, + { + "epoch": 1.6039807316679862, + "grad_norm": 4.7400096292449234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330730 + }, + { + "epoch": 1.6040292298608223, + "grad_norm": 4.631171535152134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330740 + }, + { + "epoch": 1.6040777280536584, + "grad_norm": 5.0384400651637407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330750 + }, + { + "epoch": 1.6041262262464944, + "grad_norm": 4.6358387351119745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330760 + }, + { + "epoch": 1.6041747244393307, + "grad_norm": 9.865618721960345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330770 + }, + { + "epoch": 1.6042232226321667, + "grad_norm": 4.837230704879403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330780 + }, + { + "epoch": 1.6042717208250028, + "grad_norm": 4.776811479700882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330790 + }, + { + "epoch": 1.604320219017839, + "grad_norm": 4.824893196087032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330800 + }, + { + "epoch": 1.6043687172106749, + "grad_norm": 5.256056567759515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330810 + }, + { + "epoch": 1.604417215403511, + "grad_norm": 5.029580663062916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330820 + }, + { + "epoch": 1.6044657135963472, + "grad_norm": 4.6053404645363116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330830 + }, + { + "epoch": 1.604514211789183, + "grad_norm": 4.714367918268181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330840 + }, + { + "epoch": 1.6045627099820194, + "grad_norm": 4.778567230800945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330850 + }, + { + "epoch": 1.6046112081748554, + "grad_norm": 4.60571278892985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330860 + }, + { + "epoch": 1.6046597063676915, + "grad_norm": 4.570798495251438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330870 + }, + { + "epoch": 1.6047082045605277, + "grad_norm": 4.5714902086047005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330880 + }, + { + "epoch": 1.6047567027533636, + "grad_norm": 4.432095579431916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330890 + }, + { + "epoch": 1.6048052009461997, + "grad_norm": 4.7318643225935375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330900 + }, + { + "epoch": 1.6048536991390359, + "grad_norm": 4.597810843165462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330910 + }, + { + "epoch": 1.6049021973318718, + "grad_norm": 5.373057021529348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330920 + }, + { + "epoch": 1.6049506955247081, + "grad_norm": 4.615678150798885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330930 + }, + { + "epoch": 1.604999193717544, + "grad_norm": 5.025404803404854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330940 + }, + { + "epoch": 1.6050476919103802, + "grad_norm": 4.545483278661777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330950 + }, + { + "epoch": 1.6050961901032164, + "grad_norm": 4.881790971467126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330960 + }, + { + "epoch": 1.6051446882960523, + "grad_norm": 4.477346848830166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330970 + }, + { + "epoch": 1.6051931864888884, + "grad_norm": 4.5592990716158965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330980 + }, + { + "epoch": 1.6052416846817246, + "grad_norm": 4.9465530338466124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 330990 + }, + { + "epoch": 1.6052901828745605, + "grad_norm": 4.88655302888219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331000 + }, + { + "epoch": 1.6053386810673969, + "grad_norm": 4.565279709822789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331010 + }, + { + "epoch": 1.6053871792602328, + "grad_norm": 4.7204430586589297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331020 + }, + { + "epoch": 1.605435677453069, + "grad_norm": 4.666595643243454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331030 + }, + { + "epoch": 1.605484175645905, + "grad_norm": 4.543584708471826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331040 + }, + { + "epoch": 1.605532673838741, + "grad_norm": 4.953048815536931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331050 + }, + { + "epoch": 1.6055811720315771, + "grad_norm": 4.511280238261861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331060 + }, + { + "epoch": 1.6056296702244133, + "grad_norm": 4.568460809650787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331070 + }, + { + "epoch": 1.6056781684172492, + "grad_norm": 4.751247928425073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331080 + }, + { + "epoch": 1.6057266666100856, + "grad_norm": 4.498307148992353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331090 + }, + { + "epoch": 1.6057751648029215, + "grad_norm": 5.839004302288231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331100 + }, + { + "epoch": 1.6058236629957576, + "grad_norm": 8.831889886096178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331110 + }, + { + "epoch": 1.6058721611885938, + "grad_norm": 4.6141853005110534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331120 + }, + { + "epoch": 1.6059206593814297, + "grad_norm": 4.434054190483039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331130 + }, + { + "epoch": 1.6059691575742658, + "grad_norm": 4.339179682233407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331140 + }, + { + "epoch": 1.606017655767102, + "grad_norm": 2.2440724478656193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331150 + }, + { + "epoch": 1.606066153959938, + "grad_norm": 4.6402131914646816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331160 + }, + { + "epoch": 1.6061146521527743, + "grad_norm": 7.714810124070937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331170 + }, + { + "epoch": 1.6061631503456102, + "grad_norm": 5.228703869875062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331180 + }, + { + "epoch": 1.6062116485384463, + "grad_norm": 5.0662286810165824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331190 + }, + { + "epoch": 1.6062601467312825, + "grad_norm": 4.243267781589566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331200 + }, + { + "epoch": 1.6063086449241184, + "grad_norm": 4.700438083204972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331210 + }, + { + "epoch": 1.6063571431169548, + "grad_norm": 4.3332935462103706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331220 + }, + { + "epoch": 1.6064056413097907, + "grad_norm": 4.467895919901821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331230 + }, + { + "epoch": 1.6064541395026268, + "grad_norm": 4.4266265319947706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331240 + }, + { + "epoch": 1.606502637695463, + "grad_norm": 8.62072440099837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331250 + }, + { + "epoch": 1.606551135888299, + "grad_norm": 4.4648849950590375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331260 + }, + { + "epoch": 1.606599634081135, + "grad_norm": 4.415822729697538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331270 + }, + { + "epoch": 1.6066481322739712, + "grad_norm": 4.442222234501969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331280 + }, + { + "epoch": 1.606696630466807, + "grad_norm": 4.4742705540556926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331290 + }, + { + "epoch": 1.6067451286596435, + "grad_norm": 4.351483440245829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331300 + }, + { + "epoch": 1.6067936268524794, + "grad_norm": 4.528477504095463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331310 + }, + { + "epoch": 1.6068421250453155, + "grad_norm": 4.3794155857312944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331320 + }, + { + "epoch": 1.6068906232381517, + "grad_norm": 4.491216287760835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331330 + }, + { + "epoch": 1.6069391214309876, + "grad_norm": 4.5342449794816275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331340 + }, + { + "epoch": 1.6069876196238237, + "grad_norm": 4.59924400786349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331350 + }, + { + "epoch": 1.60703611781666, + "grad_norm": 4.334778580528109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331360 + }, + { + "epoch": 1.6070846160094958, + "grad_norm": 4.35109619445484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331370 + }, + { + "epoch": 1.6071331142023322, + "grad_norm": 4.232602179854439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331380 + }, + { + "epoch": 1.607181612395168, + "grad_norm": 7.189468220758499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331390 + }, + { + "epoch": 1.6072301105880042, + "grad_norm": 5.107017031491523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331400 + }, + { + "epoch": 1.6072786087808404, + "grad_norm": 4.22137382827259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331410 + }, + { + "epoch": 1.6073271069736763, + "grad_norm": 4.2918486542475875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331420 + }, + { + "epoch": 1.6073756051665125, + "grad_norm": 4.171964107513304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331430 + }, + { + "epoch": 1.6074241033593486, + "grad_norm": 4.18413144132046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331440 + }, + { + "epoch": 1.6074726015521845, + "grad_norm": 4.9465644025303845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331450 + }, + { + "epoch": 1.6075210997450209, + "grad_norm": 4.1615869861288957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331460 + }, + { + "epoch": 1.6075695979378568, + "grad_norm": 4.129243436068464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331470 + }, + { + "epoch": 1.607618096130693, + "grad_norm": 4.288819610565042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331480 + }, + { + "epoch": 1.607666594323529, + "grad_norm": 4.260409980361146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331490 + }, + { + "epoch": 1.607715092516365, + "grad_norm": 4.359727157066118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331500 + }, + { + "epoch": 1.6077635907092012, + "grad_norm": 4.432893163652807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331510 + }, + { + "epoch": 1.6078120889020373, + "grad_norm": 4.021550026322984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331520 + }, + { + "epoch": 1.6078605870948732, + "grad_norm": 4.21800869787603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331530 + }, + { + "epoch": 1.6079090852877096, + "grad_norm": 4.590157942629958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331540 + }, + { + "epoch": 1.6079575834805455, + "grad_norm": 4.38081571019211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331550 + }, + { + "epoch": 1.6080060816733817, + "grad_norm": 4.434646072581927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331560 + }, + { + "epoch": 1.6080545798662178, + "grad_norm": 4.505440287516649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331570 + }, + { + "epoch": 1.6081030780590537, + "grad_norm": 3.7607506442327576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331580 + }, + { + "epoch": 1.6081515762518899, + "grad_norm": 4.2872109418112814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331590 + }, + { + "epoch": 1.608200074444726, + "grad_norm": 4.0302364112676514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331600 + }, + { + "epoch": 1.608248572637562, + "grad_norm": 4.1461387212393674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331610 + }, + { + "epoch": 1.6082970708303983, + "grad_norm": 4.122628283198537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331620 + }, + { + "epoch": 1.6083455690232342, + "grad_norm": 4.1136921424822503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331630 + }, + { + "epoch": 1.6083940672160704, + "grad_norm": 4.02553084200008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331640 + }, + { + "epoch": 1.6084425654089065, + "grad_norm": 4.434080480564262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331650 + }, + { + "epoch": 1.6084910636017424, + "grad_norm": 4.284697041612162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331660 + }, + { + "epoch": 1.6085395617945786, + "grad_norm": 4.6790308516619916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331670 + }, + { + "epoch": 1.6085880599874147, + "grad_norm": 3.769075718196291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331680 + }, + { + "epoch": 1.6086365581802506, + "grad_norm": 3.862207265115103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331690 + }, + { + "epoch": 1.608685056373087, + "grad_norm": 4.887846927204009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331700 + }, + { + "epoch": 1.608733554565923, + "grad_norm": 4.3421614748240245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331710 + }, + { + "epoch": 1.608782052758759, + "grad_norm": 4.0029455306012096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331720 + }, + { + "epoch": 1.6088305509515952, + "grad_norm": 4.085810445531024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331730 + }, + { + "epoch": 1.6088790491444311, + "grad_norm": 3.886754740278775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331740 + }, + { + "epoch": 1.6089275473372675, + "grad_norm": 4.2398120569941966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331750 + }, + { + "epoch": 1.6089760455301034, + "grad_norm": 3.9539010288081045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331760 + }, + { + "epoch": 1.6090245437229396, + "grad_norm": 3.9550762664930517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331770 + }, + { + "epoch": 1.6090730419157757, + "grad_norm": 4.0282749580455857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331780 + }, + { + "epoch": 1.6091215401086116, + "grad_norm": 5.092220689562055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331790 + }, + { + "epoch": 1.6091700383014478, + "grad_norm": 4.138543729936828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331800 + }, + { + "epoch": 1.609218536494284, + "grad_norm": 4.0060022854504496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331810 + }, + { + "epoch": 1.6092670346871198, + "grad_norm": 3.793363134718675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331820 + }, + { + "epoch": 1.6093155328799562, + "grad_norm": 5.4173121100120625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331830 + }, + { + "epoch": 1.6093640310727921, + "grad_norm": 3.8794905066197316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331840 + }, + { + "epoch": 1.6094125292656283, + "grad_norm": 4.196941816303479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331850 + }, + { + "epoch": 1.6094610274584644, + "grad_norm": 3.914109214520067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331860 + }, + { + "epoch": 1.6095095256513003, + "grad_norm": 3.990376384876981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331870 + }, + { + "epoch": 1.6095580238441365, + "grad_norm": 3.6851979245966504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331880 + }, + { + "epoch": 1.6096065220369726, + "grad_norm": 4.15534877618029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331890 + }, + { + "epoch": 1.6096550202298086, + "grad_norm": 3.931767977860545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331900 + }, + { + "epoch": 1.609703518422645, + "grad_norm": 3.784117552640964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331910 + }, + { + "epoch": 1.6097520166154808, + "grad_norm": 3.8983575478823695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331920 + }, + { + "epoch": 1.609800514808317, + "grad_norm": 3.922303193348853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331930 + }, + { + "epoch": 1.6098490130011531, + "grad_norm": 4.8466532120983175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331940 + }, + { + "epoch": 1.609897511193989, + "grad_norm": 3.9375631644134046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331950 + }, + { + "epoch": 1.6099460093868252, + "grad_norm": 4.5208071952629325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331960 + }, + { + "epoch": 1.6099945075796613, + "grad_norm": 4.3380204317600146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331970 + }, + { + "epoch": 1.6100430057724973, + "grad_norm": 3.904163392576265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331980 + }, + { + "epoch": 1.6100915039653336, + "grad_norm": 3.9644660887461214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 331990 + }, + { + "epoch": 1.6101400021581695, + "grad_norm": 3.728035835592891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332000 + }, + { + "epoch": 1.6101885003510057, + "grad_norm": 4.5336083331903865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332010 + }, + { + "epoch": 1.6102369985438418, + "grad_norm": 3.734142950406749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332020 + }, + { + "epoch": 1.6102854967366778, + "grad_norm": 3.6348055232338083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332030 + }, + { + "epoch": 1.610333994929514, + "grad_norm": 3.81273004279592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332040 + }, + { + "epoch": 1.61038249312235, + "grad_norm": 5.8334734376330744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332050 + }, + { + "epoch": 1.610430991315186, + "grad_norm": 3.852407104432132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332060 + }, + { + "epoch": 1.6104794895080223, + "grad_norm": 4.163985067862086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332070 + }, + { + "epoch": 1.6105279877008583, + "grad_norm": 5.1746873452884756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332080 + }, + { + "epoch": 1.6105764858936944, + "grad_norm": 4.303609557609889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332090 + }, + { + "epoch": 1.6106249840865305, + "grad_norm": 3.739818055237265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332100 + }, + { + "epoch": 1.6106734822793665, + "grad_norm": 3.715049246011404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332110 + }, + { + "epoch": 1.6107219804722026, + "grad_norm": 3.778215429406373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332120 + }, + { + "epoch": 1.6107704786650388, + "grad_norm": 0.24363648891448975, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 332130 + }, + { + "epoch": 1.6108189768578747, + "grad_norm": 0.0009223962551914155, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 332140 + }, + { + "epoch": 1.610867475050711, + "grad_norm": 0.01322379894554615, + "learning_rate": 0.0002, + "loss": 0.3219, + "step": 332150 + }, + { + "epoch": 1.610915973243547, + "grad_norm": 0.002567233983427286, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 332160 + }, + { + "epoch": 1.610964471436383, + "grad_norm": 0.008978111669421196, + "learning_rate": 0.0002, + "loss": 0.0018, + "step": 332170 + }, + { + "epoch": 1.6110129696292192, + "grad_norm": 0.00622507045045495, + "learning_rate": 0.0002, + "loss": 0.0028, + "step": 332180 + }, + { + "epoch": 1.6110614678220552, + "grad_norm": 0.0013738839188590646, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 332190 + }, + { + "epoch": 1.6111099660148913, + "grad_norm": 0.00011155970423715189, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 332200 + }, + { + "epoch": 1.6111584642077275, + "grad_norm": 5.87357790209353e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 332210 + }, + { + "epoch": 1.6112069624005636, + "grad_norm": 0.2434495985507965, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 332220 + }, + { + "epoch": 1.6112554605933997, + "grad_norm": 5.26969124621246e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 332230 + }, + { + "epoch": 1.6113039587862357, + "grad_norm": 0.00011292422277620062, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332240 + }, + { + "epoch": 1.6113524569790718, + "grad_norm": 4.102469756617211e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332250 + }, + { + "epoch": 1.611400955171908, + "grad_norm": 5.364275784813799e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332260 + }, + { + "epoch": 1.6114494533647439, + "grad_norm": 2.6217290724162012e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332270 + }, + { + "epoch": 1.6114979515575802, + "grad_norm": 2.4347977159777656e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332280 + }, + { + "epoch": 1.6115464497504162, + "grad_norm": 3.853940506814979e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332290 + }, + { + "epoch": 1.6115949479432523, + "grad_norm": 3.1171744922176003e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332300 + }, + { + "epoch": 1.6116434461360885, + "grad_norm": 1.8312031897949055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332310 + }, + { + "epoch": 1.6116919443289244, + "grad_norm": 1.6046426026150584e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332320 + }, + { + "epoch": 1.6117404425217605, + "grad_norm": 0.5914193391799927, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 332330 + }, + { + "epoch": 1.6117889407145967, + "grad_norm": 2.964341183542274e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332340 + }, + { + "epoch": 1.6118374389074326, + "grad_norm": 1.3126841622579377e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332350 + }, + { + "epoch": 1.611885937100269, + "grad_norm": 1.4320455193228554e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332360 + }, + { + "epoch": 1.6119344352931049, + "grad_norm": 1.4687076145492028e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332370 + }, + { + "epoch": 1.611982933485941, + "grad_norm": 1.645131123950705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332380 + }, + { + "epoch": 1.6120314316787772, + "grad_norm": 2.5064748115255497e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332390 + }, + { + "epoch": 1.612079929871613, + "grad_norm": 1.214987423736602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332400 + }, + { + "epoch": 1.6121284280644492, + "grad_norm": 1.3100715477776248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332410 + }, + { + "epoch": 1.6121769262572854, + "grad_norm": 1.1153088962601032e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332420 + }, + { + "epoch": 1.6122254244501213, + "grad_norm": 1.4637315871368628e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 332430 + }, + { + "epoch": 1.6122739226429577, + "grad_norm": 2.4005141312954947e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332440 + }, + { + "epoch": 1.6123224208357936, + "grad_norm": 1.83647389349062e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332450 + }, + { + "epoch": 1.6123709190286297, + "grad_norm": 1.9775967302848585e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332460 + }, + { + "epoch": 1.6124194172214659, + "grad_norm": 2.0874709662166424e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332470 + }, + { + "epoch": 1.6124679154143018, + "grad_norm": 2.4322282115463167e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332480 + }, + { + "epoch": 1.612516413607138, + "grad_norm": 2.2629448722000234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332490 + }, + { + "epoch": 1.612564911799974, + "grad_norm": 1.586490179761313e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332500 + }, + { + "epoch": 1.61261340999281, + "grad_norm": 1.769292248354759e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332510 + }, + { + "epoch": 1.6126619081856464, + "grad_norm": 2.3711027097306214e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332520 + }, + { + "epoch": 1.6127104063784823, + "grad_norm": 1.4014614862389863e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332530 + }, + { + "epoch": 1.6127589045713184, + "grad_norm": 2.0741015759995207e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332540 + }, + { + "epoch": 1.6128074027641546, + "grad_norm": 1.4773273505852558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332550 + }, + { + "epoch": 1.6128559009569905, + "grad_norm": 2.027797199843917e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332560 + }, + { + "epoch": 1.6129043991498266, + "grad_norm": 1.254746166523546e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332570 + }, + { + "epoch": 1.6129528973426628, + "grad_norm": 1.273915586352814e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332580 + }, + { + "epoch": 1.6130013955354987, + "grad_norm": 1.723921377561055e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332590 + }, + { + "epoch": 1.613049893728335, + "grad_norm": 1.2718955986201763e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332600 + }, + { + "epoch": 1.613098391921171, + "grad_norm": 1.3248274626675993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332610 + }, + { + "epoch": 1.6131468901140071, + "grad_norm": 1.1105143130407669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332620 + }, + { + "epoch": 1.6131953883068433, + "grad_norm": 9.873004273686092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332630 + }, + { + "epoch": 1.6132438864996792, + "grad_norm": 1.532337955723051e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332640 + }, + { + "epoch": 1.6132923846925153, + "grad_norm": 1.1810485375463031e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332650 + }, + { + "epoch": 1.6133408828853515, + "grad_norm": 9.304172635893337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332660 + }, + { + "epoch": 1.6133893810781874, + "grad_norm": 9.349671927338932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332670 + }, + { + "epoch": 1.6134378792710238, + "grad_norm": 8.5712954387418e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332680 + }, + { + "epoch": 1.6134863774638597, + "grad_norm": 1.5083292964845896e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332690 + }, + { + "epoch": 1.6135348756566958, + "grad_norm": 9.046183549799025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332700 + }, + { + "epoch": 1.613583373849532, + "grad_norm": 7.954274224175606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332710 + }, + { + "epoch": 1.613631872042368, + "grad_norm": 9.45971260080114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332720 + }, + { + "epoch": 1.6136803702352043, + "grad_norm": 8.258763045887463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332730 + }, + { + "epoch": 1.6137288684280402, + "grad_norm": 1.2177640201116446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332740 + }, + { + "epoch": 1.6137773666208763, + "grad_norm": 7.438896318490151e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332750 + }, + { + "epoch": 1.6138258648137125, + "grad_norm": 7.577333690278465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332760 + }, + { + "epoch": 1.6138743630065484, + "grad_norm": 8.988663466880098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332770 + }, + { + "epoch": 1.6139228611993846, + "grad_norm": 8.255095963249914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332780 + }, + { + "epoch": 1.6139713593922207, + "grad_norm": 1.1308156899758615e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332790 + }, + { + "epoch": 1.6140198575850566, + "grad_norm": 6.910971933393739e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332800 + }, + { + "epoch": 1.614068355777893, + "grad_norm": 6.3725074141984805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332810 + }, + { + "epoch": 1.614116853970729, + "grad_norm": 7.012117748672608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332820 + }, + { + "epoch": 1.614165352163565, + "grad_norm": 7.687234756303951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332830 + }, + { + "epoch": 1.6142138503564012, + "grad_norm": 1.0230152838630602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332840 + }, + { + "epoch": 1.6142623485492371, + "grad_norm": 7.226021807582583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332850 + }, + { + "epoch": 1.6143108467420733, + "grad_norm": 8.247736332123168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332860 + }, + { + "epoch": 1.6143593449349094, + "grad_norm": 5.695389972970588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332870 + }, + { + "epoch": 1.6144078431277453, + "grad_norm": 5.657537712977501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332880 + }, + { + "epoch": 1.6144563413205817, + "grad_norm": 9.262650564778596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332890 + }, + { + "epoch": 1.6145048395134176, + "grad_norm": 7.5435787039168645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332900 + }, + { + "epoch": 1.6145533377062538, + "grad_norm": 5.438383595901541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332910 + }, + { + "epoch": 1.61460183589909, + "grad_norm": 7.448501492035575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332920 + }, + { + "epoch": 1.6146503340919258, + "grad_norm": 5.682890787284123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332930 + }, + { + "epoch": 1.614698832284762, + "grad_norm": 8.54565132613061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332940 + }, + { + "epoch": 1.614747330477598, + "grad_norm": 4.631826868717326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332950 + }, + { + "epoch": 1.614795828670434, + "grad_norm": 4.811405688087689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332960 + }, + { + "epoch": 1.6148443268632704, + "grad_norm": 5.170133135834476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332970 + }, + { + "epoch": 1.6148928250561063, + "grad_norm": 4.858141437580343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332980 + }, + { + "epoch": 1.6149413232489425, + "grad_norm": 8.537584108125884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 332990 + }, + { + "epoch": 1.6149898214417786, + "grad_norm": 4.5297206270333845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333000 + }, + { + "epoch": 1.6150383196346145, + "grad_norm": 4.513546628004406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333010 + }, + { + "epoch": 1.6150868178274507, + "grad_norm": 4.470619842322776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333020 + }, + { + "epoch": 1.6151353160202868, + "grad_norm": 4.4700332182401326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333030 + }, + { + "epoch": 1.6151838142131227, + "grad_norm": 7.693880434089806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333040 + }, + { + "epoch": 1.615232312405959, + "grad_norm": 4.022717348561855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333050 + }, + { + "epoch": 1.615280810598795, + "grad_norm": 4.657050794776296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333060 + }, + { + "epoch": 1.6153293087916312, + "grad_norm": 5.606435479421634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333070 + }, + { + "epoch": 1.6153778069844673, + "grad_norm": 9.50458343140781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333080 + }, + { + "epoch": 1.6154263051773032, + "grad_norm": 0.00022527144756168127, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333090 + }, + { + "epoch": 1.6154748033701394, + "grad_norm": 6.395751370291691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333100 + }, + { + "epoch": 1.6155233015629755, + "grad_norm": 5.718474767490989e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333110 + }, + { + "epoch": 1.6155717997558114, + "grad_norm": 6.402690814866219e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333120 + }, + { + "epoch": 1.6156202979486478, + "grad_norm": 6.431922884075902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333130 + }, + { + "epoch": 1.6156687961414837, + "grad_norm": 2.0533432689262554e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333140 + }, + { + "epoch": 1.6157172943343199, + "grad_norm": 3.7359006910264725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333150 + }, + { + "epoch": 1.615765792527156, + "grad_norm": 4.999510565539822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333160 + }, + { + "epoch": 1.615814290719992, + "grad_norm": 8.566337783122435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333170 + }, + { + "epoch": 1.615862788912828, + "grad_norm": 7.0608452915621456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333180 + }, + { + "epoch": 1.6159112871056642, + "grad_norm": 7.758630999887828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333190 + }, + { + "epoch": 1.6159597852985002, + "grad_norm": 4.7438893489015754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333200 + }, + { + "epoch": 1.6160082834913365, + "grad_norm": 5.427868018159643e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333210 + }, + { + "epoch": 1.6160567816841724, + "grad_norm": 4.302265551814344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333220 + }, + { + "epoch": 1.6161052798770086, + "grad_norm": 1.3160219168639742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333230 + }, + { + "epoch": 1.6161537780698447, + "grad_norm": 7.2386619649478234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333240 + }, + { + "epoch": 1.6162022762626806, + "grad_norm": 5.331668035069015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333250 + }, + { + "epoch": 1.616250774455517, + "grad_norm": 3.95397410102305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333260 + }, + { + "epoch": 1.616299272648353, + "grad_norm": 3.824318355327705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333270 + }, + { + "epoch": 1.616347770841189, + "grad_norm": 4.100408204976702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333280 + }, + { + "epoch": 1.6163962690340252, + "grad_norm": 6.772981123503996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333290 + }, + { + "epoch": 1.6164447672268611, + "grad_norm": 3.535240239216364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333300 + }, + { + "epoch": 1.6164932654196973, + "grad_norm": 3.373522758920444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333310 + }, + { + "epoch": 1.6165417636125334, + "grad_norm": 5.643545591738075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333320 + }, + { + "epoch": 1.6165902618053694, + "grad_norm": 3.7244312807160895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333330 + }, + { + "epoch": 1.6166387599982057, + "grad_norm": 5.373391559260199e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333340 + }, + { + "epoch": 1.6166872581910416, + "grad_norm": 3.1808704079594463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333350 + }, + { + "epoch": 1.6167357563838778, + "grad_norm": 3.14469616569113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333360 + }, + { + "epoch": 1.616784254576714, + "grad_norm": 3.610195790315629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333370 + }, + { + "epoch": 1.6168327527695499, + "grad_norm": 3.725362603290705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333380 + }, + { + "epoch": 1.616881250962386, + "grad_norm": 5.103755484014982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333390 + }, + { + "epoch": 1.6169297491552221, + "grad_norm": 3.2804603051772574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333400 + }, + { + "epoch": 1.616978247348058, + "grad_norm": 2.987113930430496e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333410 + }, + { + "epoch": 1.6170267455408944, + "grad_norm": 2.896237219829345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333420 + }, + { + "epoch": 1.6170752437337303, + "grad_norm": 2.8028705401084153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333430 + }, + { + "epoch": 1.6171237419265665, + "grad_norm": 4.5119932110537775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333440 + }, + { + "epoch": 1.6171722401194026, + "grad_norm": 2.9085497317282716e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333450 + }, + { + "epoch": 1.6172207383122386, + "grad_norm": 2.933959422080079e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333460 + }, + { + "epoch": 1.6172692365050747, + "grad_norm": 2.7052712994191097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333470 + }, + { + "epoch": 1.6173177346979108, + "grad_norm": 3.302857749076793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333480 + }, + { + "epoch": 1.6173662328907468, + "grad_norm": 4.3069048842880875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333490 + }, + { + "epoch": 1.6174147310835831, + "grad_norm": 2.904239863710245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333500 + }, + { + "epoch": 1.617463229276419, + "grad_norm": 2.4533981104468694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333510 + }, + { + "epoch": 1.6175117274692552, + "grad_norm": 2.5364934117533267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333520 + }, + { + "epoch": 1.6175602256620913, + "grad_norm": 2.5888512027449906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333530 + }, + { + "epoch": 1.6176087238549273, + "grad_norm": 4.461498974706046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333540 + }, + { + "epoch": 1.6176572220477634, + "grad_norm": 2.835957729985239e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333550 + }, + { + "epoch": 1.6177057202405996, + "grad_norm": 2.381962531217141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333560 + }, + { + "epoch": 1.6177542184334355, + "grad_norm": 2.2238068595470395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333570 + }, + { + "epoch": 1.6178027166262718, + "grad_norm": 3.1539348128717393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333580 + }, + { + "epoch": 1.6178512148191078, + "grad_norm": 3.2492864647792885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333590 + }, + { + "epoch": 1.617899713011944, + "grad_norm": 2.2276137769949855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333600 + }, + { + "epoch": 1.61794821120478, + "grad_norm": 2.20607557821495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333610 + }, + { + "epoch": 1.617996709397616, + "grad_norm": 3.874889443977736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333620 + }, + { + "epoch": 1.6180452075904521, + "grad_norm": 2.407588908681646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333630 + }, + { + "epoch": 1.6180937057832883, + "grad_norm": 3.225749196644756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333640 + }, + { + "epoch": 1.6181422039761242, + "grad_norm": 1.9506724129314534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333650 + }, + { + "epoch": 1.6181907021689605, + "grad_norm": 1.919087935675634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333660 + }, + { + "epoch": 1.6182392003617965, + "grad_norm": 1.9857404822687386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333670 + }, + { + "epoch": 1.6182876985546326, + "grad_norm": 2.663422264959081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333680 + }, + { + "epoch": 1.6183361967474688, + "grad_norm": 3.0826481633994263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333690 + }, + { + "epoch": 1.6183846949403047, + "grad_norm": 1.9398212316446006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333700 + }, + { + "epoch": 1.6184331931331408, + "grad_norm": 2.0531781501631485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333710 + }, + { + "epoch": 1.618481691325977, + "grad_norm": 1.9354213236510986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333720 + }, + { + "epoch": 1.618530189518813, + "grad_norm": 1.8293349057785235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333730 + }, + { + "epoch": 1.6185786877116493, + "grad_norm": 4.029473075206624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333740 + }, + { + "epoch": 1.6186271859044852, + "grad_norm": 1.866358616098296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333750 + }, + { + "epoch": 1.6186756840973213, + "grad_norm": 1.915734401336522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333760 + }, + { + "epoch": 1.6187241822901575, + "grad_norm": 1.942183644132456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333770 + }, + { + "epoch": 1.6187726804829934, + "grad_norm": 2.0814761683141114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333780 + }, + { + "epoch": 1.6188211786758298, + "grad_norm": 5.6046437748591416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333790 + }, + { + "epoch": 1.6188696768686657, + "grad_norm": 1.8132017203242867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333800 + }, + { + "epoch": 1.6189181750615018, + "grad_norm": 2.47991238211398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333810 + }, + { + "epoch": 1.618966673254338, + "grad_norm": 1.6487711036461405e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333820 + }, + { + "epoch": 1.6190151714471739, + "grad_norm": 1.6561044731133734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333830 + }, + { + "epoch": 1.61906366964001, + "grad_norm": 2.708194415390608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333840 + }, + { + "epoch": 1.6191121678328462, + "grad_norm": 1.7894014945341041e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333850 + }, + { + "epoch": 1.619160666025682, + "grad_norm": 1.6238061562035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333860 + }, + { + "epoch": 1.6192091642185185, + "grad_norm": 2.1974708488414763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333870 + }, + { + "epoch": 1.6192576624113544, + "grad_norm": 1.8710526319409837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333880 + }, + { + "epoch": 1.6193061606041905, + "grad_norm": 2.8620061129913665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333890 + }, + { + "epoch": 1.6193546587970267, + "grad_norm": 1.9687020085257245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333900 + }, + { + "epoch": 1.6194031569898626, + "grad_norm": 1.4852499816697673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333910 + }, + { + "epoch": 1.6194516551826987, + "grad_norm": 1.5203728480628342e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333920 + }, + { + "epoch": 1.6195001533755349, + "grad_norm": 1.4659003682027105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333930 + }, + { + "epoch": 1.6195486515683708, + "grad_norm": 2.229497113148682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333940 + }, + { + "epoch": 1.6195971497612072, + "grad_norm": 1.3069063697912497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333950 + }, + { + "epoch": 1.619645647954043, + "grad_norm": 1.5162894442255492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333960 + }, + { + "epoch": 1.6196941461468792, + "grad_norm": 1.4307187257145415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333970 + }, + { + "epoch": 1.6197426443397154, + "grad_norm": 1.6806876601549448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333980 + }, + { + "epoch": 1.6197911425325513, + "grad_norm": 2.117744770657737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 333990 + }, + { + "epoch": 1.6198396407253874, + "grad_norm": 1.5024581898614997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334000 + }, + { + "epoch": 1.6198881389182236, + "grad_norm": 1.334862417934346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334010 + }, + { + "epoch": 1.6199366371110595, + "grad_norm": 1.46772765674541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334020 + }, + { + "epoch": 1.6199851353038959, + "grad_norm": 1.3662311175721698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334030 + }, + { + "epoch": 1.6200336334967318, + "grad_norm": 1.8649544699655962e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334040 + }, + { + "epoch": 1.620082131689568, + "grad_norm": 1.4015623719387804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334050 + }, + { + "epoch": 1.620130629882404, + "grad_norm": 1.272376835004252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334060 + }, + { + "epoch": 1.62017912807524, + "grad_norm": 1.450378817935416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334070 + }, + { + "epoch": 1.6202276262680761, + "grad_norm": 1.3105591278872453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334080 + }, + { + "epoch": 1.6202761244609123, + "grad_norm": 1.9220751710236073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334090 + }, + { + "epoch": 1.6203246226537482, + "grad_norm": 1.6179574231500737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334100 + }, + { + "epoch": 1.6203731208465846, + "grad_norm": 1.2527796116046375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334110 + }, + { + "epoch": 1.6204216190394205, + "grad_norm": 1.7733532331476454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334120 + }, + { + "epoch": 1.6204701172322566, + "grad_norm": 1.961806219696882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334130 + }, + { + "epoch": 1.6205186154250928, + "grad_norm": 2.148953171854373e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334140 + }, + { + "epoch": 1.6205671136179287, + "grad_norm": 1.1565159638848854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334150 + }, + { + "epoch": 1.6206156118107649, + "grad_norm": 1.1682694776027347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334160 + }, + { + "epoch": 1.620664110003601, + "grad_norm": 1.0515636859054212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334170 + }, + { + "epoch": 1.620712608196437, + "grad_norm": 1.2557402442325838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334180 + }, + { + "epoch": 1.6207611063892733, + "grad_norm": 1.7298233387919026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334190 + }, + { + "epoch": 1.6208096045821092, + "grad_norm": 1.4926906715118093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334200 + }, + { + "epoch": 1.6208581027749454, + "grad_norm": 1.1014212759619113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334210 + }, + { + "epoch": 1.6209066009677815, + "grad_norm": 1.7713447277856176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334220 + }, + { + "epoch": 1.6209550991606174, + "grad_norm": 1.2204593531350838e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334230 + }, + { + "epoch": 1.6210035973534536, + "grad_norm": 1.774562178979977e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334240 + }, + { + "epoch": 1.6210520955462897, + "grad_norm": 1.0311339337931713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334250 + }, + { + "epoch": 1.6211005937391259, + "grad_norm": 1.0983796983055072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334260 + }, + { + "epoch": 1.621149091931962, + "grad_norm": 1.117992724175565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334270 + }, + { + "epoch": 1.621197590124798, + "grad_norm": 1.014100917018368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334280 + }, + { + "epoch": 1.621246088317634, + "grad_norm": 2.2851543235447025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334290 + }, + { + "epoch": 1.6212945865104702, + "grad_norm": 1.0056052133222693e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334300 + }, + { + "epoch": 1.6213430847033061, + "grad_norm": 1.8788392708302126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334310 + }, + { + "epoch": 1.6213915828961425, + "grad_norm": 1.0613814538373845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334320 + }, + { + "epoch": 1.6214400810889784, + "grad_norm": 1.0425980008221813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334330 + }, + { + "epoch": 1.6214885792818146, + "grad_norm": 1.6290712210320635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334340 + }, + { + "epoch": 1.6215370774746507, + "grad_norm": 1.0429624808239168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334350 + }, + { + "epoch": 1.6215855756674866, + "grad_norm": 9.70985638559796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334360 + }, + { + "epoch": 1.6216340738603228, + "grad_norm": 9.729950534165255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334370 + }, + { + "epoch": 1.621682572053159, + "grad_norm": 1.4721248362548067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334380 + }, + { + "epoch": 1.6217310702459948, + "grad_norm": 1.6206997770495946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334390 + }, + { + "epoch": 1.6217795684388312, + "grad_norm": 1.0302851478627417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334400 + }, + { + "epoch": 1.6218280666316671, + "grad_norm": 9.399447549185425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334410 + }, + { + "epoch": 1.6218765648245033, + "grad_norm": 9.04838032056432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334420 + }, + { + "epoch": 1.6219250630173394, + "grad_norm": 1.058306565937528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334430 + }, + { + "epoch": 1.6219735612101753, + "grad_norm": 1.4776745729250251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334440 + }, + { + "epoch": 1.6220220594030115, + "grad_norm": 9.361289130538353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334450 + }, + { + "epoch": 1.6220705575958476, + "grad_norm": 1.0365672551415628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334460 + }, + { + "epoch": 1.6221190557886835, + "grad_norm": 8.437559131380112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334470 + }, + { + "epoch": 1.62216755398152, + "grad_norm": 9.756591907716938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334480 + }, + { + "epoch": 1.6222160521743558, + "grad_norm": 1.3614289855468087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334490 + }, + { + "epoch": 1.622264550367192, + "grad_norm": 9.379822358823731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334500 + }, + { + "epoch": 1.6223130485600281, + "grad_norm": 6.42180020804517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334510 + }, + { + "epoch": 1.622361546752864, + "grad_norm": 9.169591521640541e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334520 + }, + { + "epoch": 1.6224100449457002, + "grad_norm": 1.0279184152750531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334530 + }, + { + "epoch": 1.6224585431385363, + "grad_norm": 1.336152536168811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334540 + }, + { + "epoch": 1.6225070413313722, + "grad_norm": 8.346586355401087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334550 + }, + { + "epoch": 1.6225555395242086, + "grad_norm": 7.944856861286098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334560 + }, + { + "epoch": 1.6226040377170445, + "grad_norm": 8.918595995055512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334570 + }, + { + "epoch": 1.6226525359098807, + "grad_norm": 9.640580174163915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334580 + }, + { + "epoch": 1.6227010341027168, + "grad_norm": 1.2528980732895434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334590 + }, + { + "epoch": 1.6227495322955527, + "grad_norm": 8.39694678234082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334600 + }, + { + "epoch": 1.6227980304883889, + "grad_norm": 7.850404699638602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334610 + }, + { + "epoch": 1.622846528681225, + "grad_norm": 8.674849141243612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334620 + }, + { + "epoch": 1.622895026874061, + "grad_norm": 8.381711040783557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334630 + }, + { + "epoch": 1.6229435250668973, + "grad_norm": 1.3905138303016429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334640 + }, + { + "epoch": 1.6229920232597332, + "grad_norm": 8.096066608231922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334650 + }, + { + "epoch": 1.6230405214525694, + "grad_norm": 1.0257223266307847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334660 + }, + { + "epoch": 1.6230890196454055, + "grad_norm": 8.13070641925151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334670 + }, + { + "epoch": 1.6231375178382415, + "grad_norm": 1.0196539506068802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334680 + }, + { + "epoch": 1.6231860160310776, + "grad_norm": 1.1633711665126611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334690 + }, + { + "epoch": 1.6232345142239137, + "grad_norm": 8.218980838137213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334700 + }, + { + "epoch": 1.6232830124167497, + "grad_norm": 9.951825177267892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334710 + }, + { + "epoch": 1.623331510609586, + "grad_norm": 7.288718393283489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334720 + }, + { + "epoch": 1.623380008802422, + "grad_norm": 7.967279316289932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334730 + }, + { + "epoch": 1.623428506995258, + "grad_norm": 1.2204608310639742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334740 + }, + { + "epoch": 1.6234770051880942, + "grad_norm": 7.166515842982335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334750 + }, + { + "epoch": 1.6235255033809302, + "grad_norm": 7.979868996699224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334760 + }, + { + "epoch": 1.6235740015737665, + "grad_norm": 7.983481395967829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334770 + }, + { + "epoch": 1.6236224997666024, + "grad_norm": 8.637728683424939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334780 + }, + { + "epoch": 1.6236709979594386, + "grad_norm": 1.3787712305202149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334790 + }, + { + "epoch": 1.6237194961522747, + "grad_norm": 7.962442509779066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334800 + }, + { + "epoch": 1.6237679943451107, + "grad_norm": 7.647536222066265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334810 + }, + { + "epoch": 1.6238164925379468, + "grad_norm": 7.812456033207127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334820 + }, + { + "epoch": 1.623864990730783, + "grad_norm": 7.596846671731328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334830 + }, + { + "epoch": 1.6239134889236189, + "grad_norm": 2.7155176212545484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334840 + }, + { + "epoch": 1.6239619871164552, + "grad_norm": 6.820534963480895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334850 + }, + { + "epoch": 1.6240104853092912, + "grad_norm": 7.611876071678125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334860 + }, + { + "epoch": 1.6240589835021273, + "grad_norm": 8.061751941568218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334870 + }, + { + "epoch": 1.6241074816949634, + "grad_norm": 3.549811708580819e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334880 + }, + { + "epoch": 1.6241559798877994, + "grad_norm": 1.0642489769452368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334890 + }, + { + "epoch": 1.6242044780806355, + "grad_norm": 2.0392294572957326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334900 + }, + { + "epoch": 1.6242529762734716, + "grad_norm": 1.4480170875685872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334910 + }, + { + "epoch": 1.6243014744663076, + "grad_norm": 6.827132210673881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334920 + }, + { + "epoch": 1.624349972659144, + "grad_norm": 7.837654720788123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334930 + }, + { + "epoch": 1.6243984708519799, + "grad_norm": 9.62018816608179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334940 + }, + { + "epoch": 1.624446969044816, + "grad_norm": 6.524683158204425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334950 + }, + { + "epoch": 1.6244954672376521, + "grad_norm": 6.553029834321933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334960 + }, + { + "epoch": 1.624543965430488, + "grad_norm": 6.489498218797962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334970 + }, + { + "epoch": 1.6245924636233242, + "grad_norm": 6.177563705023204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334980 + }, + { + "epoch": 1.6246409618161604, + "grad_norm": 1.0503371186132426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 334990 + }, + { + "epoch": 1.6246894600089963, + "grad_norm": 6.947015549485513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335000 + }, + { + "epoch": 1.6247379582018326, + "grad_norm": 6.119215072430961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335010 + }, + { + "epoch": 1.6247864563946686, + "grad_norm": 6.59206023101433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335020 + }, + { + "epoch": 1.6248349545875047, + "grad_norm": 7.389148777292576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335030 + }, + { + "epoch": 1.6248834527803409, + "grad_norm": 9.434282333131705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335040 + }, + { + "epoch": 1.6249319509731768, + "grad_norm": 6.087599899728957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335050 + }, + { + "epoch": 1.624980449166013, + "grad_norm": 8.59553836107807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335060 + }, + { + "epoch": 1.625028947358849, + "grad_norm": 5.792501269752393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335070 + }, + { + "epoch": 1.625077445551685, + "grad_norm": 6.684950903945719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335080 + }, + { + "epoch": 1.6251259437445214, + "grad_norm": 9.962744798031054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335090 + }, + { + "epoch": 1.6251744419373573, + "grad_norm": 6.948595228095655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335100 + }, + { + "epoch": 1.6252229401301934, + "grad_norm": 6.646694714618206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335110 + }, + { + "epoch": 1.6252714383230296, + "grad_norm": 5.744544182562095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335120 + }, + { + "epoch": 1.6253199365158655, + "grad_norm": 6.101749363551789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335130 + }, + { + "epoch": 1.6253684347087016, + "grad_norm": 8.576537311455468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335140 + }, + { + "epoch": 1.6254169329015378, + "grad_norm": 5.559760438700323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335150 + }, + { + "epoch": 1.6254654310943737, + "grad_norm": 5.496677886185353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335160 + }, + { + "epoch": 1.62551392928721, + "grad_norm": 6.02999648435798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335170 + }, + { + "epoch": 1.625562427480046, + "grad_norm": 6.278415298766049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335180 + }, + { + "epoch": 1.6256109256728821, + "grad_norm": 8.502461810167006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335190 + }, + { + "epoch": 1.6256594238657183, + "grad_norm": 5.715474458156677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335200 + }, + { + "epoch": 1.6257079220585542, + "grad_norm": 5.295694904816628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335210 + }, + { + "epoch": 1.6257564202513903, + "grad_norm": 5.212044698055252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335220 + }, + { + "epoch": 1.6258049184442265, + "grad_norm": 5.430551368590386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335230 + }, + { + "epoch": 1.6258534166370624, + "grad_norm": 9.172441082228033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335240 + }, + { + "epoch": 1.6259019148298988, + "grad_norm": 5.263455022941343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335250 + }, + { + "epoch": 1.6259504130227347, + "grad_norm": 5.239472784523969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335260 + }, + { + "epoch": 1.6259989112155708, + "grad_norm": 5.55325186724076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335270 + }, + { + "epoch": 1.626047409408407, + "grad_norm": 5.606246418210503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335280 + }, + { + "epoch": 1.626095907601243, + "grad_norm": 8.112609748422983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335290 + }, + { + "epoch": 1.6261444057940793, + "grad_norm": 5.228670261203661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335300 + }, + { + "epoch": 1.6261929039869152, + "grad_norm": 4.750810376208392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335310 + }, + { + "epoch": 1.6262414021797513, + "grad_norm": 4.851287371820945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335320 + }, + { + "epoch": 1.6262899003725875, + "grad_norm": 5.469270831781614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335330 + }, + { + "epoch": 1.6263383985654234, + "grad_norm": 8.109072382467275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335340 + }, + { + "epoch": 1.6263868967582595, + "grad_norm": 4.852329880122852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335350 + }, + { + "epoch": 1.6264353949510957, + "grad_norm": 5.995392484692275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335360 + }, + { + "epoch": 1.6264838931439316, + "grad_norm": 5.002985972168972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335370 + }, + { + "epoch": 1.626532391336768, + "grad_norm": 5.446165687317261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335380 + }, + { + "epoch": 1.626580889529604, + "grad_norm": 8.374401545552246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335390 + }, + { + "epoch": 1.62662938772244, + "grad_norm": 5.171082193555776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335400 + }, + { + "epoch": 1.6266778859152762, + "grad_norm": 4.915050340059679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335410 + }, + { + "epoch": 1.626726384108112, + "grad_norm": 4.707691516614432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335420 + }, + { + "epoch": 1.6267748823009482, + "grad_norm": 4.576261289912509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335430 + }, + { + "epoch": 1.6268233804937844, + "grad_norm": 8.38465837205149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335440 + }, + { + "epoch": 1.6268718786866203, + "grad_norm": 7.508127737310133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335450 + }, + { + "epoch": 1.6269203768794567, + "grad_norm": 4.6722539082111325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335460 + }, + { + "epoch": 1.6269688750722926, + "grad_norm": 4.6746907855776954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335470 + }, + { + "epoch": 1.6270173732651287, + "grad_norm": 4.6293808964037453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335480 + }, + { + "epoch": 1.6270658714579649, + "grad_norm": 7.537728947681899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335490 + }, + { + "epoch": 1.6271143696508008, + "grad_norm": 4.204686092634802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335500 + }, + { + "epoch": 1.627162867843637, + "grad_norm": 4.5803599846294674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335510 + }, + { + "epoch": 1.627211366036473, + "grad_norm": 4.316242723234609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335520 + }, + { + "epoch": 1.627259864229309, + "grad_norm": 4.5828511474610423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335530 + }, + { + "epoch": 1.6273083624221454, + "grad_norm": 7.072102903293853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335540 + }, + { + "epoch": 1.6273568606149813, + "grad_norm": 4.3112288494739914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335550 + }, + { + "epoch": 1.6274053588078174, + "grad_norm": 4.882913344772533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335560 + }, + { + "epoch": 1.6274538570006536, + "grad_norm": 4.441135104116256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335570 + }, + { + "epoch": 1.6275023551934895, + "grad_norm": 4.659815715513105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335580 + }, + { + "epoch": 1.6275508533863257, + "grad_norm": 6.564956720467308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335590 + }, + { + "epoch": 1.6275993515791618, + "grad_norm": 4.0146156266018806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335600 + }, + { + "epoch": 1.6276478497719977, + "grad_norm": 4.354202189915668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335610 + }, + { + "epoch": 1.627696347964834, + "grad_norm": 4.59109941175484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335620 + }, + { + "epoch": 1.62774484615767, + "grad_norm": 4.751212543396832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335630 + }, + { + "epoch": 1.6277933443505062, + "grad_norm": 1.019212618302845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335640 + }, + { + "epoch": 1.6278418425433423, + "grad_norm": 3.919921311990038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335650 + }, + { + "epoch": 1.6278903407361782, + "grad_norm": 3.9684718444732425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335660 + }, + { + "epoch": 1.6279388389290144, + "grad_norm": 4.6739859271838213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335670 + }, + { + "epoch": 1.6279873371218505, + "grad_norm": 4.459663784928125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335680 + }, + { + "epoch": 1.6280358353146864, + "grad_norm": 6.550820899065002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335690 + }, + { + "epoch": 1.6280843335075228, + "grad_norm": 4.2900913399535057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335700 + }, + { + "epoch": 1.6281328317003587, + "grad_norm": 5.390639898905647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335710 + }, + { + "epoch": 1.6281813298931949, + "grad_norm": 3.98194544004582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335720 + }, + { + "epoch": 1.628229828086031, + "grad_norm": 5.984639415146376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335730 + }, + { + "epoch": 1.628278326278867, + "grad_norm": 7.234979761960858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335740 + }, + { + "epoch": 1.628326824471703, + "grad_norm": 3.966886765738309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335750 + }, + { + "epoch": 1.6283753226645392, + "grad_norm": 4.110311522254051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335760 + }, + { + "epoch": 1.6284238208573751, + "grad_norm": 5.161153922017547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335770 + }, + { + "epoch": 1.6284723190502115, + "grad_norm": 4.748498838580417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335780 + }, + { + "epoch": 1.6285208172430474, + "grad_norm": 6.115524229244329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335790 + }, + { + "epoch": 1.6285693154358836, + "grad_norm": 4.613719113422121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335800 + }, + { + "epoch": 1.6286178136287197, + "grad_norm": 5.022380378250091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335810 + }, + { + "epoch": 1.6286663118215556, + "grad_norm": 3.7804980479450023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335820 + }, + { + "epoch": 1.628714810014392, + "grad_norm": 4.179411234872532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335830 + }, + { + "epoch": 1.628763308207228, + "grad_norm": 5.88813293234125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335840 + }, + { + "epoch": 1.628811806400064, + "grad_norm": 3.6272601278142247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335850 + }, + { + "epoch": 1.6288603045929002, + "grad_norm": 3.6041217299498385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335860 + }, + { + "epoch": 1.6289088027857361, + "grad_norm": 4.390028607303975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335870 + }, + { + "epoch": 1.6289573009785723, + "grad_norm": 3.4914884849968075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335880 + }, + { + "epoch": 1.6290057991714084, + "grad_norm": 6.008735340401472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335890 + }, + { + "epoch": 1.6290542973642443, + "grad_norm": 3.757882041099947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335900 + }, + { + "epoch": 1.6291027955570807, + "grad_norm": 4.809660936189175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335910 + }, + { + "epoch": 1.6291512937499166, + "grad_norm": 3.7158343957344186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335920 + }, + { + "epoch": 1.6291997919427528, + "grad_norm": 3.8183986816875404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335930 + }, + { + "epoch": 1.629248290135589, + "grad_norm": 6.15160843153717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335940 + }, + { + "epoch": 1.6292967883284248, + "grad_norm": 3.7931332030893827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335950 + }, + { + "epoch": 1.629345286521261, + "grad_norm": 3.557628076578112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335960 + }, + { + "epoch": 1.6293937847140971, + "grad_norm": 3.4401512039039517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335970 + }, + { + "epoch": 1.629442282906933, + "grad_norm": 4.207486199447885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335980 + }, + { + "epoch": 1.6294907810997694, + "grad_norm": 5.653018320117553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 335990 + }, + { + "epoch": 1.6295392792926053, + "grad_norm": 3.434858513173822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336000 + }, + { + "epoch": 1.6295877774854415, + "grad_norm": 3.6727445262840774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336010 + }, + { + "epoch": 1.6296362756782776, + "grad_norm": 5.667499181072344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336020 + }, + { + "epoch": 1.6296847738711135, + "grad_norm": 3.5814068155559653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336030 + }, + { + "epoch": 1.6297332720639497, + "grad_norm": 5.683279482582293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336040 + }, + { + "epoch": 1.6297817702567858, + "grad_norm": 3.465609665909142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336050 + }, + { + "epoch": 1.6298302684496218, + "grad_norm": 3.3121102660516044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336060 + }, + { + "epoch": 1.6298787666424581, + "grad_norm": 3.3447591363255924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336070 + }, + { + "epoch": 1.629927264835294, + "grad_norm": 3.674861375202454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336080 + }, + { + "epoch": 1.6299757630281302, + "grad_norm": 5.956840141152497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336090 + }, + { + "epoch": 1.6300242612209663, + "grad_norm": 3.340227578974009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336100 + }, + { + "epoch": 1.6300727594138023, + "grad_norm": 4.048235098252917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336110 + }, + { + "epoch": 1.6301212576066384, + "grad_norm": 4.445811896403029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336120 + }, + { + "epoch": 1.6301697557994745, + "grad_norm": 4.054854514379258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336130 + }, + { + "epoch": 1.6302182539923105, + "grad_norm": 5.608803803625051e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336140 + }, + { + "epoch": 1.6302667521851468, + "grad_norm": 3.134538815174892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336150 + }, + { + "epoch": 1.6303152503779827, + "grad_norm": 3.0298573960862996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336160 + }, + { + "epoch": 1.630363748570819, + "grad_norm": 3.1466055361306644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336170 + }, + { + "epoch": 1.630412246763655, + "grad_norm": 3.348910126987903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336180 + }, + { + "epoch": 1.630460744956491, + "grad_norm": 5.104572551317688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336190 + }, + { + "epoch": 1.630509243149327, + "grad_norm": 3.1101791364562814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336200 + }, + { + "epoch": 1.6305577413421632, + "grad_norm": 3.025515411536617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336210 + }, + { + "epoch": 1.6306062395349992, + "grad_norm": 3.7027902521913347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336220 + }, + { + "epoch": 1.6306547377278355, + "grad_norm": 3.3773221730371006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336230 + }, + { + "epoch": 1.6307032359206715, + "grad_norm": 5.00094074595836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336240 + }, + { + "epoch": 1.6307517341135076, + "grad_norm": 3.476276049241278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336250 + }, + { + "epoch": 1.6308002323063437, + "grad_norm": 3.4871771958933095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336260 + }, + { + "epoch": 1.6308487304991797, + "grad_norm": 3.152249519189354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336270 + }, + { + "epoch": 1.6308972286920158, + "grad_norm": 3.5799348552245647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336280 + }, + { + "epoch": 1.630945726884852, + "grad_norm": 5.255263317849312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336290 + }, + { + "epoch": 1.630994225077688, + "grad_norm": 2.837867327798449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336300 + }, + { + "epoch": 1.6310427232705242, + "grad_norm": 3.275148969805741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336310 + }, + { + "epoch": 1.6310912214633602, + "grad_norm": 3.254829152865568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336320 + }, + { + "epoch": 1.6311397196561963, + "grad_norm": 3.0150923180372047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336330 + }, + { + "epoch": 1.6311882178490325, + "grad_norm": 4.907430479761388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336340 + }, + { + "epoch": 1.6312367160418684, + "grad_norm": 2.9981393367961573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336350 + }, + { + "epoch": 1.6312852142347047, + "grad_norm": 2.702144286104158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336360 + }, + { + "epoch": 1.6313337124275407, + "grad_norm": 2.876521421057987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336370 + }, + { + "epoch": 1.6313822106203768, + "grad_norm": 3.0556438446183165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336380 + }, + { + "epoch": 1.631430708813213, + "grad_norm": 5.914729968026222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336390 + }, + { + "epoch": 1.6314792070060489, + "grad_norm": 2.9331286555134284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336400 + }, + { + "epoch": 1.631527705198885, + "grad_norm": 2.730856181187846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336410 + }, + { + "epoch": 1.6315762033917212, + "grad_norm": 4.0886450847210654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336420 + }, + { + "epoch": 1.631624701584557, + "grad_norm": 2.879879446027189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336430 + }, + { + "epoch": 1.6316731997773934, + "grad_norm": 4.801006525667617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336440 + }, + { + "epoch": 1.6317216979702294, + "grad_norm": 2.702955725908396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336450 + }, + { + "epoch": 1.6317701961630655, + "grad_norm": 2.927758657733648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336460 + }, + { + "epoch": 1.6318186943559017, + "grad_norm": 4.5632756950908515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336470 + }, + { + "epoch": 1.6318671925487376, + "grad_norm": 2.970367916077521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336480 + }, + { + "epoch": 1.6319156907415737, + "grad_norm": 4.3520094550331123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336490 + }, + { + "epoch": 1.6319641889344099, + "grad_norm": 2.9130251277820207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336500 + }, + { + "epoch": 1.6320126871272458, + "grad_norm": 2.694696945582109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336510 + }, + { + "epoch": 1.6320611853200822, + "grad_norm": 2.469526805271016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336520 + }, + { + "epoch": 1.632109683512918, + "grad_norm": 2.8143520580670156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336530 + }, + { + "epoch": 1.6321581817057542, + "grad_norm": 4.2983137404917215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336540 + }, + { + "epoch": 1.6322066798985904, + "grad_norm": 3.021310703843483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336550 + }, + { + "epoch": 1.6322551780914263, + "grad_norm": 2.499816105228092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336560 + }, + { + "epoch": 1.6323036762842624, + "grad_norm": 2.7937039703829214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336570 + }, + { + "epoch": 1.6323521744770986, + "grad_norm": 3.601081459692068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336580 + }, + { + "epoch": 1.6324006726699345, + "grad_norm": 4.2350399098722846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336590 + }, + { + "epoch": 1.6324491708627709, + "grad_norm": 2.5547493009980826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336600 + }, + { + "epoch": 1.6324976690556068, + "grad_norm": 2.1956078910534416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336610 + }, + { + "epoch": 1.632546167248443, + "grad_norm": 2.811005117564491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336620 + }, + { + "epoch": 1.632594665441279, + "grad_norm": 2.772897005343111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336630 + }, + { + "epoch": 1.632643163634115, + "grad_norm": 4.008768712537858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336640 + }, + { + "epoch": 1.6326916618269511, + "grad_norm": 9.313122427556664e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 336650 + }, + { + "epoch": 1.6327401600197873, + "grad_norm": 6.642671360168606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336660 + }, + { + "epoch": 1.6327886582126232, + "grad_norm": 1.9932484065066092e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336670 + }, + { + "epoch": 1.6328371564054596, + "grad_norm": 0.022620540112257004, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 336680 + }, + { + "epoch": 1.6328856545982955, + "grad_norm": 0.0006798780523240566, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336690 + }, + { + "epoch": 1.6329341527911316, + "grad_norm": 0.0015767388977110386, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 336700 + }, + { + "epoch": 1.6329826509839678, + "grad_norm": 0.1148664727807045, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 336710 + }, + { + "epoch": 1.6330311491768037, + "grad_norm": 2.4751554519752972e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 336720 + }, + { + "epoch": 1.6330796473696398, + "grad_norm": 1.4231853128876537e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336730 + }, + { + "epoch": 1.633128145562476, + "grad_norm": 1.5156189874687698e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336740 + }, + { + "epoch": 1.633176643755312, + "grad_norm": 2.7149579182150774e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336750 + }, + { + "epoch": 1.6332251419481483, + "grad_norm": 2.21882601181278e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 336760 + }, + { + "epoch": 1.6332736401409842, + "grad_norm": 0.002857113489881158, + "learning_rate": 0.0002, + "loss": 0.0068, + "step": 336770 + }, + { + "epoch": 1.6333221383338203, + "grad_norm": 0.06705185770988464, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 336780 + }, + { + "epoch": 1.6333706365266565, + "grad_norm": 0.0003524797211866826, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 336790 + }, + { + "epoch": 1.6334191347194924, + "grad_norm": 0.00018006443860940635, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 336800 + }, + { + "epoch": 1.6334676329123288, + "grad_norm": 0.00013099981879349798, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336810 + }, + { + "epoch": 1.6335161311051647, + "grad_norm": 0.0001673456426942721, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336820 + }, + { + "epoch": 1.6335646292980008, + "grad_norm": 7.607306179124862e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336830 + }, + { + "epoch": 1.633613127490837, + "grad_norm": 6.424776074709371e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336840 + }, + { + "epoch": 1.633661625683673, + "grad_norm": 5.5468502978328615e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336850 + }, + { + "epoch": 1.633710123876509, + "grad_norm": 5.158226122148335e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336860 + }, + { + "epoch": 1.6337586220693452, + "grad_norm": 4.570641976897605e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336870 + }, + { + "epoch": 1.6338071202621811, + "grad_norm": 4.033564982819371e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336880 + }, + { + "epoch": 1.6338556184550175, + "grad_norm": 4.0467362850904465e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336890 + }, + { + "epoch": 1.6339041166478534, + "grad_norm": 3.3664124202914536e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336900 + }, + { + "epoch": 1.6339526148406895, + "grad_norm": 3.239663055865094e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336910 + }, + { + "epoch": 1.6340011130335257, + "grad_norm": 2.9426686523947865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336920 + }, + { + "epoch": 1.6340496112263616, + "grad_norm": 2.792305349430535e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336930 + }, + { + "epoch": 1.6340981094191978, + "grad_norm": 2.782835508696735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336940 + }, + { + "epoch": 1.634146607612034, + "grad_norm": 2.5389957954757847e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336950 + }, + { + "epoch": 1.6341951058048698, + "grad_norm": 3.919958908227272e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336960 + }, + { + "epoch": 1.6342436039977062, + "grad_norm": 2.2510077542392537e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336970 + }, + { + "epoch": 1.634292102190542, + "grad_norm": 2.1220112103037536e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336980 + }, + { + "epoch": 1.6343406003833783, + "grad_norm": 2.1239085981505923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 336990 + }, + { + "epoch": 1.6343890985762144, + "grad_norm": 2.0313160348450765e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337000 + }, + { + "epoch": 1.6344375967690503, + "grad_norm": 1.9612758478615433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337010 + }, + { + "epoch": 1.6344860949618865, + "grad_norm": 1.8597413145471364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337020 + }, + { + "epoch": 1.6345345931547226, + "grad_norm": 1.7133523215306923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337030 + }, + { + "epoch": 1.6345830913475585, + "grad_norm": 1.7809697965276428e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337040 + }, + { + "epoch": 1.634631589540395, + "grad_norm": 1.614276879990939e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337050 + }, + { + "epoch": 1.6346800877332308, + "grad_norm": 1.540009725431446e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337060 + }, + { + "epoch": 1.634728585926067, + "grad_norm": 1.5370491382782348e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337070 + }, + { + "epoch": 1.634777084118903, + "grad_norm": 1.4246098544390406e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337080 + }, + { + "epoch": 1.634825582311739, + "grad_norm": 1.4571181054634508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337090 + }, + { + "epoch": 1.6348740805045752, + "grad_norm": 1.3222401321399957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337100 + }, + { + "epoch": 1.6349225786974113, + "grad_norm": 1.3414937711786479e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337110 + }, + { + "epoch": 1.6349710768902472, + "grad_norm": 1.2819443327316549e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337120 + }, + { + "epoch": 1.6350195750830836, + "grad_norm": 1.4469937013927847e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337130 + }, + { + "epoch": 1.6350680732759195, + "grad_norm": 1.2737077668134589e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337140 + }, + { + "epoch": 1.6351165714687557, + "grad_norm": 1.3055602721578907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337150 + }, + { + "epoch": 1.6351650696615918, + "grad_norm": 1.1474733582872432e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337160 + }, + { + "epoch": 1.6352135678544277, + "grad_norm": 1.11525077954866e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337170 + }, + { + "epoch": 1.6352620660472639, + "grad_norm": 2.5290140911238268e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337180 + }, + { + "epoch": 1.6353105642401, + "grad_norm": 1.0977134479617234e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337190 + }, + { + "epoch": 1.635359062432936, + "grad_norm": 1.0139110599993728e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337200 + }, + { + "epoch": 1.6354075606257723, + "grad_norm": 1.0106338777404744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337210 + }, + { + "epoch": 1.6354560588186082, + "grad_norm": 1.0032084901467897e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337220 + }, + { + "epoch": 1.6355045570114444, + "grad_norm": 9.550078175379895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337230 + }, + { + "epoch": 1.6355530552042805, + "grad_norm": 9.7692818599171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337240 + }, + { + "epoch": 1.6356015533971164, + "grad_norm": 8.954619261203334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337250 + }, + { + "epoch": 1.6356500515899526, + "grad_norm": 9.49023069551913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337260 + }, + { + "epoch": 1.6356985497827887, + "grad_norm": 8.661072570248507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337270 + }, + { + "epoch": 1.6357470479756246, + "grad_norm": 8.831163540889975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337280 + }, + { + "epoch": 1.635795546168461, + "grad_norm": 8.848036486597266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337290 + }, + { + "epoch": 1.635844044361297, + "grad_norm": 8.294766303151846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337300 + }, + { + "epoch": 1.635892542554133, + "grad_norm": 7.96111453382764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337310 + }, + { + "epoch": 1.6359410407469692, + "grad_norm": 7.593630471092183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337320 + }, + { + "epoch": 1.6359895389398051, + "grad_norm": 7.678866495552938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337330 + }, + { + "epoch": 1.6360380371326415, + "grad_norm": 7.70103360991925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337340 + }, + { + "epoch": 1.6360865353254774, + "grad_norm": 7.592666861455655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337350 + }, + { + "epoch": 1.6361350335183136, + "grad_norm": 7.369068043772131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337360 + }, + { + "epoch": 1.6361835317111497, + "grad_norm": 7.691347491345368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337370 + }, + { + "epoch": 1.6362320299039856, + "grad_norm": 7.032514076854568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337380 + }, + { + "epoch": 1.6362805280968218, + "grad_norm": 7.245840606628917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337390 + }, + { + "epoch": 1.636329026289658, + "grad_norm": 6.940567345736781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337400 + }, + { + "epoch": 1.6363775244824939, + "grad_norm": 7.0189657890296075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337410 + }, + { + "epoch": 1.6364260226753302, + "grad_norm": 6.837952696514549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337420 + }, + { + "epoch": 1.6364745208681661, + "grad_norm": 6.410058631445281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337430 + }, + { + "epoch": 1.6365230190610023, + "grad_norm": 6.908300292707281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337440 + }, + { + "epoch": 1.6365715172538384, + "grad_norm": 6.856672825961141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337450 + }, + { + "epoch": 1.6366200154466743, + "grad_norm": 6.385060714819701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337460 + }, + { + "epoch": 1.6366685136395105, + "grad_norm": 0.00020578848489094526, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337470 + }, + { + "epoch": 1.6367170118323466, + "grad_norm": 5.762775344919646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337480 + }, + { + "epoch": 1.6367655100251826, + "grad_norm": 6.092480361985508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337490 + }, + { + "epoch": 1.636814008218019, + "grad_norm": 5.785514076706022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337500 + }, + { + "epoch": 1.6368625064108548, + "grad_norm": 5.525278993445681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337510 + }, + { + "epoch": 1.636911004603691, + "grad_norm": 5.967011929897126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337520 + }, + { + "epoch": 1.6369595027965271, + "grad_norm": 5.420006345957518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337530 + }, + { + "epoch": 1.637008000989363, + "grad_norm": 5.792244792246493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337540 + }, + { + "epoch": 1.6370564991821992, + "grad_norm": 5.495396180776879e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337550 + }, + { + "epoch": 1.6371049973750353, + "grad_norm": 5.223944754106924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337560 + }, + { + "epoch": 1.6371534955678713, + "grad_norm": 5.3238313739711884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337570 + }, + { + "epoch": 1.6372019937607076, + "grad_norm": 5.206354671827285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337580 + }, + { + "epoch": 1.6372504919535436, + "grad_norm": 5.217206762608839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337590 + }, + { + "epoch": 1.6372989901463797, + "grad_norm": 5.000561486667721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337600 + }, + { + "epoch": 1.6373474883392158, + "grad_norm": 4.800314400199568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337610 + }, + { + "epoch": 1.6373959865320518, + "grad_norm": 4.852854090131586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337620 + }, + { + "epoch": 1.637444484724888, + "grad_norm": 4.762865501106717e-06, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 337630 + }, + { + "epoch": 1.637492982917724, + "grad_norm": 5.253178642306011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337640 + }, + { + "epoch": 1.63754148111056, + "grad_norm": 6.381410003086785e-06, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 337650 + }, + { + "epoch": 1.6375899793033963, + "grad_norm": 1.0043699148809537e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337660 + }, + { + "epoch": 1.6376384774962323, + "grad_norm": 1.4318702596938238e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337670 + }, + { + "epoch": 1.6376869756890684, + "grad_norm": 8.969343616627157e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337680 + }, + { + "epoch": 1.6377354738819045, + "grad_norm": 1.5507746866205707e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337690 + }, + { + "epoch": 1.6377839720747405, + "grad_norm": 6.905193731654435e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337700 + }, + { + "epoch": 1.6378324702675766, + "grad_norm": 1.116294788516825e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337710 + }, + { + "epoch": 1.6378809684604128, + "grad_norm": 1.0448525245010387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337720 + }, + { + "epoch": 1.6379294666532487, + "grad_norm": 9.99969142867485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337730 + }, + { + "epoch": 1.637977964846085, + "grad_norm": 6.900031439727172e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337740 + }, + { + "epoch": 1.638026463038921, + "grad_norm": 1.0016785381594673e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337750 + }, + { + "epoch": 1.638074961231757, + "grad_norm": 9.32188777369447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337760 + }, + { + "epoch": 1.6381234594245933, + "grad_norm": 9.769444659468718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337770 + }, + { + "epoch": 1.6381719576174292, + "grad_norm": 8.794663699518424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337780 + }, + { + "epoch": 1.6382204558102653, + "grad_norm": 9.86807845038129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337790 + }, + { + "epoch": 1.6382689540031015, + "grad_norm": 8.783931662037503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337800 + }, + { + "epoch": 1.6383174521959374, + "grad_norm": 8.223689292208292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337810 + }, + { + "epoch": 1.6383659503887738, + "grad_norm": 9.315863280789927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337820 + }, + { + "epoch": 1.6384144485816097, + "grad_norm": 1.0213288078375626e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337830 + }, + { + "epoch": 1.6384629467744458, + "grad_norm": 1.2144751053710934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337840 + }, + { + "epoch": 1.638511444967282, + "grad_norm": 1.2239447642059531e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337850 + }, + { + "epoch": 1.6385599431601179, + "grad_norm": 9.535910066915676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337860 + }, + { + "epoch": 1.6386084413529542, + "grad_norm": 8.85348163137678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337870 + }, + { + "epoch": 1.6386569395457902, + "grad_norm": 9.104195669351611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337880 + }, + { + "epoch": 1.6387054377386263, + "grad_norm": 9.621878234611358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337890 + }, + { + "epoch": 1.6387539359314625, + "grad_norm": 7.789736628183164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337900 + }, + { + "epoch": 1.6388024341242984, + "grad_norm": 1.1896370779140852e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337910 + }, + { + "epoch": 1.6388509323171345, + "grad_norm": 7.33881643100176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337920 + }, + { + "epoch": 1.6388994305099707, + "grad_norm": 8.392851668759249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337930 + }, + { + "epoch": 1.6389479287028066, + "grad_norm": 8.118148798530456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337940 + }, + { + "epoch": 1.638996426895643, + "grad_norm": 7.151844783948036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337950 + }, + { + "epoch": 1.6390449250884789, + "grad_norm": 6.646310794167221e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337960 + }, + { + "epoch": 1.639093423281315, + "grad_norm": 6.779247996746562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337970 + }, + { + "epoch": 1.6391419214741512, + "grad_norm": 6.590742486878298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337980 + }, + { + "epoch": 1.639190419666987, + "grad_norm": 6.872444373584585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 337990 + }, + { + "epoch": 1.6392389178598232, + "grad_norm": 6.324640708044171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338000 + }, + { + "epoch": 1.6392874160526594, + "grad_norm": 6.273786311794538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338010 + }, + { + "epoch": 1.6393359142454953, + "grad_norm": 5.9864628383365925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338020 + }, + { + "epoch": 1.6393844124383317, + "grad_norm": 5.897708433622029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338030 + }, + { + "epoch": 1.6394329106311676, + "grad_norm": 6.4983905758708715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338040 + }, + { + "epoch": 1.6394814088240037, + "grad_norm": 5.593185505858855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338050 + }, + { + "epoch": 1.6395299070168399, + "grad_norm": 5.630965461023152e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338060 + }, + { + "epoch": 1.6395784052096758, + "grad_norm": 5.545514795812778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338070 + }, + { + "epoch": 1.639626903402512, + "grad_norm": 5.397561380959814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338080 + }, + { + "epoch": 1.639675401595348, + "grad_norm": 5.494800461747218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338090 + }, + { + "epoch": 1.639723899788184, + "grad_norm": 7.756534614600241e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338100 + }, + { + "epoch": 1.6397723979810204, + "grad_norm": 5.042610609962139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338110 + }, + { + "epoch": 1.6398208961738563, + "grad_norm": 4.93192828798783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338120 + }, + { + "epoch": 1.6398693943666924, + "grad_norm": 4.670239832194056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338130 + }, + { + "epoch": 1.6399178925595286, + "grad_norm": 5.058279384684283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338140 + }, + { + "epoch": 1.6399663907523645, + "grad_norm": 4.8179995246755425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338150 + }, + { + "epoch": 1.6400148889452006, + "grad_norm": 4.50873722002143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338160 + }, + { + "epoch": 1.6400633871380368, + "grad_norm": 4.473717126529664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338170 + }, + { + "epoch": 1.6401118853308727, + "grad_norm": 4.616332716977922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338180 + }, + { + "epoch": 1.640160383523709, + "grad_norm": 4.787534180650255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338190 + }, + { + "epoch": 1.640208881716545, + "grad_norm": 4.1454982238064986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338200 + }, + { + "epoch": 1.6402573799093811, + "grad_norm": 4.310066742618801e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338210 + }, + { + "epoch": 1.6403058781022173, + "grad_norm": 4.300041837268509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338220 + }, + { + "epoch": 1.6403543762950532, + "grad_norm": 4.0697782424103934e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338230 + }, + { + "epoch": 1.6404028744878894, + "grad_norm": 4.413358510646503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338240 + }, + { + "epoch": 1.6404513726807255, + "grad_norm": 3.944863692595391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338250 + }, + { + "epoch": 1.6404998708735614, + "grad_norm": 7.13861163603724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338260 + }, + { + "epoch": 1.6405483690663978, + "grad_norm": 3.8576044971705414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338270 + }, + { + "epoch": 1.6405968672592337, + "grad_norm": 3.7538845845119795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338280 + }, + { + "epoch": 1.6406453654520698, + "grad_norm": 4.015008926216979e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338290 + }, + { + "epoch": 1.640693863644906, + "grad_norm": 3.673480250654393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338300 + }, + { + "epoch": 1.640742361837742, + "grad_norm": 3.6274459489504807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338310 + }, + { + "epoch": 1.640790860030578, + "grad_norm": 3.614389015638153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338320 + }, + { + "epoch": 1.6408393582234142, + "grad_norm": 3.40627502737334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338330 + }, + { + "epoch": 1.6408878564162501, + "grad_norm": 3.6369269764691126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338340 + }, + { + "epoch": 1.6409363546090865, + "grad_norm": 3.316421725685359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338350 + }, + { + "epoch": 1.6409848528019224, + "grad_norm": 3.155431159029831e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338360 + }, + { + "epoch": 1.6410333509947586, + "grad_norm": 3.235982831029105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338370 + }, + { + "epoch": 1.6410818491875947, + "grad_norm": 3.1807967388886027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338380 + }, + { + "epoch": 1.6411303473804306, + "grad_norm": 3.425080194574548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338390 + }, + { + "epoch": 1.641178845573267, + "grad_norm": 3.0568216971005313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338400 + }, + { + "epoch": 1.641227343766103, + "grad_norm": 3.222094164812006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338410 + }, + { + "epoch": 1.641275841958939, + "grad_norm": 3.018720690306509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338420 + }, + { + "epoch": 1.6413243401517752, + "grad_norm": 3.0226653962017735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338430 + }, + { + "epoch": 1.6413728383446111, + "grad_norm": 3.0668722956761485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338440 + }, + { + "epoch": 1.6414213365374473, + "grad_norm": 2.964576196973212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338450 + }, + { + "epoch": 1.6414698347302834, + "grad_norm": 4.191264451947063e-06, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 338460 + }, + { + "epoch": 1.6415183329231193, + "grad_norm": 1.008068738883594e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 338470 + }, + { + "epoch": 1.6415668311159557, + "grad_norm": 0.002029537223279476, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 338480 + }, + { + "epoch": 1.6416153293087916, + "grad_norm": 0.00013022412895224988, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 338490 + }, + { + "epoch": 1.6416638275016278, + "grad_norm": 0.0004943215753883123, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 338500 + }, + { + "epoch": 1.641712325694464, + "grad_norm": 3.3008716854965314e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 338510 + }, + { + "epoch": 1.6417608238872998, + "grad_norm": 2.8478284548327792e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338520 + }, + { + "epoch": 1.641809322080136, + "grad_norm": 2.8620636385312537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338530 + }, + { + "epoch": 1.6418578202729721, + "grad_norm": 2.8639785796258366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338540 + }, + { + "epoch": 1.641906318465808, + "grad_norm": 2.729604148044018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338550 + }, + { + "epoch": 1.6419548166586444, + "grad_norm": 2.7678122478391742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338560 + }, + { + "epoch": 1.6420033148514803, + "grad_norm": 2.634089014463825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338570 + }, + { + "epoch": 1.6420518130443165, + "grad_norm": 2.519857389415847e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338580 + }, + { + "epoch": 1.6421003112371526, + "grad_norm": 2.618096914375201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338590 + }, + { + "epoch": 1.6421488094299885, + "grad_norm": 2.5880349312501494e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338600 + }, + { + "epoch": 1.6421973076228247, + "grad_norm": 2.580985665190383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338610 + }, + { + "epoch": 1.6422458058156608, + "grad_norm": 2.5523925160086947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338620 + }, + { + "epoch": 1.6422943040084967, + "grad_norm": 2.4951502837211592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338630 + }, + { + "epoch": 1.642342802201333, + "grad_norm": 2.526126536395168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338640 + }, + { + "epoch": 1.642391300394169, + "grad_norm": 2.3878515094111208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338650 + }, + { + "epoch": 1.6424397985870052, + "grad_norm": 2.343377900615451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338660 + }, + { + "epoch": 1.6424882967798413, + "grad_norm": 2.3231448267324595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338670 + }, + { + "epoch": 1.6425367949726772, + "grad_norm": 2.3182412860478507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338680 + }, + { + "epoch": 1.6425852931655134, + "grad_norm": 2.2336248548526783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338690 + }, + { + "epoch": 1.6426337913583495, + "grad_norm": 2.242377831862541e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338700 + }, + { + "epoch": 1.6426822895511854, + "grad_norm": 2.222266630269587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338710 + }, + { + "epoch": 1.6427307877440218, + "grad_norm": 2.101264954035287e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338720 + }, + { + "epoch": 1.6427792859368577, + "grad_norm": 2.2206011180969654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338730 + }, + { + "epoch": 1.6428277841296939, + "grad_norm": 2.172324002458481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338740 + }, + { + "epoch": 1.64287628232253, + "grad_norm": 2.179848024752573e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338750 + }, + { + "epoch": 1.642924780515366, + "grad_norm": 2.085500454995781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338760 + }, + { + "epoch": 1.642973278708202, + "grad_norm": 2.0822985788981896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338770 + }, + { + "epoch": 1.6430217769010382, + "grad_norm": 2.0154184312559664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338780 + }, + { + "epoch": 1.6430702750938742, + "grad_norm": 2.1432929315778892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338790 + }, + { + "epoch": 1.6431187732867105, + "grad_norm": 2.0525465060927672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338800 + }, + { + "epoch": 1.6431672714795464, + "grad_norm": 1.9397600681259064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338810 + }, + { + "epoch": 1.6432157696723826, + "grad_norm": 2.0798495370399905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338820 + }, + { + "epoch": 1.6432642678652187, + "grad_norm": 1.981456762223388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338830 + }, + { + "epoch": 1.6433127660580547, + "grad_norm": 1.9567430626921123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338840 + }, + { + "epoch": 1.6433612642508908, + "grad_norm": 1.9588976556406124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338850 + }, + { + "epoch": 1.643409762443727, + "grad_norm": 1.8309190181753365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338860 + }, + { + "epoch": 1.643458260636563, + "grad_norm": 1.8418915033180383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338870 + }, + { + "epoch": 1.6435067588293992, + "grad_norm": 1.775124928826699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338880 + }, + { + "epoch": 1.6435552570222351, + "grad_norm": 1.969523964362452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338890 + }, + { + "epoch": 1.6436037552150713, + "grad_norm": 1.8259679563925602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338900 + }, + { + "epoch": 1.6436522534079074, + "grad_norm": 1.6908128372961073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338910 + }, + { + "epoch": 1.6437007516007434, + "grad_norm": 1.738467403811228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338920 + }, + { + "epoch": 1.6437492497935797, + "grad_norm": 1.744063069963886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338930 + }, + { + "epoch": 1.6437977479864156, + "grad_norm": 1.858647920016665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338940 + }, + { + "epoch": 1.6438462461792518, + "grad_norm": 1.6684637103026034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338950 + }, + { + "epoch": 1.643894744372088, + "grad_norm": 2.5314707272627857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338960 + }, + { + "epoch": 1.6439432425649239, + "grad_norm": 1.7622468249101075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338970 + }, + { + "epoch": 1.64399174075776, + "grad_norm": 1.6312332036250154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338980 + }, + { + "epoch": 1.6440402389505961, + "grad_norm": 1.7550026996104862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 338990 + }, + { + "epoch": 1.644088737143432, + "grad_norm": 1.6305359622492688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339000 + }, + { + "epoch": 1.6441372353362684, + "grad_norm": 1.6810560055091628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339010 + }, + { + "epoch": 1.6441857335291044, + "grad_norm": 1.6267523506030557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339020 + }, + { + "epoch": 1.6442342317219405, + "grad_norm": 1.615341261640424e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339030 + }, + { + "epoch": 1.6442827299147766, + "grad_norm": 1.5596887124047498e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339040 + }, + { + "epoch": 1.6443312281076126, + "grad_norm": 1.5226073628582526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339050 + }, + { + "epoch": 1.6443797263004487, + "grad_norm": 1.5292694115487393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339060 + }, + { + "epoch": 1.6444282244932849, + "grad_norm": 1.5845675989112351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339070 + }, + { + "epoch": 1.6444767226861208, + "grad_norm": 1.4169900168781169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339080 + }, + { + "epoch": 1.6445252208789571, + "grad_norm": 1.5474853398700361e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339090 + }, + { + "epoch": 1.644573719071793, + "grad_norm": 1.480902483308455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339100 + }, + { + "epoch": 1.6446222172646292, + "grad_norm": 1.5982068362063728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339110 + }, + { + "epoch": 1.6446707154574653, + "grad_norm": 1.4727420420967974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339120 + }, + { + "epoch": 1.6447192136503013, + "grad_norm": 1.6780321629994432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339130 + }, + { + "epoch": 1.6447677118431374, + "grad_norm": 1.4272571888795937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339140 + }, + { + "epoch": 1.6448162100359736, + "grad_norm": 1.5588458381898818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339150 + }, + { + "epoch": 1.6448647082288095, + "grad_norm": 1.452953256375622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339160 + }, + { + "epoch": 1.6449132064216458, + "grad_norm": 1.4159829788695788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339170 + }, + { + "epoch": 1.6449617046144818, + "grad_norm": 1.3496101018972695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339180 + }, + { + "epoch": 1.645010202807318, + "grad_norm": 1.4111777772995993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339190 + }, + { + "epoch": 1.645058701000154, + "grad_norm": 1.3042892987868981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339200 + }, + { + "epoch": 1.64510719919299, + "grad_norm": 1.3486255738826003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339210 + }, + { + "epoch": 1.6451556973858261, + "grad_norm": 1.3619126093544764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339220 + }, + { + "epoch": 1.6452041955786623, + "grad_norm": 1.3390485946729314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339230 + }, + { + "epoch": 1.6452526937714982, + "grad_norm": 1.4835927686362993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339240 + }, + { + "epoch": 1.6453011919643346, + "grad_norm": 1.260599447050481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339250 + }, + { + "epoch": 1.6453496901571705, + "grad_norm": 1.32904460770078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339260 + }, + { + "epoch": 1.6453981883500066, + "grad_norm": 1.2551473673738656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339270 + }, + { + "epoch": 1.6454466865428428, + "grad_norm": 1.2218647498229984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339280 + }, + { + "epoch": 1.6454951847356787, + "grad_norm": 1.3345407978704316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339290 + }, + { + "epoch": 1.6455436829285148, + "grad_norm": 1.249474394171557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339300 + }, + { + "epoch": 1.645592181121351, + "grad_norm": 1.2475653647925355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339310 + }, + { + "epoch": 1.645640679314187, + "grad_norm": 1.2407434724082123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339320 + }, + { + "epoch": 1.6456891775070233, + "grad_norm": 1.1999418347841129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339330 + }, + { + "epoch": 1.6457376756998592, + "grad_norm": 1.3167823453841265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339340 + }, + { + "epoch": 1.6457861738926953, + "grad_norm": 1.2415584933478385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339350 + }, + { + "epoch": 1.6458346720855315, + "grad_norm": 1.1448440773165203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339360 + }, + { + "epoch": 1.6458831702783674, + "grad_norm": 1.2059526852681302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339370 + }, + { + "epoch": 1.6459316684712038, + "grad_norm": 1.1696115507220384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339380 + }, + { + "epoch": 1.6459801666640397, + "grad_norm": 1.2739706107822713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339390 + }, + { + "epoch": 1.6460286648568758, + "grad_norm": 1.152355935118976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339400 + }, + { + "epoch": 1.646077163049712, + "grad_norm": 1.0701413657443481e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339410 + }, + { + "epoch": 1.6461256612425479, + "grad_norm": 1.1225854450458428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339420 + }, + { + "epoch": 1.646174159435384, + "grad_norm": 1.1413077345423517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339430 + }, + { + "epoch": 1.6462226576282202, + "grad_norm": 1.218130705638032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339440 + }, + { + "epoch": 1.646271155821056, + "grad_norm": 1.1802883363998262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339450 + }, + { + "epoch": 1.6463196540138925, + "grad_norm": 1.0938069863186684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339460 + }, + { + "epoch": 1.6463681522067284, + "grad_norm": 1.091119543161767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339470 + }, + { + "epoch": 1.6464166503995645, + "grad_norm": 1.1593067483772757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339480 + }, + { + "epoch": 1.6464651485924007, + "grad_norm": 1.1884264949912904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339490 + }, + { + "epoch": 1.6465136467852366, + "grad_norm": 1.0512300150367082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339500 + }, + { + "epoch": 1.6465621449780727, + "grad_norm": 1.0472609801581712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339510 + }, + { + "epoch": 1.6466106431709089, + "grad_norm": 1.0312398899259279e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339520 + }, + { + "epoch": 1.6466591413637448, + "grad_norm": 1.0596808124319068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339530 + }, + { + "epoch": 1.6467076395565812, + "grad_norm": 1.1994430906270281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339540 + }, + { + "epoch": 1.646756137749417, + "grad_norm": 9.888867680274416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339550 + }, + { + "epoch": 1.6468046359422532, + "grad_norm": 1.0485389339009998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339560 + }, + { + "epoch": 1.6468531341350894, + "grad_norm": 1.0439378002047306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339570 + }, + { + "epoch": 1.6469016323279253, + "grad_norm": 1.0040257620858029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339580 + }, + { + "epoch": 1.6469501305207614, + "grad_norm": 1.0388949931439129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339590 + }, + { + "epoch": 1.6469986287135976, + "grad_norm": 1.0296627124262159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339600 + }, + { + "epoch": 1.6470471269064335, + "grad_norm": 1.0346341241529444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339610 + }, + { + "epoch": 1.6470956250992699, + "grad_norm": 1.0025291885540355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339620 + }, + { + "epoch": 1.6471441232921058, + "grad_norm": 1.0164892501052236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339630 + }, + { + "epoch": 1.647192621484942, + "grad_norm": 1.0696237495722016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339640 + }, + { + "epoch": 1.647241119677778, + "grad_norm": 9.188396461468074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339650 + }, + { + "epoch": 1.647289617870614, + "grad_norm": 1.0485663324288907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339660 + }, + { + "epoch": 1.6473381160634502, + "grad_norm": 1.0174634326176601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339670 + }, + { + "epoch": 1.6473866142562863, + "grad_norm": 1.0562030183791649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339680 + }, + { + "epoch": 1.6474351124491222, + "grad_norm": 1.0437425999043626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339690 + }, + { + "epoch": 1.6474836106419586, + "grad_norm": 9.485517580287706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339700 + }, + { + "epoch": 1.6475321088347945, + "grad_norm": 8.874405352798931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339710 + }, + { + "epoch": 1.6475806070276307, + "grad_norm": 9.382846997141314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339720 + }, + { + "epoch": 1.6476291052204668, + "grad_norm": 9.68316840044281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339730 + }, + { + "epoch": 1.6476776034133027, + "grad_norm": 1.016472765513754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339740 + }, + { + "epoch": 1.6477261016061389, + "grad_norm": 2.4979872250696644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339750 + }, + { + "epoch": 1.647774599798975, + "grad_norm": 9.484323868491629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339760 + }, + { + "epoch": 1.647823097991811, + "grad_norm": 8.674865625835082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339770 + }, + { + "epoch": 1.6478715961846473, + "grad_norm": 8.512831186635594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339780 + }, + { + "epoch": 1.6479200943774832, + "grad_norm": 9.531412388241733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339790 + }, + { + "epoch": 1.6479685925703194, + "grad_norm": 8.633841162009048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339800 + }, + { + "epoch": 1.6480170907631555, + "grad_norm": 8.640890314381977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339810 + }, + { + "epoch": 1.6480655889559914, + "grad_norm": 8.669940712024982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339820 + }, + { + "epoch": 1.6481140871488276, + "grad_norm": 9.011055794871936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339830 + }, + { + "epoch": 1.6481625853416637, + "grad_norm": 8.987342994259961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339840 + }, + { + "epoch": 1.6482110835344996, + "grad_norm": 8.19092633719265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339850 + }, + { + "epoch": 1.648259581727336, + "grad_norm": 8.411221301685146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339860 + }, + { + "epoch": 1.648308079920172, + "grad_norm": 8.676250331518531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339870 + }, + { + "epoch": 1.648356578113008, + "grad_norm": 8.798380690677732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339880 + }, + { + "epoch": 1.6484050763058442, + "grad_norm": 8.905899449018762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339890 + }, + { + "epoch": 1.6484535744986801, + "grad_norm": 8.147353582899086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339900 + }, + { + "epoch": 1.6485020726915165, + "grad_norm": 8.005797553778393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339910 + }, + { + "epoch": 1.6485505708843524, + "grad_norm": 7.94166680861963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339920 + }, + { + "epoch": 1.6485990690771886, + "grad_norm": 8.186847821889387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339930 + }, + { + "epoch": 1.6486475672700247, + "grad_norm": 9.200218187288556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339940 + }, + { + "epoch": 1.6486960654628606, + "grad_norm": 7.604386382809025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339950 + }, + { + "epoch": 1.6487445636556968, + "grad_norm": 7.821640224392468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339960 + }, + { + "epoch": 1.648793061848533, + "grad_norm": 7.699624688939366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339970 + }, + { + "epoch": 1.6488415600413688, + "grad_norm": 7.850485985727573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339980 + }, + { + "epoch": 1.6488900582342052, + "grad_norm": 8.755077942623757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 339990 + }, + { + "epoch": 1.6489385564270411, + "grad_norm": 7.193206670308427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340000 + }, + { + "epoch": 1.6489870546198773, + "grad_norm": 7.444220386787492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340010 + }, + { + "epoch": 1.6490355528127134, + "grad_norm": 7.441914249284309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340020 + }, + { + "epoch": 1.6490840510055493, + "grad_norm": 7.579911880384316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340030 + }, + { + "epoch": 1.6491325491983855, + "grad_norm": 8.292278721455659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340040 + }, + { + "epoch": 1.6491810473912216, + "grad_norm": 7.419028520416759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340050 + }, + { + "epoch": 1.6492295455840575, + "grad_norm": 7.525812861786108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340060 + }, + { + "epoch": 1.649278043776894, + "grad_norm": 7.529807248829457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340070 + }, + { + "epoch": 1.6493265419697298, + "grad_norm": 7.020001362434414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340080 + }, + { + "epoch": 1.649375040162566, + "grad_norm": 7.986981245267089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340090 + }, + { + "epoch": 1.6494235383554021, + "grad_norm": 7.381769364656066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340100 + }, + { + "epoch": 1.649472036548238, + "grad_norm": 7.219917392831121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340110 + }, + { + "epoch": 1.6495205347410742, + "grad_norm": 7.08699360529863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340120 + }, + { + "epoch": 1.6495690329339103, + "grad_norm": 7.102298127392714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340130 + }, + { + "epoch": 1.6496175311267463, + "grad_norm": 7.534330279668211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340140 + }, + { + "epoch": 1.6496660293195826, + "grad_norm": 6.780264243388956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340150 + }, + { + "epoch": 1.6497145275124185, + "grad_norm": 6.372197844939365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340160 + }, + { + "epoch": 1.6497630257052547, + "grad_norm": 6.775757697141671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340170 + }, + { + "epoch": 1.6498115238980908, + "grad_norm": 7.02842783084634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340180 + }, + { + "epoch": 1.6498600220909267, + "grad_norm": 7.647050779269193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340190 + }, + { + "epoch": 1.649908520283763, + "grad_norm": 7.83685948135826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340200 + }, + { + "epoch": 1.649957018476599, + "grad_norm": 6.490796522484743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340210 + }, + { + "epoch": 1.650005516669435, + "grad_norm": 6.719949965372507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340220 + }, + { + "epoch": 1.6500540148622713, + "grad_norm": 6.430760208786523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340230 + }, + { + "epoch": 1.6501025130551072, + "grad_norm": 7.464020086445089e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340240 + }, + { + "epoch": 1.6501510112479434, + "grad_norm": 6.091456157264474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340250 + }, + { + "epoch": 1.6501995094407795, + "grad_norm": 7.128765560082684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340260 + }, + { + "epoch": 1.6502480076336155, + "grad_norm": 6.793291049689287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340270 + }, + { + "epoch": 1.6502965058264516, + "grad_norm": 6.762650741620746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340280 + }, + { + "epoch": 1.6503450040192877, + "grad_norm": 7.081798685248941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340290 + }, + { + "epoch": 1.6503935022121237, + "grad_norm": 6.317043244052911e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340300 + }, + { + "epoch": 1.65044200040496, + "grad_norm": 6.389215059243725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340310 + }, + { + "epoch": 1.650490498597796, + "grad_norm": 6.295518915067078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340320 + }, + { + "epoch": 1.650538996790632, + "grad_norm": 6.319733643067593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340330 + }, + { + "epoch": 1.6505874949834682, + "grad_norm": 6.979616387070564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340340 + }, + { + "epoch": 1.6506359931763042, + "grad_norm": 6.316829512797995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340350 + }, + { + "epoch": 1.6506844913691403, + "grad_norm": 6.723908541061974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340360 + }, + { + "epoch": 1.6507329895619764, + "grad_norm": 6.304506996457349e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340370 + }, + { + "epoch": 1.6507814877548124, + "grad_norm": 6.028545840308652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340380 + }, + { + "epoch": 1.6508299859476487, + "grad_norm": 7.17729506050091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340390 + }, + { + "epoch": 1.6508784841404847, + "grad_norm": 5.68104667308944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340400 + }, + { + "epoch": 1.6509269823333208, + "grad_norm": 6.581757361345808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340410 + }, + { + "epoch": 1.650975480526157, + "grad_norm": 5.996554932607978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340420 + }, + { + "epoch": 1.6510239787189929, + "grad_norm": 5.856135771864501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340430 + }, + { + "epoch": 1.6510724769118292, + "grad_norm": 6.632691906816035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340440 + }, + { + "epoch": 1.6511209751046652, + "grad_norm": 6.087944939281442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340450 + }, + { + "epoch": 1.6511694732975013, + "grad_norm": 5.791909529762052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340460 + }, + { + "epoch": 1.6512179714903374, + "grad_norm": 5.67730182865489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340470 + }, + { + "epoch": 1.6512664696831734, + "grad_norm": 5.795619699711096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340480 + }, + { + "epoch": 1.6513149678760095, + "grad_norm": 6.642988523708482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340490 + }, + { + "epoch": 1.6513634660688457, + "grad_norm": 6.396377898454375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340500 + }, + { + "epoch": 1.6514119642616816, + "grad_norm": 5.506595357473998e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340510 + }, + { + "epoch": 1.651460462454518, + "grad_norm": 5.505399371941166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340520 + }, + { + "epoch": 1.6515089606473539, + "grad_norm": 5.353803658181278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340530 + }, + { + "epoch": 1.65155745884019, + "grad_norm": 6.147591875560465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340540 + }, + { + "epoch": 1.6516059570330262, + "grad_norm": 5.52005474219186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340550 + }, + { + "epoch": 1.651654455225862, + "grad_norm": 5.637119215862185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340560 + }, + { + "epoch": 1.6517029534186982, + "grad_norm": 5.929212534283579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340570 + }, + { + "epoch": 1.6517514516115344, + "grad_norm": 5.429619704955257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340580 + }, + { + "epoch": 1.6517999498043703, + "grad_norm": 6.018706812938035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340590 + }, + { + "epoch": 1.6518484479972066, + "grad_norm": 5.578749551204965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340600 + }, + { + "epoch": 1.6518969461900426, + "grad_norm": 5.564378398048575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340610 + }, + { + "epoch": 1.6519454443828787, + "grad_norm": 5.540198912967753e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340620 + }, + { + "epoch": 1.6519939425757149, + "grad_norm": 5.329517307472997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340630 + }, + { + "epoch": 1.6520424407685508, + "grad_norm": 5.832942520100914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340640 + }, + { + "epoch": 1.652090938961387, + "grad_norm": 5.100792463963444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340650 + }, + { + "epoch": 1.652139437154223, + "grad_norm": 5.608883384411456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340660 + }, + { + "epoch": 1.652187935347059, + "grad_norm": 5.027623615205812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340670 + }, + { + "epoch": 1.6522364335398954, + "grad_norm": 5.212970677348494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340680 + }, + { + "epoch": 1.6522849317327313, + "grad_norm": 5.679182777384995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340690 + }, + { + "epoch": 1.6523334299255674, + "grad_norm": 5.042479642725084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340700 + }, + { + "epoch": 1.6523819281184036, + "grad_norm": 5.128613906890678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340710 + }, + { + "epoch": 1.6524304263112395, + "grad_norm": 4.889157594334392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340720 + }, + { + "epoch": 1.6524789245040756, + "grad_norm": 5.677252374880482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340730 + }, + { + "epoch": 1.6525274226969118, + "grad_norm": 5.663488309437525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340740 + }, + { + "epoch": 1.6525759208897477, + "grad_norm": 5.178512765269261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340750 + }, + { + "epoch": 1.652624419082584, + "grad_norm": 5.043634132562147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340760 + }, + { + "epoch": 1.65267291727542, + "grad_norm": 4.862533273808367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340770 + }, + { + "epoch": 1.6527214154682561, + "grad_norm": 4.768722305925621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340780 + }, + { + "epoch": 1.6527699136610923, + "grad_norm": 5.324380936144735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340790 + }, + { + "epoch": 1.6528184118539282, + "grad_norm": 4.698845827988407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340800 + }, + { + "epoch": 1.6528669100467643, + "grad_norm": 4.79443031053961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340810 + }, + { + "epoch": 1.6529154082396005, + "grad_norm": 4.6204192472032446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340820 + }, + { + "epoch": 1.6529639064324364, + "grad_norm": 4.700587226125208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340830 + }, + { + "epoch": 1.6530124046252728, + "grad_norm": 5.059367254034441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340840 + }, + { + "epoch": 1.6530609028181087, + "grad_norm": 4.7653693968641164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340850 + }, + { + "epoch": 1.6531094010109448, + "grad_norm": 4.5863455966355104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340860 + }, + { + "epoch": 1.653157899203781, + "grad_norm": 4.612748796262167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340870 + }, + { + "epoch": 1.653206397396617, + "grad_norm": 4.7370170364047226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340880 + }, + { + "epoch": 1.653254895589453, + "grad_norm": 5.057938210484281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340890 + }, + { + "epoch": 1.6533033937822892, + "grad_norm": 4.698608506714663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340900 + }, + { + "epoch": 1.6533518919751253, + "grad_norm": 4.602628393968189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340910 + }, + { + "epoch": 1.6534003901679615, + "grad_norm": 4.4706894186674617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340920 + }, + { + "epoch": 1.6534488883607974, + "grad_norm": 4.5851464847146417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340930 + }, + { + "epoch": 1.6534973865536335, + "grad_norm": 4.84772840536607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340940 + }, + { + "epoch": 1.6535458847464697, + "grad_norm": 4.428392799127323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340950 + }, + { + "epoch": 1.6535943829393056, + "grad_norm": 4.202612160497665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340960 + }, + { + "epoch": 1.653642881132142, + "grad_norm": 4.473023125228792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340970 + }, + { + "epoch": 1.653691379324978, + "grad_norm": 4.3120039094901585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340980 + }, + { + "epoch": 1.653739877517814, + "grad_norm": 4.5950514504511375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 340990 + }, + { + "epoch": 1.6537883757106502, + "grad_norm": 5.547538535211061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341000 + }, + { + "epoch": 1.653836873903486, + "grad_norm": 4.123879193684843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341010 + }, + { + "epoch": 1.6538853720963222, + "grad_norm": 4.2552980517029937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341020 + }, + { + "epoch": 1.6539338702891584, + "grad_norm": 4.379052143121953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341030 + }, + { + "epoch": 1.6539823684819943, + "grad_norm": 4.702377225385135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341040 + }, + { + "epoch": 1.6540308666748307, + "grad_norm": 4.133934226047131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341050 + }, + { + "epoch": 1.6540793648676666, + "grad_norm": 4.36008406268229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341060 + }, + { + "epoch": 1.6541278630605027, + "grad_norm": 4.3305465169396484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341070 + }, + { + "epoch": 1.654176361253339, + "grad_norm": 4.050199038374558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341080 + }, + { + "epoch": 1.6542248594461748, + "grad_norm": 4.465194081149093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341090 + }, + { + "epoch": 1.654273357639011, + "grad_norm": 4.3623037981888046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341100 + }, + { + "epoch": 1.654321855831847, + "grad_norm": 4.125832049339806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341110 + }, + { + "epoch": 1.654370354024683, + "grad_norm": 3.9517922800769156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341120 + }, + { + "epoch": 1.6544188522175194, + "grad_norm": 4.002353080068133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341130 + }, + { + "epoch": 1.6544673504103553, + "grad_norm": 4.296875317777449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341140 + }, + { + "epoch": 1.6545158486031915, + "grad_norm": 3.9912558236210316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341150 + }, + { + "epoch": 1.6545643467960276, + "grad_norm": 5.237016580394993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341160 + }, + { + "epoch": 1.6546128449888635, + "grad_norm": 3.952824840780522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341170 + }, + { + "epoch": 1.6546613431816997, + "grad_norm": 3.854559338378749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341180 + }, + { + "epoch": 1.6547098413745358, + "grad_norm": 4.3680768158083083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341190 + }, + { + "epoch": 1.6547583395673717, + "grad_norm": 3.7256663176776783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341200 + }, + { + "epoch": 1.654806837760208, + "grad_norm": 3.7746661973869777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341210 + }, + { + "epoch": 1.654855335953044, + "grad_norm": 4.0259780575979676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341220 + }, + { + "epoch": 1.6549038341458802, + "grad_norm": 3.786615252465708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341230 + }, + { + "epoch": 1.6549523323387163, + "grad_norm": 4.0807276491250377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341240 + }, + { + "epoch": 1.6550008305315522, + "grad_norm": 3.797660781401646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341250 + }, + { + "epoch": 1.6550493287243884, + "grad_norm": 3.7508920058826334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341260 + }, + { + "epoch": 1.6550978269172245, + "grad_norm": 3.8412517255892453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341270 + }, + { + "epoch": 1.6551463251100604, + "grad_norm": 4.0084128727357893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341280 + }, + { + "epoch": 1.6551948233028968, + "grad_norm": 3.968837063439423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341290 + }, + { + "epoch": 1.6552433214957327, + "grad_norm": 3.657827676306624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341300 + }, + { + "epoch": 1.6552918196885689, + "grad_norm": 4.3451791498227976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341310 + }, + { + "epoch": 1.655340317881405, + "grad_norm": 3.5251349572718027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341320 + }, + { + "epoch": 1.655388816074241, + "grad_norm": 3.62820344435022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341330 + }, + { + "epoch": 1.655437314267077, + "grad_norm": 3.8667849366902374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341340 + }, + { + "epoch": 1.6554858124599132, + "grad_norm": 3.6703562500406406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341350 + }, + { + "epoch": 1.6555343106527491, + "grad_norm": 3.625851832111948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341360 + }, + { + "epoch": 1.6555828088455855, + "grad_norm": 3.781544819503324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341370 + }, + { + "epoch": 1.6556313070384214, + "grad_norm": 3.449490009188594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341380 + }, + { + "epoch": 1.6556798052312576, + "grad_norm": 3.958457739372534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341390 + }, + { + "epoch": 1.6557283034240937, + "grad_norm": 3.50896016243496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341400 + }, + { + "epoch": 1.6557768016169296, + "grad_norm": 3.517219511195435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341410 + }, + { + "epoch": 1.655825299809766, + "grad_norm": 3.5126609532198927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341420 + }, + { + "epoch": 1.655873798002602, + "grad_norm": 3.64544320063942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341430 + }, + { + "epoch": 1.655922296195438, + "grad_norm": 3.689471270718059e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341440 + }, + { + "epoch": 1.6559707943882742, + "grad_norm": 3.336996314828866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341450 + }, + { + "epoch": 1.6560192925811101, + "grad_norm": 3.4645998425730795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341460 + }, + { + "epoch": 1.6560677907739463, + "grad_norm": 3.3758144013518177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341470 + }, + { + "epoch": 1.6561162889667824, + "grad_norm": 3.57111161974899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341480 + }, + { + "epoch": 1.6561647871596183, + "grad_norm": 3.6419680782273645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341490 + }, + { + "epoch": 1.6562132853524547, + "grad_norm": 3.440554223743675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341500 + }, + { + "epoch": 1.6562617835452906, + "grad_norm": 3.5690393929144193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341510 + }, + { + "epoch": 1.6563102817381268, + "grad_norm": 3.293424981620774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341520 + }, + { + "epoch": 1.656358779930963, + "grad_norm": 3.2452553000439366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341530 + }, + { + "epoch": 1.6564072781237988, + "grad_norm": 3.684760088162875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341540 + }, + { + "epoch": 1.656455776316635, + "grad_norm": 3.5446214496914763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341550 + }, + { + "epoch": 1.6565042745094711, + "grad_norm": 4.4607867266677204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341560 + }, + { + "epoch": 1.656552772702307, + "grad_norm": 3.3366481488883437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341570 + }, + { + "epoch": 1.6566012708951434, + "grad_norm": 3.362455629485339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341580 + }, + { + "epoch": 1.6566497690879793, + "grad_norm": 3.417339371480921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341590 + }, + { + "epoch": 1.6566982672808155, + "grad_norm": 3.141198021694436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341600 + }, + { + "epoch": 1.6567467654736516, + "grad_norm": 3.2335401556338184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341610 + }, + { + "epoch": 1.6567952636664875, + "grad_norm": 3.239650823161355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341620 + }, + { + "epoch": 1.6568437618593237, + "grad_norm": 3.2794062576613214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341630 + }, + { + "epoch": 1.6568922600521598, + "grad_norm": 3.561611379154783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341640 + }, + { + "epoch": 1.6569407582449958, + "grad_norm": 3.17651313253009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341650 + }, + { + "epoch": 1.6569892564378321, + "grad_norm": 3.0834317499284225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341660 + }, + { + "epoch": 1.657037754630668, + "grad_norm": 3.177731002779183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341670 + }, + { + "epoch": 1.6570862528235042, + "grad_norm": 3.264689212301164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341680 + }, + { + "epoch": 1.6571347510163403, + "grad_norm": 3.2806448757582984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341690 + }, + { + "epoch": 1.6571832492091763, + "grad_norm": 3.089187998739362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341700 + }, + { + "epoch": 1.6572317474020124, + "grad_norm": 4.0580050608696183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341710 + }, + { + "epoch": 1.6572802455948485, + "grad_norm": 2.9751640795439016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341720 + }, + { + "epoch": 1.6573287437876845, + "grad_norm": 3.047130974209722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341730 + }, + { + "epoch": 1.6573772419805208, + "grad_norm": 3.2361762691834883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341740 + }, + { + "epoch": 1.6574257401733568, + "grad_norm": 3.1497737040808715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341750 + }, + { + "epoch": 1.657474238366193, + "grad_norm": 2.935802854153735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341760 + }, + { + "epoch": 1.657522736559029, + "grad_norm": 2.984016020946001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341770 + }, + { + "epoch": 1.657571234751865, + "grad_norm": 3.2515143288947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341780 + }, + { + "epoch": 1.657619732944701, + "grad_norm": 3.2516661008230585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341790 + }, + { + "epoch": 1.6576682311375373, + "grad_norm": 2.8571557209033926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341800 + }, + { + "epoch": 1.6577167293303732, + "grad_norm": 2.8725310130539583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341810 + }, + { + "epoch": 1.6577652275232095, + "grad_norm": 3.1085275509212806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341820 + }, + { + "epoch": 1.6578137257160455, + "grad_norm": 2.935057921149564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341830 + }, + { + "epoch": 1.6578622239088816, + "grad_norm": 3.1843399028730346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341840 + }, + { + "epoch": 1.6579107221017177, + "grad_norm": 2.9510255217246595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341850 + }, + { + "epoch": 1.6579592202945537, + "grad_norm": 2.838958437223482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341860 + }, + { + "epoch": 1.6580077184873898, + "grad_norm": 2.9128548817425326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341870 + }, + { + "epoch": 1.658056216680226, + "grad_norm": 2.803186021083093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341880 + }, + { + "epoch": 1.6581047148730619, + "grad_norm": 3.078818053836585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341890 + }, + { + "epoch": 1.6581532130658982, + "grad_norm": 2.796659259729495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341900 + }, + { + "epoch": 1.6582017112587342, + "grad_norm": 3.352087389885128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341910 + }, + { + "epoch": 1.6582502094515703, + "grad_norm": 2.707728867790138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341920 + }, + { + "epoch": 1.6582987076444065, + "grad_norm": 2.8546830321829475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341930 + }, + { + "epoch": 1.6583472058372424, + "grad_norm": 3.1449388870896655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341940 + }, + { + "epoch": 1.6583957040300787, + "grad_norm": 2.805876704314869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341950 + }, + { + "epoch": 1.6584442022229147, + "grad_norm": 2.7553042514227855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341960 + }, + { + "epoch": 1.6584927004157508, + "grad_norm": 2.6566621613710595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341970 + }, + { + "epoch": 1.658541198608587, + "grad_norm": 2.883738261516555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341980 + }, + { + "epoch": 1.6585896968014229, + "grad_norm": 2.9302009352250025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 341990 + }, + { + "epoch": 1.658638194994259, + "grad_norm": 2.829339393883856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342000 + }, + { + "epoch": 1.6586866931870952, + "grad_norm": 2.7064663754572393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342010 + }, + { + "epoch": 1.658735191379931, + "grad_norm": 2.67134112164058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342020 + }, + { + "epoch": 1.6587836895727675, + "grad_norm": 2.661688824900921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342030 + }, + { + "epoch": 1.6588321877656034, + "grad_norm": 2.8707671617667074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342040 + }, + { + "epoch": 1.6588806859584395, + "grad_norm": 2.5576844109309604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342050 + }, + { + "epoch": 1.6589291841512757, + "grad_norm": 2.5979463202929765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342060 + }, + { + "epoch": 1.6589776823441116, + "grad_norm": 3.0713923138137034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342070 + }, + { + "epoch": 1.6590261805369477, + "grad_norm": 2.659543554273114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342080 + }, + { + "epoch": 1.6590746787297839, + "grad_norm": 2.8531582074720063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342090 + }, + { + "epoch": 1.6591231769226198, + "grad_norm": 2.579935198809835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342100 + }, + { + "epoch": 1.6591716751154562, + "grad_norm": 3.0048701660234656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342110 + }, + { + "epoch": 1.659220173308292, + "grad_norm": 2.646467009981279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342120 + }, + { + "epoch": 1.6592686715011282, + "grad_norm": 2.5975396056310274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342130 + }, + { + "epoch": 1.6593171696939644, + "grad_norm": 2.8159263365523657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342140 + }, + { + "epoch": 1.6593656678868003, + "grad_norm": 2.5111603463301435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342150 + }, + { + "epoch": 1.6594141660796364, + "grad_norm": 2.5685992000035185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342160 + }, + { + "epoch": 1.6594626642724726, + "grad_norm": 2.5765160671653575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342170 + }, + { + "epoch": 1.6595111624653085, + "grad_norm": 2.5112163370977214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342180 + }, + { + "epoch": 1.6595596606581449, + "grad_norm": 2.756803212378145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342190 + }, + { + "epoch": 1.6596081588509808, + "grad_norm": 2.5340872866763675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342200 + }, + { + "epoch": 1.659656657043817, + "grad_norm": 2.71728822553996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342210 + }, + { + "epoch": 1.659705155236653, + "grad_norm": 2.3782622804446873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342220 + }, + { + "epoch": 1.659753653429489, + "grad_norm": 2.4599398784630466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342230 + }, + { + "epoch": 1.6598021516223251, + "grad_norm": 2.500514142411703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342240 + }, + { + "epoch": 1.6598506498151613, + "grad_norm": 2.492821806754364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342250 + }, + { + "epoch": 1.6598991480079972, + "grad_norm": 2.43965615709385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342260 + }, + { + "epoch": 1.6599476462008336, + "grad_norm": 2.2309944824883132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342270 + }, + { + "epoch": 1.6599961443936695, + "grad_norm": 2.495303021987638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342280 + }, + { + "epoch": 1.6600446425865056, + "grad_norm": 2.6237600536660466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342290 + }, + { + "epoch": 1.6600931407793418, + "grad_norm": 2.2541553335031494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342300 + }, + { + "epoch": 1.6601416389721777, + "grad_norm": 2.4998064418468857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342310 + }, + { + "epoch": 1.6601901371650138, + "grad_norm": 2.2014468470388238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342320 + }, + { + "epoch": 1.66023863535785, + "grad_norm": 2.6255099783156766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342330 + }, + { + "epoch": 1.660287133550686, + "grad_norm": 2.361275761586512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342340 + }, + { + "epoch": 1.6603356317435223, + "grad_norm": 2.2107008135208162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342350 + }, + { + "epoch": 1.6603841299363582, + "grad_norm": 2.2744404759578174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342360 + }, + { + "epoch": 1.6604326281291943, + "grad_norm": 2.249570769663478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342370 + }, + { + "epoch": 1.6604811263220305, + "grad_norm": 2.172591706539606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342380 + }, + { + "epoch": 1.6605296245148664, + "grad_norm": 2.2050789993954822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342390 + }, + { + "epoch": 1.6605781227077026, + "grad_norm": 2.1552581586092856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342400 + }, + { + "epoch": 1.6606266209005387, + "grad_norm": 2.1415560524928878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342410 + }, + { + "epoch": 1.6606751190933746, + "grad_norm": 2.1673254479992465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342420 + }, + { + "epoch": 1.660723617286211, + "grad_norm": 2.1983788656143588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342430 + }, + { + "epoch": 1.660772115479047, + "grad_norm": 2.405958241524786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342440 + }, + { + "epoch": 1.660820613671883, + "grad_norm": 2.0814528056689596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342450 + }, + { + "epoch": 1.6608691118647192, + "grad_norm": 2.086974149051457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342460 + }, + { + "epoch": 1.6609176100575551, + "grad_norm": 2.1015250695199938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342470 + }, + { + "epoch": 1.6609661082503915, + "grad_norm": 1.9840101117551967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342480 + }, + { + "epoch": 1.6610146064432274, + "grad_norm": 2.1152108331534691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342490 + }, + { + "epoch": 1.6610631046360635, + "grad_norm": 2.0503631503743236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342500 + }, + { + "epoch": 1.6611116028288997, + "grad_norm": 3.360385676387523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342510 + }, + { + "epoch": 1.6611601010217356, + "grad_norm": 1.9935252737468545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342520 + }, + { + "epoch": 1.6612085992145718, + "grad_norm": 8.914066143006494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342530 + }, + { + "epoch": 1.661257097407408, + "grad_norm": 2.1635537450492848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342540 + }, + { + "epoch": 1.6613055956002438, + "grad_norm": 1.82462869702249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342550 + }, + { + "epoch": 1.6613540937930802, + "grad_norm": 2.0511883747076354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342560 + }, + { + "epoch": 1.661402591985916, + "grad_norm": 1.945991101592881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342570 + }, + { + "epoch": 1.6614510901787523, + "grad_norm": 1.831507887573025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342580 + }, + { + "epoch": 1.6614995883715884, + "grad_norm": 1.999088681259309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342590 + }, + { + "epoch": 1.6615480865644243, + "grad_norm": 1.8170855753396609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342600 + }, + { + "epoch": 1.6615965847572605, + "grad_norm": 1.82090531097856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342610 + }, + { + "epoch": 1.6616450829500966, + "grad_norm": 1.8547477509400778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342620 + }, + { + "epoch": 1.6616935811429325, + "grad_norm": 1.9199462997221417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342630 + }, + { + "epoch": 1.661742079335769, + "grad_norm": 0.0028770973440259695, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342640 + }, + { + "epoch": 1.6617905775286048, + "grad_norm": 1.921104768598525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342650 + }, + { + "epoch": 1.661839075721441, + "grad_norm": 2.1364716928928829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342660 + }, + { + "epoch": 1.661887573914277, + "grad_norm": 2.5233958922399324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342670 + }, + { + "epoch": 1.661936072107113, + "grad_norm": 2.0132628719693457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342680 + }, + { + "epoch": 1.6619845702999492, + "grad_norm": 2.2868555049626593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342690 + }, + { + "epoch": 1.6620330684927853, + "grad_norm": 1.8685599911805184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342700 + }, + { + "epoch": 1.6620815666856212, + "grad_norm": 2.0029133906973584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342710 + }, + { + "epoch": 1.6621300648784576, + "grad_norm": 2.0809338252547604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342720 + }, + { + "epoch": 1.6621785630712935, + "grad_norm": 1.8861864248265192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342730 + }, + { + "epoch": 1.6622270612641297, + "grad_norm": 2.0835366854043968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342740 + }, + { + "epoch": 1.6622755594569658, + "grad_norm": 1.8456894679275138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342750 + }, + { + "epoch": 1.6623240576498017, + "grad_norm": 1.865828949121351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342760 + }, + { + "epoch": 1.6623725558426379, + "grad_norm": 1.8223830977603939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342770 + }, + { + "epoch": 1.662421054035474, + "grad_norm": 1.895354841963126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342780 + }, + { + "epoch": 1.66246955222831, + "grad_norm": 2.0607357953394967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342790 + }, + { + "epoch": 1.6625180504211463, + "grad_norm": 1.973296690493953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342800 + }, + { + "epoch": 1.6625665486139822, + "grad_norm": 1.744061819408671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342810 + }, + { + "epoch": 1.6626150468068184, + "grad_norm": 1.9242735049829207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342820 + }, + { + "epoch": 1.6626635449996545, + "grad_norm": 1.666603282046708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342830 + }, + { + "epoch": 1.6627120431924904, + "grad_norm": 1.8776769650230563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342840 + }, + { + "epoch": 1.6627605413853266, + "grad_norm": 1.962511788633492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342850 + }, + { + "epoch": 1.6628090395781627, + "grad_norm": 1.700708622820457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342860 + }, + { + "epoch": 1.6628575377709987, + "grad_norm": 1.7219971937265655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342870 + }, + { + "epoch": 1.662906035963835, + "grad_norm": 1.8753588904019125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342880 + }, + { + "epoch": 1.662954534156671, + "grad_norm": 1.8359898490416526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342890 + }, + { + "epoch": 1.663003032349507, + "grad_norm": 1.7672945773483661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342900 + }, + { + "epoch": 1.6630515305423432, + "grad_norm": 1.802288664976004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342910 + }, + { + "epoch": 1.6631000287351791, + "grad_norm": 1.8289107117652748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342920 + }, + { + "epoch": 1.6631485269280153, + "grad_norm": 2.594777868125675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342930 + }, + { + "epoch": 1.6631970251208514, + "grad_norm": 1.6925659451771935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342940 + }, + { + "epoch": 1.6632455233136874, + "grad_norm": 1.5430080679834646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342950 + }, + { + "epoch": 1.6632940215065237, + "grad_norm": 2.3468308540941507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342960 + }, + { + "epoch": 1.6633425196993596, + "grad_norm": 1.6128834090523014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342970 + }, + { + "epoch": 1.6633910178921958, + "grad_norm": 2.0504222675299388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342980 + }, + { + "epoch": 1.663439516085032, + "grad_norm": 1.6638453814721288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 342990 + }, + { + "epoch": 1.6634880142778679, + "grad_norm": 1.6401317282088712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343000 + }, + { + "epoch": 1.6635365124707042, + "grad_norm": 1.6521775592082122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343010 + }, + { + "epoch": 1.6635850106635401, + "grad_norm": 1.588719271694572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343020 + }, + { + "epoch": 1.6636335088563763, + "grad_norm": 1.5574103429116803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343030 + }, + { + "epoch": 1.6636820070492124, + "grad_norm": 1.7519546702260413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343040 + }, + { + "epoch": 1.6637305052420484, + "grad_norm": 1.727205471979687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343050 + }, + { + "epoch": 1.6637790034348845, + "grad_norm": 1.6486801257542538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343060 + }, + { + "epoch": 1.6638275016277206, + "grad_norm": 2.4560611677770794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343070 + }, + { + "epoch": 1.6638759998205566, + "grad_norm": 1.4762819944280636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343080 + }, + { + "epoch": 1.663924498013393, + "grad_norm": 1.6633823918255075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343090 + }, + { + "epoch": 1.6639729962062288, + "grad_norm": 1.5261996111348708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343100 + }, + { + "epoch": 1.664021494399065, + "grad_norm": 1.5340832248966763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343110 + }, + { + "epoch": 1.6640699925919011, + "grad_norm": 1.5125060315313021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343120 + }, + { + "epoch": 1.664118490784737, + "grad_norm": 1.5890178417521383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343130 + }, + { + "epoch": 1.6641669889775732, + "grad_norm": 1.6891200971258513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343140 + }, + { + "epoch": 1.6642154871704093, + "grad_norm": 1.4273450688051526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343150 + }, + { + "epoch": 1.6642639853632453, + "grad_norm": 1.427179938673362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343160 + }, + { + "epoch": 1.6643124835560816, + "grad_norm": 1.4831665851033904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343170 + }, + { + "epoch": 1.6643609817489176, + "grad_norm": 1.581748421131124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343180 + }, + { + "epoch": 1.6644094799417537, + "grad_norm": 1.5061954172779224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343190 + }, + { + "epoch": 1.6644579781345898, + "grad_norm": 1.5637250783129275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343200 + }, + { + "epoch": 1.6645064763274258, + "grad_norm": 1.3768828921456588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343210 + }, + { + "epoch": 1.664554974520262, + "grad_norm": 1.367320123790705e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343220 + }, + { + "epoch": 1.664603472713098, + "grad_norm": 1.5008669151939102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343230 + }, + { + "epoch": 1.664651970905934, + "grad_norm": 1.4722968444402795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343240 + }, + { + "epoch": 1.6647004690987703, + "grad_norm": 1.3921655295234814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343250 + }, + { + "epoch": 1.6647489672916063, + "grad_norm": 1.350220628637544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343260 + }, + { + "epoch": 1.6647974654844424, + "grad_norm": 1.302819612192252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343270 + }, + { + "epoch": 1.6648459636772786, + "grad_norm": 1.4926447988727887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343280 + }, + { + "epoch": 1.6648944618701145, + "grad_norm": 1.5556524601834099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343290 + }, + { + "epoch": 1.6649429600629506, + "grad_norm": 3.816653872945608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343300 + }, + { + "epoch": 1.6649914582557868, + "grad_norm": 4.304869776206033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343310 + }, + { + "epoch": 1.6650399564486227, + "grad_norm": 9.353413474855188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343320 + }, + { + "epoch": 1.665088454641459, + "grad_norm": 6.344575922412332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343330 + }, + { + "epoch": 1.665136952834295, + "grad_norm": 1.1347372037562309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343340 + }, + { + "epoch": 1.6651854510271311, + "grad_norm": 3.8606378893746296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343350 + }, + { + "epoch": 1.6652339492199673, + "grad_norm": 4.5760168632114073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343360 + }, + { + "epoch": 1.6652824474128032, + "grad_norm": 4.2895098317785596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343370 + }, + { + "epoch": 1.6653309456056393, + "grad_norm": 6.056415600141918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343380 + }, + { + "epoch": 1.6653794437984755, + "grad_norm": 7.774974051244499e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343390 + }, + { + "epoch": 1.6654279419913114, + "grad_norm": 2.985551077472337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343400 + }, + { + "epoch": 1.6654764401841478, + "grad_norm": 3.698726231959881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343410 + }, + { + "epoch": 1.6655249383769837, + "grad_norm": 2.4249172270174313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343420 + }, + { + "epoch": 1.6655734365698198, + "grad_norm": 4.1803758676906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343430 + }, + { + "epoch": 1.665621934762656, + "grad_norm": 4.87381953462318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343440 + }, + { + "epoch": 1.6656704329554919, + "grad_norm": 2.9625468300764624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343450 + }, + { + "epoch": 1.665718931148328, + "grad_norm": 4.026603619422531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343460 + }, + { + "epoch": 1.6657674293411642, + "grad_norm": 3.559395622687589e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343470 + }, + { + "epoch": 1.6658159275340003, + "grad_norm": 2.2960389856052643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343480 + }, + { + "epoch": 1.6658644257268365, + "grad_norm": 4.72137912765902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343490 + }, + { + "epoch": 1.6659129239196724, + "grad_norm": 8.00966631686606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343500 + }, + { + "epoch": 1.6659614221125085, + "grad_norm": 1.7359501214286865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343510 + }, + { + "epoch": 1.6660099203053447, + "grad_norm": 2.1588820686702093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343520 + }, + { + "epoch": 1.6660584184981806, + "grad_norm": 2.2740196925497003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343530 + }, + { + "epoch": 1.666106916691017, + "grad_norm": 1.2819814401154872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343540 + }, + { + "epoch": 1.6661554148838529, + "grad_norm": 1.741203305982708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343550 + }, + { + "epoch": 1.666203913076689, + "grad_norm": 1.6915835487907316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343560 + }, + { + "epoch": 1.6662524112695252, + "grad_norm": 1.5461610303191264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343570 + }, + { + "epoch": 1.666300909462361, + "grad_norm": 2.1749741563326097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343580 + }, + { + "epoch": 1.6663494076551972, + "grad_norm": 3.18539747468094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343590 + }, + { + "epoch": 1.6663979058480334, + "grad_norm": 1.8474399610113323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343600 + }, + { + "epoch": 1.6664464040408693, + "grad_norm": 2.2682112899019558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343610 + }, + { + "epoch": 1.6664949022337057, + "grad_norm": 1.66720894867467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343620 + }, + { + "epoch": 1.6665434004265416, + "grad_norm": 2.1313832121450105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343630 + }, + { + "epoch": 1.6665918986193777, + "grad_norm": 2.5153968863378395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343640 + }, + { + "epoch": 1.6666403968122139, + "grad_norm": 1.8847435967472848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343650 + }, + { + "epoch": 1.6666888950050498, + "grad_norm": 1.4469905806890893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343660 + }, + { + "epoch": 1.666737393197886, + "grad_norm": 1.895337078394732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343670 + }, + { + "epoch": 1.666785891390722, + "grad_norm": 1.5648785733901605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343680 + }, + { + "epoch": 1.666834389583558, + "grad_norm": 2.374267467075697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343690 + }, + { + "epoch": 1.6668828877763944, + "grad_norm": 3.907711288775317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343700 + }, + { + "epoch": 1.6669313859692303, + "grad_norm": 1.445602038074867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343710 + }, + { + "epoch": 1.6669798841620664, + "grad_norm": 1.9931214012558485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343720 + }, + { + "epoch": 1.6670283823549026, + "grad_norm": 1.5115837470602855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343730 + }, + { + "epoch": 1.6670768805477385, + "grad_norm": 3.031298092537327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343740 + }, + { + "epoch": 1.6671253787405746, + "grad_norm": 1.414328352211669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343750 + }, + { + "epoch": 1.6671738769334108, + "grad_norm": 1.682192021235096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343760 + }, + { + "epoch": 1.6672223751262467, + "grad_norm": 1.3062947346043075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343770 + }, + { + "epoch": 1.667270873319083, + "grad_norm": 1.2280391104013688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343780 + }, + { + "epoch": 1.667319371511919, + "grad_norm": 2.0835916814121447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343790 + }, + { + "epoch": 1.6673678697047551, + "grad_norm": 1.2671920046614105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343800 + }, + { + "epoch": 1.6674163678975913, + "grad_norm": 1.2828188289404352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343810 + }, + { + "epoch": 1.6674648660904272, + "grad_norm": 1.1209396433287111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343820 + }, + { + "epoch": 1.6675133642832634, + "grad_norm": 1.2802784965515457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343830 + }, + { + "epoch": 1.6675618624760995, + "grad_norm": 1.810240064514801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343840 + }, + { + "epoch": 1.6676103606689354, + "grad_norm": 1.368092199527382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343850 + }, + { + "epoch": 1.6676588588617718, + "grad_norm": 1.1358417140172605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343860 + }, + { + "epoch": 1.6677073570546077, + "grad_norm": 1.2311598140968272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343870 + }, + { + "epoch": 1.6677558552474439, + "grad_norm": 1.2731992171666207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343880 + }, + { + "epoch": 1.66780435344028, + "grad_norm": 1.623804308792387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343890 + }, + { + "epoch": 1.667852851633116, + "grad_norm": 1.1481412087732679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343900 + }, + { + "epoch": 1.667901349825952, + "grad_norm": 1.2342003685716918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343910 + }, + { + "epoch": 1.6679498480187882, + "grad_norm": 1.126150195318587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343920 + }, + { + "epoch": 1.6679983462116241, + "grad_norm": 1.024663873749887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343930 + }, + { + "epoch": 1.6680468444044605, + "grad_norm": 1.4790235525197204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343940 + }, + { + "epoch": 1.6680953425972964, + "grad_norm": 1.2044877450989588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343950 + }, + { + "epoch": 1.6681438407901326, + "grad_norm": 1.1948169742481696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343960 + }, + { + "epoch": 1.6681923389829687, + "grad_norm": 1.1224977214396858e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343970 + }, + { + "epoch": 1.6682408371758046, + "grad_norm": 1.0892943436147107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343980 + }, + { + "epoch": 1.668289335368641, + "grad_norm": 1.5100656014510605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 343990 + }, + { + "epoch": 1.668337833561477, + "grad_norm": 1.0783642068190602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344000 + }, + { + "epoch": 1.668386331754313, + "grad_norm": 1.1014363821004736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344010 + }, + { + "epoch": 1.6684348299471492, + "grad_norm": 1.0159386931718473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344020 + }, + { + "epoch": 1.6684833281399851, + "grad_norm": 1.4372912460203224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344030 + }, + { + "epoch": 1.6685318263328213, + "grad_norm": 1.349312128695601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344040 + }, + { + "epoch": 1.6685803245256574, + "grad_norm": 1.0146395368337835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344050 + }, + { + "epoch": 1.6686288227184933, + "grad_norm": 1.2129162030305451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344060 + }, + { + "epoch": 1.6686773209113297, + "grad_norm": 1.1397138166557852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344070 + }, + { + "epoch": 1.6687258191041656, + "grad_norm": 1.0332789202038839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344080 + }, + { + "epoch": 1.6687743172970018, + "grad_norm": 1.343375970463967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344090 + }, + { + "epoch": 1.668822815489838, + "grad_norm": 1.0269267392004622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344100 + }, + { + "epoch": 1.6688713136826738, + "grad_norm": 9.708357850968241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344110 + }, + { + "epoch": 1.66891981187551, + "grad_norm": 1.0094075975075611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344120 + }, + { + "epoch": 1.6689683100683461, + "grad_norm": 1.2593763187851437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344130 + }, + { + "epoch": 1.669016808261182, + "grad_norm": 1.1849968473143235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344140 + }, + { + "epoch": 1.6690653064540184, + "grad_norm": 1.0832131636107079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344150 + }, + { + "epoch": 1.6691138046468543, + "grad_norm": 1.020743383151057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344160 + }, + { + "epoch": 1.6691623028396905, + "grad_norm": 1.030964398296419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344170 + }, + { + "epoch": 1.6692108010325266, + "grad_norm": 1.0743210054897645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344180 + }, + { + "epoch": 1.6692592992253625, + "grad_norm": 1.7864925894173211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344190 + }, + { + "epoch": 1.6693077974181987, + "grad_norm": 1.2103747337732784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344200 + }, + { + "epoch": 1.6693562956110348, + "grad_norm": 9.996631433750736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344210 + }, + { + "epoch": 1.6694047938038707, + "grad_norm": 9.465881589676428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344220 + }, + { + "epoch": 1.6694532919967071, + "grad_norm": 1.9761023395403754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344230 + }, + { + "epoch": 1.669501790189543, + "grad_norm": 1.3229445983142796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344240 + }, + { + "epoch": 1.6695502883823792, + "grad_norm": 9.21202385484321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344250 + }, + { + "epoch": 1.6695987865752153, + "grad_norm": 9.308728010637424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344260 + }, + { + "epoch": 1.6696472847680512, + "grad_norm": 9.33249708623407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344270 + }, + { + "epoch": 1.6696957829608874, + "grad_norm": 8.443056742635235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344280 + }, + { + "epoch": 1.6697442811537235, + "grad_norm": 1.3407647259100486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344290 + }, + { + "epoch": 1.6697927793465595, + "grad_norm": 9.082832264084573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344300 + }, + { + "epoch": 1.6698412775393958, + "grad_norm": 9.529455269330356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344310 + }, + { + "epoch": 1.6698897757322317, + "grad_norm": 9.14456919076656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344320 + }, + { + "epoch": 1.6699382739250679, + "grad_norm": 8.646025406733315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344330 + }, + { + "epoch": 1.669986772117904, + "grad_norm": 1.4796586356169428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344340 + }, + { + "epoch": 1.67003527031074, + "grad_norm": 8.510102844638823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344350 + }, + { + "epoch": 1.670083768503576, + "grad_norm": 2.388078996773402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344360 + }, + { + "epoch": 1.6701322666964122, + "grad_norm": 8.813936602791728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344370 + }, + { + "epoch": 1.6701807648892482, + "grad_norm": 9.01857575286158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344380 + }, + { + "epoch": 1.6702292630820845, + "grad_norm": 1.1922060139113455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344390 + }, + { + "epoch": 1.6702777612749204, + "grad_norm": 8.566559017708641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344400 + }, + { + "epoch": 1.6703262594677566, + "grad_norm": 8.367768344896831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344410 + }, + { + "epoch": 1.6703747576605927, + "grad_norm": 8.350234992349215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344420 + }, + { + "epoch": 1.6704232558534287, + "grad_norm": 8.793259098638373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344430 + }, + { + "epoch": 1.6704717540462648, + "grad_norm": 9.808739065420014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344440 + }, + { + "epoch": 1.670520252239101, + "grad_norm": 8.23496080215591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344450 + }, + { + "epoch": 1.6705687504319369, + "grad_norm": 8.301935849885922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344460 + }, + { + "epoch": 1.6706172486247732, + "grad_norm": 8.54115853599069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344470 + }, + { + "epoch": 1.6706657468176092, + "grad_norm": 8.177843113799099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344480 + }, + { + "epoch": 1.6707142450104453, + "grad_norm": 9.846593940210369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344490 + }, + { + "epoch": 1.6707627432032814, + "grad_norm": 8.206524171328056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344500 + }, + { + "epoch": 1.6708112413961174, + "grad_norm": 8.187575417650805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344510 + }, + { + "epoch": 1.6708597395889537, + "grad_norm": 7.645998323368985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344520 + }, + { + "epoch": 1.6709082377817897, + "grad_norm": 8.045386579169644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344530 + }, + { + "epoch": 1.6709567359746258, + "grad_norm": 9.71398890214914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344540 + }, + { + "epoch": 1.671005234167462, + "grad_norm": 8.576852650321598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344550 + }, + { + "epoch": 1.6710537323602979, + "grad_norm": 8.431997855495865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344560 + }, + { + "epoch": 1.671102230553134, + "grad_norm": 8.016644414965413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344570 + }, + { + "epoch": 1.6711507287459701, + "grad_norm": 8.091564041023958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344580 + }, + { + "epoch": 1.671199226938806, + "grad_norm": 1.0021438612284328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344590 + }, + { + "epoch": 1.6712477251316424, + "grad_norm": 8.609094948042184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344600 + }, + { + "epoch": 1.6712962233244784, + "grad_norm": 8.302626497425081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344610 + }, + { + "epoch": 1.6713447215173145, + "grad_norm": 8.78509496260449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344620 + }, + { + "epoch": 1.6713932197101506, + "grad_norm": 8.072814949855456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344630 + }, + { + "epoch": 1.6714417179029866, + "grad_norm": 9.146155832695513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344640 + }, + { + "epoch": 1.6714902160958227, + "grad_norm": 7.67176544513859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344650 + }, + { + "epoch": 1.6715387142886589, + "grad_norm": 8.115684835274806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344660 + }, + { + "epoch": 1.6715872124814948, + "grad_norm": 7.794646705860941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344670 + }, + { + "epoch": 1.6716357106743311, + "grad_norm": 7.753362041285072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344680 + }, + { + "epoch": 1.671684208867167, + "grad_norm": 9.390816302357052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344690 + }, + { + "epoch": 1.6717327070600032, + "grad_norm": 8.341613977336237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344700 + }, + { + "epoch": 1.6717812052528394, + "grad_norm": 7.508725019533813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344710 + }, + { + "epoch": 1.6718297034456753, + "grad_norm": 8.002545826002461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344720 + }, + { + "epoch": 1.6718782016385114, + "grad_norm": 7.995443240815803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344730 + }, + { + "epoch": 1.6719266998313476, + "grad_norm": 8.664163431149063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344740 + }, + { + "epoch": 1.6719751980241835, + "grad_norm": 7.511406607818572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344750 + }, + { + "epoch": 1.6720236962170199, + "grad_norm": 7.699691906282169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344760 + }, + { + "epoch": 1.6720721944098558, + "grad_norm": 7.604840845942817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344770 + }, + { + "epoch": 1.672120692602692, + "grad_norm": 7.745801156033849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344780 + }, + { + "epoch": 1.672169190795528, + "grad_norm": 9.387591148879437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344790 + }, + { + "epoch": 1.672217688988364, + "grad_norm": 7.926191614160416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344800 + }, + { + "epoch": 1.6722661871812001, + "grad_norm": 7.356135967029331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344810 + }, + { + "epoch": 1.6723146853740363, + "grad_norm": 7.342362806639358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344820 + }, + { + "epoch": 1.6723631835668722, + "grad_norm": 7.771462406935825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344830 + }, + { + "epoch": 1.6724116817597086, + "grad_norm": 9.07192969634707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344840 + }, + { + "epoch": 1.6724601799525445, + "grad_norm": 7.505937560381426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344850 + }, + { + "epoch": 1.6725086781453806, + "grad_norm": 7.417712311053037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344860 + }, + { + "epoch": 1.6725571763382168, + "grad_norm": 7.153663972303548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344870 + }, + { + "epoch": 1.6726056745310527, + "grad_norm": 7.612903374365487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344880 + }, + { + "epoch": 1.6726541727238888, + "grad_norm": 8.615212720997079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344890 + }, + { + "epoch": 1.672702670916725, + "grad_norm": 7.515868816199145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344900 + }, + { + "epoch": 1.672751169109561, + "grad_norm": 7.38531440447332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344910 + }, + { + "epoch": 1.6727996673023973, + "grad_norm": 7.437125049136739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344920 + }, + { + "epoch": 1.6728481654952332, + "grad_norm": 8.486873070978618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344930 + }, + { + "epoch": 1.6728966636880693, + "grad_norm": 8.366333759113331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344940 + }, + { + "epoch": 1.6729451618809055, + "grad_norm": 7.293684234355169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344950 + }, + { + "epoch": 1.6729936600737414, + "grad_norm": 7.452249661810129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344960 + }, + { + "epoch": 1.6730421582665775, + "grad_norm": 7.800494472576247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344970 + }, + { + "epoch": 1.6730906564594137, + "grad_norm": 7.55030171717408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344980 + }, + { + "epoch": 1.6731391546522496, + "grad_norm": 8.28379000950008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 344990 + }, + { + "epoch": 1.673187652845086, + "grad_norm": 7.00423328225952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345000 + }, + { + "epoch": 1.673236151037922, + "grad_norm": 7.042206107144011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345010 + }, + { + "epoch": 1.673284649230758, + "grad_norm": 7.252103984001224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345020 + }, + { + "epoch": 1.6733331474235942, + "grad_norm": 7.498352516677187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345030 + }, + { + "epoch": 1.67338164561643, + "grad_norm": 8.200291290449968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345040 + }, + { + "epoch": 1.6734301438092665, + "grad_norm": 7.188262429735914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345050 + }, + { + "epoch": 1.6734786420021024, + "grad_norm": 9.566818448547565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345060 + }, + { + "epoch": 1.6735271401949385, + "grad_norm": 7.160372206271859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345070 + }, + { + "epoch": 1.6735756383877747, + "grad_norm": 7.16114456622563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345080 + }, + { + "epoch": 1.6736241365806106, + "grad_norm": 7.754758257760841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345090 + }, + { + "epoch": 1.6736726347734467, + "grad_norm": 7.17802208782814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345100 + }, + { + "epoch": 1.6737211329662829, + "grad_norm": 7.624740305800515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345110 + }, + { + "epoch": 1.6737696311591188, + "grad_norm": 6.97211390843222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345120 + }, + { + "epoch": 1.6738181293519552, + "grad_norm": 7.401831680908799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345130 + }, + { + "epoch": 1.673866627544791, + "grad_norm": 8.26642505558084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345140 + }, + { + "epoch": 1.6739151257376272, + "grad_norm": 6.901496618638703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345150 + }, + { + "epoch": 1.6739636239304634, + "grad_norm": 7.022416070867621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345160 + }, + { + "epoch": 1.6740121221232993, + "grad_norm": 3.308012708203023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345170 + }, + { + "epoch": 1.6740606203161355, + "grad_norm": 6.99202971077284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345180 + }, + { + "epoch": 1.6741091185089716, + "grad_norm": 7.754825759320738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345190 + }, + { + "epoch": 1.6741576167018075, + "grad_norm": 7.823831538189552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345200 + }, + { + "epoch": 1.6742061148946439, + "grad_norm": 7.159203363471534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345210 + }, + { + "epoch": 1.6742546130874798, + "grad_norm": 7.034522297999501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345220 + }, + { + "epoch": 1.674303111280316, + "grad_norm": 7.768829135557098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345230 + }, + { + "epoch": 1.674351609473152, + "grad_norm": 7.682587011004216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345240 + }, + { + "epoch": 1.674400107665988, + "grad_norm": 6.696994603316853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345250 + }, + { + "epoch": 1.6744486058588242, + "grad_norm": 6.706306265868989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345260 + }, + { + "epoch": 1.6744971040516603, + "grad_norm": 7.072470253888241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345270 + }, + { + "epoch": 1.6745456022444962, + "grad_norm": 7.595618001232651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345280 + }, + { + "epoch": 1.6745941004373326, + "grad_norm": 7.230968179783304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345290 + }, + { + "epoch": 1.6746425986301685, + "grad_norm": 6.625366921753084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345300 + }, + { + "epoch": 1.6746910968230047, + "grad_norm": 6.671805152791421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345310 + }, + { + "epoch": 1.6747395950158408, + "grad_norm": 7.083161790433223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345320 + }, + { + "epoch": 1.6747880932086767, + "grad_norm": 6.693466758633804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345330 + }, + { + "epoch": 1.6748365914015129, + "grad_norm": 7.589729023038672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345340 + }, + { + "epoch": 1.674885089594349, + "grad_norm": 1.8079933283843275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345350 + }, + { + "epoch": 1.674933587787185, + "grad_norm": 7.082765307586669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345360 + }, + { + "epoch": 1.6749820859800213, + "grad_norm": 6.789304762833126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345370 + }, + { + "epoch": 1.6750305841728572, + "grad_norm": 6.796464191438645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345380 + }, + { + "epoch": 1.6750790823656934, + "grad_norm": 7.773312660219744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345390 + }, + { + "epoch": 1.6751275805585295, + "grad_norm": 6.577289468623349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345400 + }, + { + "epoch": 1.6751760787513654, + "grad_norm": 6.962999776760626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345410 + }, + { + "epoch": 1.6752245769442016, + "grad_norm": 6.692824428000677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345420 + }, + { + "epoch": 1.6752730751370377, + "grad_norm": 6.605549884852735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345430 + }, + { + "epoch": 1.6753215733298736, + "grad_norm": 7.433308013560236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345440 + }, + { + "epoch": 1.67537007152271, + "grad_norm": 6.504426153242093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345450 + }, + { + "epoch": 1.675418569715546, + "grad_norm": 6.368232874365276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345460 + }, + { + "epoch": 1.675467067908382, + "grad_norm": 6.345969438825705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345470 + }, + { + "epoch": 1.6755155661012182, + "grad_norm": 6.438985877821324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345480 + }, + { + "epoch": 1.6755640642940541, + "grad_norm": 7.46474952961762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345490 + }, + { + "epoch": 1.6756125624868903, + "grad_norm": 6.516383166399464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345500 + }, + { + "epoch": 1.6756610606797264, + "grad_norm": 6.450024159221357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345510 + }, + { + "epoch": 1.6757095588725626, + "grad_norm": 7.107151844820692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345520 + }, + { + "epoch": 1.6757580570653987, + "grad_norm": 6.35577563912193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345530 + }, + { + "epoch": 1.6758065552582346, + "grad_norm": 7.59705116593068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345540 + }, + { + "epoch": 1.6758550534510708, + "grad_norm": 6.585746348264365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345550 + }, + { + "epoch": 1.675903551643907, + "grad_norm": 6.35579908703221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345560 + }, + { + "epoch": 1.6759520498367428, + "grad_norm": 6.55375202995856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345570 + }, + { + "epoch": 1.6760005480295792, + "grad_norm": 6.445253575293464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345580 + }, + { + "epoch": 1.6760490462224151, + "grad_norm": 7.188585726680685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345590 + }, + { + "epoch": 1.6760975444152513, + "grad_norm": 6.285335274469617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345600 + }, + { + "epoch": 1.6761460426080874, + "grad_norm": 6.800960505870535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345610 + }, + { + "epoch": 1.6761945408009233, + "grad_norm": 6.350102665919621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345620 + }, + { + "epoch": 1.6762430389937595, + "grad_norm": 6.412277286926837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345630 + }, + { + "epoch": 1.6762915371865956, + "grad_norm": 7.810368884975105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345640 + }, + { + "epoch": 1.6763400353794315, + "grad_norm": 6.716437184195456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345650 + }, + { + "epoch": 1.676388533572268, + "grad_norm": 6.2427680802557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345660 + }, + { + "epoch": 1.6764370317651038, + "grad_norm": 6.407847763512109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345670 + }, + { + "epoch": 1.67648552995794, + "grad_norm": 6.335258717626857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345680 + }, + { + "epoch": 1.6765340281507761, + "grad_norm": 7.305390425926817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345690 + }, + { + "epoch": 1.676582526343612, + "grad_norm": 6.334049373890593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345700 + }, + { + "epoch": 1.6766310245364482, + "grad_norm": 6.385958783994283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345710 + }, + { + "epoch": 1.6766795227292843, + "grad_norm": 6.589785783717161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345720 + }, + { + "epoch": 1.6767280209221203, + "grad_norm": 6.27700416089283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345730 + }, + { + "epoch": 1.6767765191149566, + "grad_norm": 7.076215524648433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345740 + }, + { + "epoch": 1.6768250173077925, + "grad_norm": 6.253862494531859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345750 + }, + { + "epoch": 1.6768735155006287, + "grad_norm": 6.933794338692678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345760 + }, + { + "epoch": 1.6769220136934648, + "grad_norm": 6.315688239055817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345770 + }, + { + "epoch": 1.6769705118863008, + "grad_norm": 6.042569111741614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345780 + }, + { + "epoch": 1.677019010079137, + "grad_norm": 6.661233697968783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345790 + }, + { + "epoch": 1.677067508271973, + "grad_norm": 6.112424699722396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345800 + }, + { + "epoch": 1.677116006464809, + "grad_norm": 6.136114905075374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345810 + }, + { + "epoch": 1.6771645046576453, + "grad_norm": 6.48585825047121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345820 + }, + { + "epoch": 1.6772130028504812, + "grad_norm": 7.680551306066263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345830 + }, + { + "epoch": 1.6772615010433174, + "grad_norm": 6.992720358311999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345840 + }, + { + "epoch": 1.6773099992361535, + "grad_norm": 6.197682012043515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345850 + }, + { + "epoch": 1.6773584974289895, + "grad_norm": 6.21261122546457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345860 + }, + { + "epoch": 1.6774069956218256, + "grad_norm": 6.315755030072978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345870 + }, + { + "epoch": 1.6774554938146617, + "grad_norm": 6.233477023442902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345880 + }, + { + "epoch": 1.6775039920074977, + "grad_norm": 6.936007679314571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345890 + }, + { + "epoch": 1.677552490200334, + "grad_norm": 6.310754230298699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345900 + }, + { + "epoch": 1.67760098839317, + "grad_norm": 5.924678347923873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345910 + }, + { + "epoch": 1.677649486586006, + "grad_norm": 5.9558324494446424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345920 + }, + { + "epoch": 1.6776979847788422, + "grad_norm": 5.91227085067203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345930 + }, + { + "epoch": 1.6777464829716782, + "grad_norm": 6.993075629679879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345940 + }, + { + "epoch": 1.6777949811645143, + "grad_norm": 6.082598247303395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345950 + }, + { + "epoch": 1.6778434793573505, + "grad_norm": 6.16784774365442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345960 + }, + { + "epoch": 1.6778919775501864, + "grad_norm": 6.167417865299285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345970 + }, + { + "epoch": 1.6779404757430227, + "grad_norm": 6.330336077553511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345980 + }, + { + "epoch": 1.6779889739358587, + "grad_norm": 6.846602218502085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 345990 + }, + { + "epoch": 1.6780374721286948, + "grad_norm": 5.916417933349294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346000 + }, + { + "epoch": 1.678085970321531, + "grad_norm": 6.061378599042655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346010 + }, + { + "epoch": 1.6781344685143669, + "grad_norm": 6.134381180800119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346020 + }, + { + "epoch": 1.6781829667072032, + "grad_norm": 5.983537221254664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346030 + }, + { + "epoch": 1.6782314649000392, + "grad_norm": 6.599616853009138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346040 + }, + { + "epoch": 1.6782799630928753, + "grad_norm": 5.979754291729478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346050 + }, + { + "epoch": 1.6783284612857114, + "grad_norm": 5.907084243972349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346060 + }, + { + "epoch": 1.6783769594785474, + "grad_norm": 5.985676665432038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346070 + }, + { + "epoch": 1.6784254576713835, + "grad_norm": 5.834411354044278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346080 + }, + { + "epoch": 1.6784739558642197, + "grad_norm": 6.908922500770132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346090 + }, + { + "epoch": 1.6785224540570556, + "grad_norm": 5.817276971242791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346100 + }, + { + "epoch": 1.678570952249892, + "grad_norm": 5.933142688263615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346110 + }, + { + "epoch": 1.6786194504427279, + "grad_norm": 5.9070210056688666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346120 + }, + { + "epoch": 1.678667948635564, + "grad_norm": 5.7979431034027584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346130 + }, + { + "epoch": 1.6787164468284002, + "grad_norm": 6.680257058633288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346140 + }, + { + "epoch": 1.678764945021236, + "grad_norm": 6.421504394893418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346150 + }, + { + "epoch": 1.6788134432140722, + "grad_norm": 5.657349788634747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346160 + }, + { + "epoch": 1.6788619414069084, + "grad_norm": 5.7589826241155606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346170 + }, + { + "epoch": 1.6789104395997443, + "grad_norm": 5.674000647104549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346180 + }, + { + "epoch": 1.6789589377925807, + "grad_norm": 6.57557137628828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346190 + }, + { + "epoch": 1.6790074359854166, + "grad_norm": 5.729868846060526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346200 + }, + { + "epoch": 1.6790559341782527, + "grad_norm": 5.8664941349206856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346210 + }, + { + "epoch": 1.6791044323710889, + "grad_norm": 5.794355217858538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346220 + }, + { + "epoch": 1.6791529305639248, + "grad_norm": 5.7364136551996125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346230 + }, + { + "epoch": 1.679201428756761, + "grad_norm": 6.340909664004357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346240 + }, + { + "epoch": 1.679249926949597, + "grad_norm": 6.176871636398573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346250 + }, + { + "epoch": 1.679298425142433, + "grad_norm": 6.280943409819884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346260 + }, + { + "epoch": 1.6793469233352694, + "grad_norm": 5.636948330334235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346270 + }, + { + "epoch": 1.6793954215281053, + "grad_norm": 5.677672376691589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346280 + }, + { + "epoch": 1.6794439197209414, + "grad_norm": 6.329756274681131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346290 + }, + { + "epoch": 1.6794924179137776, + "grad_norm": 5.660520585593076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346300 + }, + { + "epoch": 1.6795409161066135, + "grad_norm": 5.4871694743496846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346310 + }, + { + "epoch": 1.6795894142994496, + "grad_norm": 6.626121518138461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346320 + }, + { + "epoch": 1.6796379124922858, + "grad_norm": 5.777705425202839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346330 + }, + { + "epoch": 1.6796864106851217, + "grad_norm": 6.749329628519263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346340 + }, + { + "epoch": 1.679734908877958, + "grad_norm": 5.6341061593911945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346350 + }, + { + "epoch": 1.679783407070794, + "grad_norm": 5.551199677711338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346360 + }, + { + "epoch": 1.6798319052636301, + "grad_norm": 5.699462946040512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346370 + }, + { + "epoch": 1.6798804034564663, + "grad_norm": 5.621041054837406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346380 + }, + { + "epoch": 1.6799289016493022, + "grad_norm": 6.646666861342965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346390 + }, + { + "epoch": 1.6799773998421383, + "grad_norm": 6.211301695202565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346400 + }, + { + "epoch": 1.6800258980349745, + "grad_norm": 5.633944866190177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346410 + }, + { + "epoch": 1.6800743962278104, + "grad_norm": 5.730065666398332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346420 + }, + { + "epoch": 1.6801228944206468, + "grad_norm": 5.477071240989062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346430 + }, + { + "epoch": 1.6801713926134827, + "grad_norm": 6.155595144718973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346440 + }, + { + "epoch": 1.6802198908063188, + "grad_norm": 6.922540052300974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346450 + }, + { + "epoch": 1.680268388999155, + "grad_norm": 5.472616138035846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346460 + }, + { + "epoch": 1.680316887191991, + "grad_norm": 5.61638486828997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346470 + }, + { + "epoch": 1.680365385384827, + "grad_norm": 5.500524125068296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346480 + }, + { + "epoch": 1.6804138835776632, + "grad_norm": 6.167147859059696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346490 + }, + { + "epoch": 1.6804623817704991, + "grad_norm": 5.585861728718555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346500 + }, + { + "epoch": 1.6805108799633355, + "grad_norm": 5.5305399371263775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346510 + }, + { + "epoch": 1.6805593781561714, + "grad_norm": 5.593548735305376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346520 + }, + { + "epoch": 1.6806078763490075, + "grad_norm": 5.53132046832161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346530 + }, + { + "epoch": 1.6806563745418437, + "grad_norm": 6.066010627137075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346540 + }, + { + "epoch": 1.6807048727346796, + "grad_norm": 5.4301782625998385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346550 + }, + { + "epoch": 1.680753370927516, + "grad_norm": 5.4007937677624795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346560 + }, + { + "epoch": 1.680801869120352, + "grad_norm": 5.50548726607758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346570 + }, + { + "epoch": 1.680850367313188, + "grad_norm": 6.066927227266206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346580 + }, + { + "epoch": 1.6808988655060242, + "grad_norm": 6.191539370092869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346590 + }, + { + "epoch": 1.68094736369886, + "grad_norm": 9.427262170902395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346600 + }, + { + "epoch": 1.6809958618916963, + "grad_norm": 5.4625129308760734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346610 + }, + { + "epoch": 1.6810443600845324, + "grad_norm": 5.391740387494792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346620 + }, + { + "epoch": 1.6810928582773683, + "grad_norm": 5.2749662415862986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346630 + }, + { + "epoch": 1.6811413564702047, + "grad_norm": 5.725664564693034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346640 + }, + { + "epoch": 1.6811898546630406, + "grad_norm": 5.596555396891745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346650 + }, + { + "epoch": 1.6812383528558767, + "grad_norm": 5.403300917805609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346660 + }, + { + "epoch": 1.681286851048713, + "grad_norm": 6.567528743062212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346670 + }, + { + "epoch": 1.6813353492415488, + "grad_norm": 5.253419743667109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346680 + }, + { + "epoch": 1.681383847434385, + "grad_norm": 5.924350077179952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346690 + }, + { + "epoch": 1.681432345627221, + "grad_norm": 5.299278171833066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346700 + }, + { + "epoch": 1.681480843820057, + "grad_norm": 5.292502791576226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346710 + }, + { + "epoch": 1.6815293420128934, + "grad_norm": 5.2928115223949135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346720 + }, + { + "epoch": 1.6815778402057293, + "grad_norm": 6.088863102604591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346730 + }, + { + "epoch": 1.6816263383985655, + "grad_norm": 5.6675212078971526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346740 + }, + { + "epoch": 1.6816748365914016, + "grad_norm": 5.176758577363216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346750 + }, + { + "epoch": 1.6817233347842375, + "grad_norm": 5.4734478283080534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346760 + }, + { + "epoch": 1.6817718329770737, + "grad_norm": 5.457940588371457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346770 + }, + { + "epoch": 1.6818203311699098, + "grad_norm": 5.2767052949320714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346780 + }, + { + "epoch": 1.6818688293627457, + "grad_norm": 5.910898792649277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346790 + }, + { + "epoch": 1.681917327555582, + "grad_norm": 5.085445309305214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346800 + }, + { + "epoch": 1.681965825748418, + "grad_norm": 5.123028401499141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346810 + }, + { + "epoch": 1.6820143239412542, + "grad_norm": 5.396969271487251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346820 + }, + { + "epoch": 1.6820628221340903, + "grad_norm": 6.76693829859687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346830 + }, + { + "epoch": 1.6821113203269262, + "grad_norm": 5.6864294606384647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346840 + }, + { + "epoch": 1.6821598185197624, + "grad_norm": 5.2804161043695785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346850 + }, + { + "epoch": 1.6822083167125985, + "grad_norm": 5.227293442544578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346860 + }, + { + "epoch": 1.6822568149054344, + "grad_norm": 5.2557560081822885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346870 + }, + { + "epoch": 1.6823053130982708, + "grad_norm": 5.2838604602811756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346880 + }, + { + "epoch": 1.6823538112911067, + "grad_norm": 5.603740049764383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346890 + }, + { + "epoch": 1.6824023094839429, + "grad_norm": 5.1934001987774536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346900 + }, + { + "epoch": 1.682450807676779, + "grad_norm": 5.267803260267101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346910 + }, + { + "epoch": 1.682499305869615, + "grad_norm": 5.0567980025562065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346920 + }, + { + "epoch": 1.682547804062451, + "grad_norm": 5.05726660549044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346930 + }, + { + "epoch": 1.6825963022552872, + "grad_norm": 5.516715972930797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346940 + }, + { + "epoch": 1.6826448004481231, + "grad_norm": 5.139745340443369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346950 + }, + { + "epoch": 1.6826932986409595, + "grad_norm": 5.140129744063415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346960 + }, + { + "epoch": 1.6827417968337954, + "grad_norm": 5.1265754308360556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346970 + }, + { + "epoch": 1.6827902950266316, + "grad_norm": 4.9016982472949167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346980 + }, + { + "epoch": 1.6828387932194677, + "grad_norm": 5.731595464908423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 346990 + }, + { + "epoch": 1.6828872914123036, + "grad_norm": 5.116028134466433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347000 + }, + { + "epoch": 1.6829357896051398, + "grad_norm": 5.053561835666187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347010 + }, + { + "epoch": 1.682984287797976, + "grad_norm": 4.9584418349013504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347020 + }, + { + "epoch": 1.6830327859908119, + "grad_norm": 4.946351950252392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347030 + }, + { + "epoch": 1.6830812841836482, + "grad_norm": 5.8042221695586704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347040 + }, + { + "epoch": 1.6831297823764841, + "grad_norm": 4.963570532368067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347050 + }, + { + "epoch": 1.6831782805693203, + "grad_norm": 5.1169909198733876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347060 + }, + { + "epoch": 1.6832267787621564, + "grad_norm": 5.2451557763788514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347070 + }, + { + "epoch": 1.6832752769549923, + "grad_norm": 4.926919672243457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347080 + }, + { + "epoch": 1.6833237751478287, + "grad_norm": 5.459368779270335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347090 + }, + { + "epoch": 1.6833722733406646, + "grad_norm": 6.852904022025541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347100 + }, + { + "epoch": 1.6834207715335008, + "grad_norm": 5.152513793404978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347110 + }, + { + "epoch": 1.683469269726337, + "grad_norm": 4.924607210909926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347120 + }, + { + "epoch": 1.6835177679191728, + "grad_norm": 4.796248020966232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347130 + }, + { + "epoch": 1.683566266112009, + "grad_norm": 5.455828855360778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347140 + }, + { + "epoch": 1.6836147643048451, + "grad_norm": 4.9056197326535766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347150 + }, + { + "epoch": 1.683663262497681, + "grad_norm": 5.1192184713499955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347160 + }, + { + "epoch": 1.6837117606905174, + "grad_norm": 5.151416004878229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347170 + }, + { + "epoch": 1.6837602588833533, + "grad_norm": 4.7767272803866945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347180 + }, + { + "epoch": 1.6838087570761895, + "grad_norm": 5.303227368358421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347190 + }, + { + "epoch": 1.6838572552690256, + "grad_norm": 4.792699215272478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347200 + }, + { + "epoch": 1.6839057534618616, + "grad_norm": 4.8515879313981713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347210 + }, + { + "epoch": 1.6839542516546977, + "grad_norm": 4.897728800301593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347220 + }, + { + "epoch": 1.6840027498475338, + "grad_norm": 4.719404600450616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347230 + }, + { + "epoch": 1.6840512480403698, + "grad_norm": 5.216134013608098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347240 + }, + { + "epoch": 1.6840997462332061, + "grad_norm": 5.0369742154998676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347250 + }, + { + "epoch": 1.684148244426042, + "grad_norm": 5.0351058433761864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347260 + }, + { + "epoch": 1.6841967426188782, + "grad_norm": 4.862766900259885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347270 + }, + { + "epoch": 1.6842452408117143, + "grad_norm": 4.6399943443020675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347280 + }, + { + "epoch": 1.6842937390045503, + "grad_norm": 5.574531058982757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347290 + }, + { + "epoch": 1.6843422371973864, + "grad_norm": 4.802454611763096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347300 + }, + { + "epoch": 1.6843907353902225, + "grad_norm": 4.651341711792156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347310 + }, + { + "epoch": 1.6844392335830585, + "grad_norm": 4.808953590895726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347320 + }, + { + "epoch": 1.6844877317758948, + "grad_norm": 4.8103856897796504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347330 + }, + { + "epoch": 1.6845362299687308, + "grad_norm": 5.257374979805718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347340 + }, + { + "epoch": 1.684584728161567, + "grad_norm": 4.786639706821916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347350 + }, + { + "epoch": 1.684633226354403, + "grad_norm": 4.70463810131605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347360 + }, + { + "epoch": 1.684681724547239, + "grad_norm": 4.756377691705893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347370 + }, + { + "epoch": 1.6847302227400751, + "grad_norm": 4.698823730109325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347380 + }, + { + "epoch": 1.6847787209329113, + "grad_norm": 5.1006761481176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347390 + }, + { + "epoch": 1.6848272191257472, + "grad_norm": 4.7647024103980584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347400 + }, + { + "epoch": 1.6848757173185835, + "grad_norm": 4.745211512613423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347410 + }, + { + "epoch": 1.6849242155114195, + "grad_norm": 4.7261714541946276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347420 + }, + { + "epoch": 1.6849727137042556, + "grad_norm": 4.638283002122989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347430 + }, + { + "epoch": 1.6850212118970918, + "grad_norm": 5.203724384728048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347440 + }, + { + "epoch": 1.6850697100899277, + "grad_norm": 4.861649571807902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347450 + }, + { + "epoch": 1.6851182082827638, + "grad_norm": 4.786749130403223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347460 + }, + { + "epoch": 1.6851667064756, + "grad_norm": 4.700875422258832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347470 + }, + { + "epoch": 1.6852152046684359, + "grad_norm": 4.545918841358798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347480 + }, + { + "epoch": 1.6852637028612723, + "grad_norm": 5.140544345749731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347490 + }, + { + "epoch": 1.6853122010541082, + "grad_norm": 4.720516955103449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347500 + }, + { + "epoch": 1.6853606992469443, + "grad_norm": 4.5174843421591504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347510 + }, + { + "epoch": 1.6854091974397805, + "grad_norm": 4.873728443044456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347520 + }, + { + "epoch": 1.6854576956326164, + "grad_norm": 4.7503551314775905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347530 + }, + { + "epoch": 1.6855061938254525, + "grad_norm": 4.9282363079328206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347540 + }, + { + "epoch": 1.6855546920182887, + "grad_norm": 4.695133526411155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347550 + }, + { + "epoch": 1.6856031902111248, + "grad_norm": 4.5760753408785604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347560 + }, + { + "epoch": 1.685651688403961, + "grad_norm": 4.6805329390053885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347570 + }, + { + "epoch": 1.6857001865967969, + "grad_norm": 4.626387095640894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347580 + }, + { + "epoch": 1.685748684789633, + "grad_norm": 5.381010481642079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347590 + }, + { + "epoch": 1.6857971829824692, + "grad_norm": 4.581751866794548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347600 + }, + { + "epoch": 1.685845681175305, + "grad_norm": 4.5950482530088266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347610 + }, + { + "epoch": 1.6858941793681415, + "grad_norm": 4.628283889474005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347620 + }, + { + "epoch": 1.6859426775609774, + "grad_norm": 4.60059972340332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347630 + }, + { + "epoch": 1.6859911757538135, + "grad_norm": 5.049566453863008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347640 + }, + { + "epoch": 1.6860396739466497, + "grad_norm": 4.586437540865518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347650 + }, + { + "epoch": 1.6860881721394856, + "grad_norm": 4.6806100328922184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347660 + }, + { + "epoch": 1.6861366703323217, + "grad_norm": 4.682472010131278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347670 + }, + { + "epoch": 1.6861851685251579, + "grad_norm": 4.752050841716482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347680 + }, + { + "epoch": 1.6862336667179938, + "grad_norm": 4.9085524977954265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347690 + }, + { + "epoch": 1.6862821649108302, + "grad_norm": 4.5949207105877576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347700 + }, + { + "epoch": 1.686330663103666, + "grad_norm": 4.5767887257852635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347710 + }, + { + "epoch": 1.6863791612965022, + "grad_norm": 4.4534584020539114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347720 + }, + { + "epoch": 1.6864276594893384, + "grad_norm": 5.808879066648842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347730 + }, + { + "epoch": 1.6864761576821743, + "grad_norm": 4.938418740607631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347740 + }, + { + "epoch": 1.6865246558750104, + "grad_norm": 5.003584391261029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347750 + }, + { + "epoch": 1.6865731540678466, + "grad_norm": 4.619872484568077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347760 + }, + { + "epoch": 1.6866216522606825, + "grad_norm": 4.601383096769496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347770 + }, + { + "epoch": 1.6866701504535189, + "grad_norm": 4.4631349283008603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347780 + }, + { + "epoch": 1.6867186486463548, + "grad_norm": 4.8266187491208257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347790 + }, + { + "epoch": 1.686767146839191, + "grad_norm": 4.4509025798333823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347800 + }, + { + "epoch": 1.686815645032027, + "grad_norm": 4.326654590158796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347810 + }, + { + "epoch": 1.686864143224863, + "grad_norm": 4.5161154815787086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347820 + }, + { + "epoch": 1.6869126414176991, + "grad_norm": 4.4493972950476746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347830 + }, + { + "epoch": 1.6869611396105353, + "grad_norm": 4.952081056330826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347840 + }, + { + "epoch": 1.6870096378033712, + "grad_norm": 4.402242836931691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347850 + }, + { + "epoch": 1.6870581359962076, + "grad_norm": 4.370863493363686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347860 + }, + { + "epoch": 1.6871066341890435, + "grad_norm": 4.5821526128975165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347870 + }, + { + "epoch": 1.6871551323818796, + "grad_norm": 4.280850873783493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347880 + }, + { + "epoch": 1.6872036305747158, + "grad_norm": 4.7438298622637376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347890 + }, + { + "epoch": 1.6872521287675517, + "grad_norm": 4.361019989573833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347900 + }, + { + "epoch": 1.6873006269603879, + "grad_norm": 4.300386535760481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347910 + }, + { + "epoch": 1.687349125153224, + "grad_norm": 4.4281879496566035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347920 + }, + { + "epoch": 1.68739762334606, + "grad_norm": 4.59198759017454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347930 + }, + { + "epoch": 1.6874461215388963, + "grad_norm": 4.803462161362404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347940 + }, + { + "epoch": 1.6874946197317322, + "grad_norm": 4.7568384786700335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347950 + }, + { + "epoch": 1.6875431179245683, + "grad_norm": 4.6516966278886684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347960 + }, + { + "epoch": 1.6875916161174045, + "grad_norm": 4.254653163116018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347970 + }, + { + "epoch": 1.6876401143102404, + "grad_norm": 4.261678299144478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347980 + }, + { + "epoch": 1.6876886125030766, + "grad_norm": 4.620148885692288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 347990 + }, + { + "epoch": 1.6877371106959127, + "grad_norm": 4.416679288965497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348000 + }, + { + "epoch": 1.6877856088887486, + "grad_norm": 5.424809046417067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348010 + }, + { + "epoch": 1.687834107081585, + "grad_norm": 4.3797378168619616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348020 + }, + { + "epoch": 1.687882605274421, + "grad_norm": 4.145879373140815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348030 + }, + { + "epoch": 1.687931103467257, + "grad_norm": 4.915173335007239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348040 + }, + { + "epoch": 1.6879796016600932, + "grad_norm": 4.173004342078457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348050 + }, + { + "epoch": 1.6880280998529291, + "grad_norm": 4.458340541191319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348060 + }, + { + "epoch": 1.6880765980457653, + "grad_norm": 4.60367992616284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348070 + }, + { + "epoch": 1.6881250962386014, + "grad_norm": 4.0304612980435195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348080 + }, + { + "epoch": 1.6881735944314376, + "grad_norm": 4.442787826519634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348090 + }, + { + "epoch": 1.6882220926242737, + "grad_norm": 4.195156222408514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348100 + }, + { + "epoch": 1.6882705908171096, + "grad_norm": 4.2809787714759295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348110 + }, + { + "epoch": 1.6883190890099458, + "grad_norm": 4.678434706306689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348120 + }, + { + "epoch": 1.688367587202782, + "grad_norm": 4.1689030894076495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348130 + }, + { + "epoch": 1.6884160853956178, + "grad_norm": 4.5543970372818876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348140 + }, + { + "epoch": 1.6884645835884542, + "grad_norm": 4.499833039517398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348150 + }, + { + "epoch": 1.6885130817812901, + "grad_norm": 4.23686365991216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348160 + }, + { + "epoch": 1.6885615799741263, + "grad_norm": 4.278620480135942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348170 + }, + { + "epoch": 1.6886100781669624, + "grad_norm": 4.1198106259798806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348180 + }, + { + "epoch": 1.6886585763597983, + "grad_norm": 4.346179949266116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348190 + }, + { + "epoch": 1.6887070745526345, + "grad_norm": 4.102636808056559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348200 + }, + { + "epoch": 1.6887555727454706, + "grad_norm": 4.111051410404798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348210 + }, + { + "epoch": 1.6888040709383065, + "grad_norm": 4.174109236032564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348220 + }, + { + "epoch": 1.688852569131143, + "grad_norm": 4.425455557566238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348230 + }, + { + "epoch": 1.6889010673239788, + "grad_norm": 4.624095950589435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348240 + }, + { + "epoch": 1.688949565516815, + "grad_norm": 4.370857453750432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348250 + }, + { + "epoch": 1.688998063709651, + "grad_norm": 4.382426510574078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348260 + }, + { + "epoch": 1.689046561902487, + "grad_norm": 4.204031256449525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348270 + }, + { + "epoch": 1.6890950600953232, + "grad_norm": 4.154361121777583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348280 + }, + { + "epoch": 1.6891435582881593, + "grad_norm": 4.5426414629901046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348290 + }, + { + "epoch": 1.6891920564809952, + "grad_norm": 4.1491119873171556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348300 + }, + { + "epoch": 1.6892405546738316, + "grad_norm": 4.4799278953178145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348310 + }, + { + "epoch": 1.6892890528666675, + "grad_norm": 4.1105021608700554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348320 + }, + { + "epoch": 1.6893375510595037, + "grad_norm": 4.0485435448545104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348330 + }, + { + "epoch": 1.6893860492523398, + "grad_norm": 2.0031650649343646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348340 + }, + { + "epoch": 1.6894345474451757, + "grad_norm": 4.1757601820791024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348350 + }, + { + "epoch": 1.6894830456380119, + "grad_norm": 4.052593993719711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348360 + }, + { + "epoch": 1.689531543830848, + "grad_norm": 4.021372745910412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348370 + }, + { + "epoch": 1.689580042023684, + "grad_norm": 4.0054633387853755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348380 + }, + { + "epoch": 1.6896285402165203, + "grad_norm": 4.2382957587960846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348390 + }, + { + "epoch": 1.6896770384093562, + "grad_norm": 4.088763816412211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348400 + }, + { + "epoch": 1.6897255366021924, + "grad_norm": 4.1761108349192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348410 + }, + { + "epoch": 1.6897740347950285, + "grad_norm": 3.981643104111754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348420 + }, + { + "epoch": 1.6898225329878644, + "grad_norm": 3.879068799506058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348430 + }, + { + "epoch": 1.6898710311807006, + "grad_norm": 4.3225249157785584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348440 + }, + { + "epoch": 1.6899195293735367, + "grad_norm": 3.910194834588765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348450 + }, + { + "epoch": 1.6899680275663727, + "grad_norm": 3.9669185269985974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348460 + }, + { + "epoch": 1.690016525759209, + "grad_norm": 4.626435767818293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348470 + }, + { + "epoch": 1.690065023952045, + "grad_norm": 3.9465319900955365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348480 + }, + { + "epoch": 1.690113522144881, + "grad_norm": 4.565944067280725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348490 + }, + { + "epoch": 1.6901620203377172, + "grad_norm": 4.768107331187821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348500 + }, + { + "epoch": 1.6902105185305532, + "grad_norm": 3.967129558191118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348510 + }, + { + "epoch": 1.6902590167233893, + "grad_norm": 3.9388314831967364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348520 + }, + { + "epoch": 1.6903075149162254, + "grad_norm": 3.93081194260958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348530 + }, + { + "epoch": 1.6903560131090614, + "grad_norm": 4.7428468263888135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348540 + }, + { + "epoch": 1.6904045113018977, + "grad_norm": 3.9878731428188985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348550 + }, + { + "epoch": 1.6904530094947336, + "grad_norm": 3.840043660829906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348560 + }, + { + "epoch": 1.6905015076875698, + "grad_norm": 3.869464038075421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348570 + }, + { + "epoch": 1.690550005880406, + "grad_norm": 3.7122159568525603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348580 + }, + { + "epoch": 1.6905985040732419, + "grad_norm": 4.020222732492584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348590 + }, + { + "epoch": 1.6906470022660782, + "grad_norm": 3.965968176089518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348600 + }, + { + "epoch": 1.6906955004589141, + "grad_norm": 3.9510602789505356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348610 + }, + { + "epoch": 1.6907439986517503, + "grad_norm": 4.231772621210439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348620 + }, + { + "epoch": 1.6907924968445864, + "grad_norm": 3.720817431940304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348630 + }, + { + "epoch": 1.6908409950374224, + "grad_norm": 4.3354646095394855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348640 + }, + { + "epoch": 1.6908894932302585, + "grad_norm": 3.80747628980771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348650 + }, + { + "epoch": 1.6909379914230946, + "grad_norm": 4.035767986465544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348660 + }, + { + "epoch": 1.6909864896159306, + "grad_norm": 3.851426910728151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348670 + }, + { + "epoch": 1.691034987808767, + "grad_norm": 3.739881293540748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348680 + }, + { + "epoch": 1.6910834860016029, + "grad_norm": 4.072695247714364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348690 + }, + { + "epoch": 1.691131984194439, + "grad_norm": 4.005493892123013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348700 + }, + { + "epoch": 1.6911804823872751, + "grad_norm": 3.7669348529334457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348710 + }, + { + "epoch": 1.691228980580111, + "grad_norm": 3.730370323751231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348720 + }, + { + "epoch": 1.6912774787729472, + "grad_norm": 3.6725566587847425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348730 + }, + { + "epoch": 1.6913259769657834, + "grad_norm": 4.135285536222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348740 + }, + { + "epoch": 1.6913744751586193, + "grad_norm": 3.811276272358555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348750 + }, + { + "epoch": 1.6914229733514556, + "grad_norm": 3.6922465085353906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348760 + }, + { + "epoch": 1.6914714715442916, + "grad_norm": 3.932337122591889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348770 + }, + { + "epoch": 1.6915199697371277, + "grad_norm": 3.723845054537378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348780 + }, + { + "epoch": 1.6915684679299638, + "grad_norm": 4.7117595158852055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348790 + }, + { + "epoch": 1.6916169661227998, + "grad_norm": 3.671844339692143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348800 + }, + { + "epoch": 1.691665464315636, + "grad_norm": 8.467729628591769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348810 + }, + { + "epoch": 1.691713962508472, + "grad_norm": 3.813235593952413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348820 + }, + { + "epoch": 1.691762460701308, + "grad_norm": 3.7054064705444034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348830 + }, + { + "epoch": 1.6918109588941443, + "grad_norm": 4.358829741590853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348840 + }, + { + "epoch": 1.6918594570869803, + "grad_norm": 3.6964699745567486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348850 + }, + { + "epoch": 1.6919079552798164, + "grad_norm": 3.71568837920222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348860 + }, + { + "epoch": 1.6919564534726526, + "grad_norm": 3.6438592587728635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348870 + }, + { + "epoch": 1.6920049516654885, + "grad_norm": 3.567510731272705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348880 + }, + { + "epoch": 1.6920534498583246, + "grad_norm": 4.000276732085695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348890 + }, + { + "epoch": 1.6921019480511608, + "grad_norm": 3.7374149997049244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348900 + }, + { + "epoch": 1.6921504462439967, + "grad_norm": 3.9580534405558865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348910 + }, + { + "epoch": 1.692198944436833, + "grad_norm": 3.657911662457991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348920 + }, + { + "epoch": 1.692247442629669, + "grad_norm": 3.690980321380266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348930 + }, + { + "epoch": 1.6922959408225051, + "grad_norm": 3.899085498915156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348940 + }, + { + "epoch": 1.6923444390153413, + "grad_norm": 3.8258818335634714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348950 + }, + { + "epoch": 1.6923929372081772, + "grad_norm": 3.805599746442567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348960 + }, + { + "epoch": 1.6924414354010133, + "grad_norm": 3.5881150495242764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348970 + }, + { + "epoch": 1.6924899335938495, + "grad_norm": 3.43428503413179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348980 + }, + { + "epoch": 1.6925384317866854, + "grad_norm": 3.8377617528340124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 348990 + }, + { + "epoch": 1.6925869299795218, + "grad_norm": 3.617554611423657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349000 + }, + { + "epoch": 1.6926354281723577, + "grad_norm": 3.6385454649234816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349010 + }, + { + "epoch": 1.6926839263651938, + "grad_norm": 3.595511444132171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349020 + }, + { + "epoch": 1.69273242455803, + "grad_norm": 3.5872947279358414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349030 + }, + { + "epoch": 1.692780922750866, + "grad_norm": 3.949768156985556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349040 + }, + { + "epoch": 1.692829420943702, + "grad_norm": 3.9779802563089106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349050 + }, + { + "epoch": 1.6928779191365382, + "grad_norm": 3.793326897039151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349060 + }, + { + "epoch": 1.692926417329374, + "grad_norm": 3.583179619681687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349070 + }, + { + "epoch": 1.6929749155222105, + "grad_norm": 3.593808628465922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349080 + }, + { + "epoch": 1.6930234137150464, + "grad_norm": 3.737411091719878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349090 + }, + { + "epoch": 1.6930719119078825, + "grad_norm": 3.585100927239182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349100 + }, + { + "epoch": 1.6931204101007187, + "grad_norm": 3.6183170237791273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349110 + }, + { + "epoch": 1.6931689082935546, + "grad_norm": 3.480023380575403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349120 + }, + { + "epoch": 1.693217406486391, + "grad_norm": 3.4011666372180116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349130 + }, + { + "epoch": 1.6932659046792269, + "grad_norm": 3.701216400031626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349140 + }, + { + "epoch": 1.693314402872063, + "grad_norm": 3.5374100093577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349150 + }, + { + "epoch": 1.6933629010648992, + "grad_norm": 3.4875068166684287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349160 + }, + { + "epoch": 1.693411399257735, + "grad_norm": 3.5797324215991466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349170 + }, + { + "epoch": 1.6934598974505712, + "grad_norm": 3.597075703964947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349180 + }, + { + "epoch": 1.6935083956434074, + "grad_norm": 4.30133688666956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349190 + }, + { + "epoch": 1.6935568938362433, + "grad_norm": 3.674266224606981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349200 + }, + { + "epoch": 1.6936053920290797, + "grad_norm": 3.6708389217210424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349210 + }, + { + "epoch": 1.6936538902219156, + "grad_norm": 3.6319939056284056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349220 + }, + { + "epoch": 1.6937023884147517, + "grad_norm": 3.4945152549425984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349230 + }, + { + "epoch": 1.6937508866075879, + "grad_norm": 4.047966584153073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349240 + }, + { + "epoch": 1.6937993848004238, + "grad_norm": 3.939076265169206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349250 + }, + { + "epoch": 1.69384788299326, + "grad_norm": 3.547053140096068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349260 + }, + { + "epoch": 1.693896381186096, + "grad_norm": 3.714174923175051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349270 + }, + { + "epoch": 1.693944879378932, + "grad_norm": 3.4030712470212165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349280 + }, + { + "epoch": 1.6939933775717684, + "grad_norm": 3.5543557430628425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349290 + }, + { + "epoch": 1.6940418757646043, + "grad_norm": 3.6928216928799884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349300 + }, + { + "epoch": 1.6940903739574404, + "grad_norm": 3.646787050115563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349310 + }, + { + "epoch": 1.6941388721502766, + "grad_norm": 3.41561943173474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349320 + }, + { + "epoch": 1.6941873703431125, + "grad_norm": 3.489113353793982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349330 + }, + { + "epoch": 1.6942358685359487, + "grad_norm": 4.15820942123446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349340 + }, + { + "epoch": 1.6942843667287848, + "grad_norm": 3.6536960124067264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349350 + }, + { + "epoch": 1.6943328649216207, + "grad_norm": 3.419152605488307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349360 + }, + { + "epoch": 1.694381363114457, + "grad_norm": 3.507727086571322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349370 + }, + { + "epoch": 1.694429861307293, + "grad_norm": 3.4003910798219295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349380 + }, + { + "epoch": 1.6944783595001291, + "grad_norm": 3.60673979571402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349390 + }, + { + "epoch": 1.6945268576929653, + "grad_norm": 3.4594236808516143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349400 + }, + { + "epoch": 1.6945753558858012, + "grad_norm": 3.428733208465928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349410 + }, + { + "epoch": 1.6946238540786374, + "grad_norm": 3.331135545181496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349420 + }, + { + "epoch": 1.6946723522714735, + "grad_norm": 3.207546939165695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349430 + }, + { + "epoch": 1.6947208504643094, + "grad_norm": 3.635469170149008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349440 + }, + { + "epoch": 1.6947693486571458, + "grad_norm": 3.410190174690797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349450 + }, + { + "epoch": 1.6948178468499817, + "grad_norm": 3.929291736426421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349460 + }, + { + "epoch": 1.6948663450428179, + "grad_norm": 3.4138288640406245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349470 + }, + { + "epoch": 1.694914843235654, + "grad_norm": 3.3751323513797615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349480 + }, + { + "epoch": 1.69496334142849, + "grad_norm": 3.776538548549979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349490 + }, + { + "epoch": 1.695011839621326, + "grad_norm": 3.4697929862659294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349500 + }, + { + "epoch": 1.6950603378141622, + "grad_norm": 3.33393188611808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349510 + }, + { + "epoch": 1.6951088360069981, + "grad_norm": 3.4737613674451495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349520 + }, + { + "epoch": 1.6951573341998345, + "grad_norm": 3.3586172065724895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349530 + }, + { + "epoch": 1.6952058323926704, + "grad_norm": 4.122324526179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349540 + }, + { + "epoch": 1.6952543305855066, + "grad_norm": 3.245423485509491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349550 + }, + { + "epoch": 1.6953028287783427, + "grad_norm": 3.249151703244024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349560 + }, + { + "epoch": 1.6953513269711786, + "grad_norm": 3.690086103347312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349570 + }, + { + "epoch": 1.6953998251640148, + "grad_norm": 3.274821835930197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349580 + }, + { + "epoch": 1.695448323356851, + "grad_norm": 3.759790345725378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349590 + }, + { + "epoch": 1.6954968215496868, + "grad_norm": 3.324969810591938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349600 + }, + { + "epoch": 1.6955453197425232, + "grad_norm": 3.245958879460886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349610 + }, + { + "epoch": 1.6955938179353591, + "grad_norm": 3.357789424285329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349620 + }, + { + "epoch": 1.6956423161281953, + "grad_norm": 3.267176040822051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349630 + }, + { + "epoch": 1.6956908143210314, + "grad_norm": 3.594445630028531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349640 + }, + { + "epoch": 1.6957393125138673, + "grad_norm": 3.226989164772931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349650 + }, + { + "epoch": 1.6957878107067037, + "grad_norm": 3.2760443247070725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349660 + }, + { + "epoch": 1.6958363088995396, + "grad_norm": 3.3496164064672485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349670 + }, + { + "epoch": 1.6958848070923758, + "grad_norm": 3.2481345613177837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349680 + }, + { + "epoch": 1.695933305285212, + "grad_norm": 3.467237519316768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349690 + }, + { + "epoch": 1.6959818034780478, + "grad_norm": 3.3491456719048074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349700 + }, + { + "epoch": 1.696030301670884, + "grad_norm": 3.382749014235742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349710 + }, + { + "epoch": 1.6960787998637201, + "grad_norm": 3.2533453264704804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349720 + }, + { + "epoch": 1.696127298056556, + "grad_norm": 3.175020069079437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349730 + }, + { + "epoch": 1.6961757962493924, + "grad_norm": 3.6201704745053576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349740 + }, + { + "epoch": 1.6962242944422283, + "grad_norm": 3.3667888033050986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349750 + }, + { + "epoch": 1.6962727926350645, + "grad_norm": 3.229593303899492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349760 + }, + { + "epoch": 1.6963212908279006, + "grad_norm": 3.184998575989084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349770 + }, + { + "epoch": 1.6963697890207365, + "grad_norm": 3.4780349267293786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349780 + }, + { + "epoch": 1.6964182872135727, + "grad_norm": 3.327869535496575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349790 + }, + { + "epoch": 1.6964667854064088, + "grad_norm": 3.254729108448373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349800 + }, + { + "epoch": 1.6965152835992447, + "grad_norm": 3.9995633471789915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349810 + }, + { + "epoch": 1.6965637817920811, + "grad_norm": 3.1064917749290544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349820 + }, + { + "epoch": 1.696612279984917, + "grad_norm": 3.053469299629796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349830 + }, + { + "epoch": 1.6966607781777532, + "grad_norm": 3.233276402170304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349840 + }, + { + "epoch": 1.6967092763705893, + "grad_norm": 3.2920649317702555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349850 + }, + { + "epoch": 1.6967577745634252, + "grad_norm": 3.192155517695028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349860 + }, + { + "epoch": 1.6968062727562614, + "grad_norm": 3.201958165277574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349870 + }, + { + "epoch": 1.6968547709490975, + "grad_norm": 3.250287150535769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349880 + }, + { + "epoch": 1.6969032691419335, + "grad_norm": 3.3324965897918446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349890 + }, + { + "epoch": 1.6969517673347698, + "grad_norm": 3.173938978306978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349900 + }, + { + "epoch": 1.6970002655276057, + "grad_norm": 3.418142213718056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349910 + }, + { + "epoch": 1.6970487637204419, + "grad_norm": 3.0283022312005414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349920 + }, + { + "epoch": 1.697097261913278, + "grad_norm": 3.374106682940692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349930 + }, + { + "epoch": 1.697145760106114, + "grad_norm": 3.1914815679101594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349940 + }, + { + "epoch": 1.69719425829895, + "grad_norm": 3.27956399814866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349950 + }, + { + "epoch": 1.6972427564917862, + "grad_norm": 3.056849351423807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349960 + }, + { + "epoch": 1.6972912546846222, + "grad_norm": 3.069587606319146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349970 + }, + { + "epoch": 1.6973397528774585, + "grad_norm": 2.9155392766710975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349980 + }, + { + "epoch": 1.6973882510702945, + "grad_norm": 3.341055432315443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 349990 + }, + { + "epoch": 1.6974367492631306, + "grad_norm": 3.171600582163592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350000 + }, + { + "epoch": 1.6974852474559667, + "grad_norm": 3.193218489627725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350010 + }, + { + "epoch": 1.6975337456488027, + "grad_norm": 3.109653690103187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350020 + }, + { + "epoch": 1.6975822438416388, + "grad_norm": 2.8676961250084787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350030 + }, + { + "epoch": 1.697630742034475, + "grad_norm": 3.211234300692922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350040 + }, + { + "epoch": 1.6976792402273109, + "grad_norm": 3.071113141572823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350050 + }, + { + "epoch": 1.6977277384201472, + "grad_norm": 3.336280940402503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350060 + }, + { + "epoch": 1.6977762366129832, + "grad_norm": 2.9783800314930886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350070 + }, + { + "epoch": 1.6978247348058193, + "grad_norm": 3.268528203648202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350080 + }, + { + "epoch": 1.6978732329986554, + "grad_norm": 3.2497734281378143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350090 + }, + { + "epoch": 1.6979217311914914, + "grad_norm": 3.061828834916014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350100 + }, + { + "epoch": 1.6979702293843275, + "grad_norm": 3.057191122479708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350110 + }, + { + "epoch": 1.6980187275771637, + "grad_norm": 3.139333770718622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350120 + }, + { + "epoch": 1.6980672257699998, + "grad_norm": 3.121545333328868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350130 + }, + { + "epoch": 1.698115723962836, + "grad_norm": 3.3448181113726605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350140 + }, + { + "epoch": 1.6981642221556719, + "grad_norm": 3.031649598028707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350150 + }, + { + "epoch": 1.698212720348508, + "grad_norm": 3.103892964873012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350160 + }, + { + "epoch": 1.6982612185413442, + "grad_norm": 3.210564969435836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350170 + }, + { + "epoch": 1.69830971673418, + "grad_norm": 3.6141255321808785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350180 + }, + { + "epoch": 1.6983582149270164, + "grad_norm": 3.417601490696143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350190 + }, + { + "epoch": 1.6984067131198524, + "grad_norm": 2.907611573732538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350200 + }, + { + "epoch": 1.6984552113126885, + "grad_norm": 3.035864892808604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350210 + }, + { + "epoch": 1.6985037095055247, + "grad_norm": 3.244066348884189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350220 + }, + { + "epoch": 1.6985522076983606, + "grad_norm": 3.054949004877017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350230 + }, + { + "epoch": 1.6986007058911967, + "grad_norm": 3.0567115061330696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350240 + }, + { + "epoch": 1.6986492040840329, + "grad_norm": 2.989237657402555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350250 + }, + { + "epoch": 1.6986977022768688, + "grad_norm": 2.9571017634566488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350260 + }, + { + "epoch": 1.6987462004697051, + "grad_norm": 3.030319817298732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350270 + }, + { + "epoch": 1.698794698662541, + "grad_norm": 3.09689660582535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350280 + }, + { + "epoch": 1.6988431968553772, + "grad_norm": 3.159272310426786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350290 + }, + { + "epoch": 1.6988916950482134, + "grad_norm": 3.192300823684491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350300 + }, + { + "epoch": 1.6989401932410493, + "grad_norm": 3.026543993200903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350310 + }, + { + "epoch": 1.6989886914338854, + "grad_norm": 3.075150090126044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350320 + }, + { + "epoch": 1.6990371896267216, + "grad_norm": 2.825912481796422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350330 + }, + { + "epoch": 1.6990856878195575, + "grad_norm": 3.137186510571155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350340 + }, + { + "epoch": 1.6991341860123939, + "grad_norm": 2.9124411327074995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350350 + }, + { + "epoch": 1.6991826842052298, + "grad_norm": 2.966934431469781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350360 + }, + { + "epoch": 1.699231182398066, + "grad_norm": 2.9259860312436103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350370 + }, + { + "epoch": 1.699279680590902, + "grad_norm": 2.8095790582938207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350380 + }, + { + "epoch": 1.699328178783738, + "grad_norm": 3.1305084746691136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350390 + }, + { + "epoch": 1.6993766769765741, + "grad_norm": 3.809213211525275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350400 + }, + { + "epoch": 1.6994251751694103, + "grad_norm": 3.109259694156208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350410 + }, + { + "epoch": 1.6994736733622462, + "grad_norm": 2.933208342881244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350420 + }, + { + "epoch": 1.6995221715550826, + "grad_norm": 2.8888331726761862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350430 + }, + { + "epoch": 1.6995706697479185, + "grad_norm": 3.21340820619298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350440 + }, + { + "epoch": 1.6996191679407546, + "grad_norm": 3.092405620463978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350450 + }, + { + "epoch": 1.6996676661335908, + "grad_norm": 3.086474720248589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350460 + }, + { + "epoch": 1.6997161643264267, + "grad_norm": 3.039795259951461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350470 + }, + { + "epoch": 1.6997646625192628, + "grad_norm": 2.689887956819348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350480 + }, + { + "epoch": 1.699813160712099, + "grad_norm": 3.216376498471618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350490 + }, + { + "epoch": 1.699861658904935, + "grad_norm": 2.9896707332000005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350500 + }, + { + "epoch": 1.6999101570977713, + "grad_norm": 2.8965985165996244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350510 + }, + { + "epoch": 1.6999586552906072, + "grad_norm": 2.8974888266475318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350520 + }, + { + "epoch": 1.7000071534834433, + "grad_norm": 2.7192916363105724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350530 + }, + { + "epoch": 1.7000556516762795, + "grad_norm": 3.220411315396632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350540 + }, + { + "epoch": 1.7001041498691154, + "grad_norm": 2.92800130807791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350550 + }, + { + "epoch": 1.7001526480619515, + "grad_norm": 3.126996972468987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350560 + }, + { + "epoch": 1.7002011462547877, + "grad_norm": 2.869410486994184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350570 + }, + { + "epoch": 1.7002496444476236, + "grad_norm": 2.7993324991371082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350580 + }, + { + "epoch": 1.70029814264046, + "grad_norm": 3.208981524949195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350590 + }, + { + "epoch": 1.700346640833296, + "grad_norm": 2.9440206716913053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350600 + }, + { + "epoch": 1.700395139026132, + "grad_norm": 2.79348686405001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350610 + }, + { + "epoch": 1.7004436372189682, + "grad_norm": 2.7726720475129696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350620 + }, + { + "epoch": 1.700492135411804, + "grad_norm": 2.8201510460235113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350630 + }, + { + "epoch": 1.7005406336046405, + "grad_norm": 3.302627860080065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350640 + }, + { + "epoch": 1.7005891317974764, + "grad_norm": 2.8392062034754417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350650 + }, + { + "epoch": 1.7006376299903125, + "grad_norm": 2.924485720257053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350660 + }, + { + "epoch": 1.7006861281831487, + "grad_norm": 2.7044192663083777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350670 + }, + { + "epoch": 1.7007346263759846, + "grad_norm": 3.7844305467160666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350680 + }, + { + "epoch": 1.7007831245688207, + "grad_norm": 3.38589991599747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350690 + }, + { + "epoch": 1.700831622761657, + "grad_norm": 2.861514047935998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350700 + }, + { + "epoch": 1.7008801209544928, + "grad_norm": 2.918165087351099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350710 + }, + { + "epoch": 1.7009286191473292, + "grad_norm": 3.0860665134468945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350720 + }, + { + "epoch": 1.700977117340165, + "grad_norm": 2.764676665378829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350730 + }, + { + "epoch": 1.7010256155330012, + "grad_norm": 3.0452998345253945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350740 + }, + { + "epoch": 1.7010741137258374, + "grad_norm": 2.9610360385845524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350750 + }, + { + "epoch": 1.7011226119186733, + "grad_norm": 2.954530664567301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350760 + }, + { + "epoch": 1.7011711101115095, + "grad_norm": 2.849873936838776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350770 + }, + { + "epoch": 1.7012196083043456, + "grad_norm": 2.616686423095871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350780 + }, + { + "epoch": 1.7012681064971815, + "grad_norm": 2.8220574321835556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350790 + }, + { + "epoch": 1.7013166046900179, + "grad_norm": 2.9359968678477344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350800 + }, + { + "epoch": 1.7013651028828538, + "grad_norm": 2.7550614234428394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350810 + }, + { + "epoch": 1.70141360107569, + "grad_norm": 2.8505390048394474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350820 + }, + { + "epoch": 1.701462099268526, + "grad_norm": 2.8193353429628587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350830 + }, + { + "epoch": 1.701510597461362, + "grad_norm": 2.7268816538139617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350840 + }, + { + "epoch": 1.7015590956541982, + "grad_norm": 2.7039844141540925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350850 + }, + { + "epoch": 1.7016075938470343, + "grad_norm": 2.9251857824874605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350860 + }, + { + "epoch": 1.7016560920398702, + "grad_norm": 2.8510218186283964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350870 + }, + { + "epoch": 1.7017045902327066, + "grad_norm": 2.591492709314025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350880 + }, + { + "epoch": 1.7017530884255425, + "grad_norm": 3.017758842815965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350890 + }, + { + "epoch": 1.7018015866183787, + "grad_norm": 2.726697445609716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350900 + }, + { + "epoch": 1.7018500848112148, + "grad_norm": 2.7469898356002886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350910 + }, + { + "epoch": 1.7018985830040507, + "grad_norm": 2.8751907521495923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350920 + }, + { + "epoch": 1.7019470811968869, + "grad_norm": 2.7549701187012943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350930 + }, + { + "epoch": 1.701995579389723, + "grad_norm": 3.045108698529475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350940 + }, + { + "epoch": 1.702044077582559, + "grad_norm": 2.7603164198808372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350950 + }, + { + "epoch": 1.7020925757753953, + "grad_norm": 2.6138534892083953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350960 + }, + { + "epoch": 1.7021410739682312, + "grad_norm": 2.7484711395686645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350970 + }, + { + "epoch": 1.7021895721610674, + "grad_norm": 3.213913046806738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350980 + }, + { + "epoch": 1.7022380703539035, + "grad_norm": 2.6745151870954942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 350990 + }, + { + "epoch": 1.7022865685467394, + "grad_norm": 2.6165094979546666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351000 + }, + { + "epoch": 1.7023350667395756, + "grad_norm": 2.6344141090817175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351010 + }, + { + "epoch": 1.7023835649324117, + "grad_norm": 2.7470498764614604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351020 + }, + { + "epoch": 1.7024320631252476, + "grad_norm": 2.6263865748887838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351030 + }, + { + "epoch": 1.702480561318084, + "grad_norm": 3.079826882412817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351040 + }, + { + "epoch": 1.70252905951092, + "grad_norm": 2.747060179331129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351050 + }, + { + "epoch": 1.702577557703756, + "grad_norm": 2.7619970310865938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351060 + }, + { + "epoch": 1.7026260558965922, + "grad_norm": 2.7395154589271442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351070 + }, + { + "epoch": 1.7026745540894281, + "grad_norm": 2.6619670023819708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351080 + }, + { + "epoch": 1.7027230522822643, + "grad_norm": 2.9668829171214384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351090 + }, + { + "epoch": 1.7027715504751004, + "grad_norm": 2.822417499714902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351100 + }, + { + "epoch": 1.7028200486679363, + "grad_norm": 2.7273177494180345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351110 + }, + { + "epoch": 1.7028685468607727, + "grad_norm": 2.679123944915318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351120 + }, + { + "epoch": 1.7029170450536086, + "grad_norm": 2.5044290197229202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351130 + }, + { + "epoch": 1.7029655432464448, + "grad_norm": 2.7472500718772608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351140 + }, + { + "epoch": 1.703014041439281, + "grad_norm": 2.73277507290004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351150 + }, + { + "epoch": 1.7030625396321168, + "grad_norm": 2.7357094367630452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351160 + }, + { + "epoch": 1.7031110378249532, + "grad_norm": 2.6692040577813714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351170 + }, + { + "epoch": 1.7031595360177891, + "grad_norm": 2.490935280263784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351180 + }, + { + "epoch": 1.7032080342106253, + "grad_norm": 3.215965449498981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351190 + }, + { + "epoch": 1.7032565324034614, + "grad_norm": 2.479360361462568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351200 + }, + { + "epoch": 1.7033050305962973, + "grad_norm": 2.7092630361380543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351210 + }, + { + "epoch": 1.7033535287891335, + "grad_norm": 2.81147709557672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351220 + }, + { + "epoch": 1.7034020269819696, + "grad_norm": 2.6432317667968164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351230 + }, + { + "epoch": 1.7034505251748056, + "grad_norm": 2.9770978571264095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351240 + }, + { + "epoch": 1.703499023367642, + "grad_norm": 2.731332315875079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351250 + }, + { + "epoch": 1.7035475215604778, + "grad_norm": 2.7938995117438026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351260 + }, + { + "epoch": 1.703596019753314, + "grad_norm": 2.6032326516656212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351270 + }, + { + "epoch": 1.7036445179461501, + "grad_norm": 2.6017115573040428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351280 + }, + { + "epoch": 1.703693016138986, + "grad_norm": 3.0719302657189473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351290 + }, + { + "epoch": 1.7037415143318222, + "grad_norm": 2.6497021465843318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351300 + }, + { + "epoch": 1.7037900125246583, + "grad_norm": 2.6417060539074555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351310 + }, + { + "epoch": 1.7038385107174943, + "grad_norm": 2.6236479655494804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351320 + }, + { + "epoch": 1.7038870089103306, + "grad_norm": 2.9626122000081523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351330 + }, + { + "epoch": 1.7039355071031665, + "grad_norm": 2.644195795653559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351340 + }, + { + "epoch": 1.7039840052960027, + "grad_norm": 2.643816543468347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351350 + }, + { + "epoch": 1.7040325034888388, + "grad_norm": 2.5397948633099077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351360 + }, + { + "epoch": 1.7040810016816748, + "grad_norm": 2.516510910766101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351370 + }, + { + "epoch": 1.704129499874511, + "grad_norm": 2.465345438906752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351380 + }, + { + "epoch": 1.704177998067347, + "grad_norm": 2.8581236932723186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351390 + }, + { + "epoch": 1.704226496260183, + "grad_norm": 2.484851435724522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351400 + }, + { + "epoch": 1.7042749944530193, + "grad_norm": 2.5795463542976904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351410 + }, + { + "epoch": 1.7043234926458553, + "grad_norm": 2.5331493347380274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351420 + }, + { + "epoch": 1.7043719908386914, + "grad_norm": 3.044848995159555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351430 + }, + { + "epoch": 1.7044204890315275, + "grad_norm": 3.001071746666639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351440 + }, + { + "epoch": 1.7044689872243635, + "grad_norm": 2.555589340147435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351450 + }, + { + "epoch": 1.7045174854171996, + "grad_norm": 2.481857919178765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351460 + }, + { + "epoch": 1.7045659836100358, + "grad_norm": 2.5889800525646933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351470 + }, + { + "epoch": 1.7046144818028717, + "grad_norm": 2.5763256417121738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351480 + }, + { + "epoch": 1.704662979995708, + "grad_norm": 2.477373506337699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351490 + }, + { + "epoch": 1.704711478188544, + "grad_norm": 2.6320140733560038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351500 + }, + { + "epoch": 1.70475997638138, + "grad_norm": 2.4208121729429877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351510 + }, + { + "epoch": 1.7048084745742162, + "grad_norm": 2.6583547807490504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351520 + }, + { + "epoch": 1.7048569727670522, + "grad_norm": 2.6004249420452652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351530 + }, + { + "epoch": 1.7049054709598883, + "grad_norm": 2.7051299866798217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351540 + }, + { + "epoch": 1.7049539691527245, + "grad_norm": 2.335418969323655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351550 + }, + { + "epoch": 1.7050024673455604, + "grad_norm": 2.4493433059546987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351560 + }, + { + "epoch": 1.7050509655383967, + "grad_norm": 2.7024441351386486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351570 + }, + { + "epoch": 1.7050994637312327, + "grad_norm": 2.6780712758522895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351580 + }, + { + "epoch": 1.7051479619240688, + "grad_norm": 2.7328056262376776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351590 + }, + { + "epoch": 1.705196460116905, + "grad_norm": 2.3954855876695547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351600 + }, + { + "epoch": 1.7052449583097409, + "grad_norm": 2.3135704907417676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351610 + }, + { + "epoch": 1.705293456502577, + "grad_norm": 2.4997863334874637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351620 + }, + { + "epoch": 1.7053419546954132, + "grad_norm": 2.3556555817094704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351630 + }, + { + "epoch": 1.705390452888249, + "grad_norm": 2.6264416419508052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351640 + }, + { + "epoch": 1.7054389510810855, + "grad_norm": 2.776314289576476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351650 + }, + { + "epoch": 1.7054874492739214, + "grad_norm": 2.6812159603650798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351660 + }, + { + "epoch": 1.7055359474667575, + "grad_norm": 2.4638064033410956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351670 + }, + { + "epoch": 1.7055844456595937, + "grad_norm": 2.4420433675231834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351680 + }, + { + "epoch": 1.7056329438524296, + "grad_norm": 3.2675739447540764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351690 + }, + { + "epoch": 1.705681442045266, + "grad_norm": 2.7564443172423125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351700 + }, + { + "epoch": 1.7057299402381019, + "grad_norm": 2.570626378428642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351710 + }, + { + "epoch": 1.705778438430938, + "grad_norm": 2.313987224056291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351720 + }, + { + "epoch": 1.7058269366237742, + "grad_norm": 2.1844847708507587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351730 + }, + { + "epoch": 1.70587543481661, + "grad_norm": 2.82564638354188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351740 + }, + { + "epoch": 1.7059239330094462, + "grad_norm": 2.3864251019745097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351750 + }, + { + "epoch": 1.7059724312022824, + "grad_norm": 2.3406750315757563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351760 + }, + { + "epoch": 1.7060209293951183, + "grad_norm": 2.5393894986791565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351770 + }, + { + "epoch": 1.7060694275879547, + "grad_norm": 2.30606396200983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351780 + }, + { + "epoch": 1.7061179257807906, + "grad_norm": 2.920373987080893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351790 + }, + { + "epoch": 1.7061664239736267, + "grad_norm": 2.3286727213189806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351800 + }, + { + "epoch": 1.7062149221664629, + "grad_norm": 2.9223608422057623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351810 + }, + { + "epoch": 1.7062634203592988, + "grad_norm": 2.3817097627443218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351820 + }, + { + "epoch": 1.706311918552135, + "grad_norm": 2.6412177334123044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351830 + }, + { + "epoch": 1.706360416744971, + "grad_norm": 2.794120135263256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351840 + }, + { + "epoch": 1.706408914937807, + "grad_norm": 2.6429519905946108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351850 + }, + { + "epoch": 1.7064574131306434, + "grad_norm": 2.378661179136543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351860 + }, + { + "epoch": 1.7065059113234793, + "grad_norm": 2.287167255587974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351870 + }, + { + "epoch": 1.7065544095163154, + "grad_norm": 2.6557813725958113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351880 + }, + { + "epoch": 1.7066029077091516, + "grad_norm": 2.5119010871321734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351890 + }, + { + "epoch": 1.7066514059019875, + "grad_norm": 2.514313379720079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351900 + }, + { + "epoch": 1.7066999040948236, + "grad_norm": 2.3310814611932074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351910 + }, + { + "epoch": 1.7067484022876598, + "grad_norm": 2.3578003549573623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351920 + }, + { + "epoch": 1.7067969004804957, + "grad_norm": 2.3363817547306098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351930 + }, + { + "epoch": 1.706845398673332, + "grad_norm": 2.9649513066942745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351940 + }, + { + "epoch": 1.706893896866168, + "grad_norm": 2.455094794129309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351950 + }, + { + "epoch": 1.7069423950590041, + "grad_norm": 2.8610424251951372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351960 + }, + { + "epoch": 1.7069908932518403, + "grad_norm": 2.2509125230385507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351970 + }, + { + "epoch": 1.7070393914446762, + "grad_norm": 2.416874878008457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351980 + }, + { + "epoch": 1.7070878896375123, + "grad_norm": 3.447088658958819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 351990 + }, + { + "epoch": 1.7071363878303485, + "grad_norm": 2.7472886188206758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352000 + }, + { + "epoch": 1.7071848860231844, + "grad_norm": 2.298585499715955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352010 + }, + { + "epoch": 1.7072333842160208, + "grad_norm": 2.4567571088596196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352020 + }, + { + "epoch": 1.7072818824088567, + "grad_norm": 2.1656960669247383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352030 + }, + { + "epoch": 1.7073303806016928, + "grad_norm": 2.85394303745079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352040 + }, + { + "epoch": 1.707378878794529, + "grad_norm": 2.5849308471492805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352050 + }, + { + "epoch": 1.707427376987365, + "grad_norm": 2.2832768564740036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352060 + }, + { + "epoch": 1.707475875180201, + "grad_norm": 6.120684759025607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352070 + }, + { + "epoch": 1.7075243733730372, + "grad_norm": 2.3675942983913956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352080 + }, + { + "epoch": 1.7075728715658731, + "grad_norm": 2.278648914000314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352090 + }, + { + "epoch": 1.7076213697587095, + "grad_norm": 2.5592910901650612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352100 + }, + { + "epoch": 1.7076698679515454, + "grad_norm": 2.4017717592528243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352110 + }, + { + "epoch": 1.7077183661443815, + "grad_norm": 2.272924426449663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352120 + }, + { + "epoch": 1.7077668643372177, + "grad_norm": 2.2431297708180864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352130 + }, + { + "epoch": 1.7078153625300536, + "grad_norm": 2.557662348579015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352140 + }, + { + "epoch": 1.7078638607228898, + "grad_norm": 2.3081378586198298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352150 + }, + { + "epoch": 1.707912358915726, + "grad_norm": 2.320580172465725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352160 + }, + { + "epoch": 1.707960857108562, + "grad_norm": 2.213278449403333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352170 + }, + { + "epoch": 1.7080093553013982, + "grad_norm": 2.114708941292065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352180 + }, + { + "epoch": 1.7080578534942341, + "grad_norm": 2.7328921348157564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352190 + }, + { + "epoch": 1.7081063516870703, + "grad_norm": 2.3145313221561992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352200 + }, + { + "epoch": 1.7081548498799064, + "grad_norm": 2.350062366929251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352210 + }, + { + "epoch": 1.7082033480727423, + "grad_norm": 2.28953673797605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352220 + }, + { + "epoch": 1.7082518462655787, + "grad_norm": 2.092862416702701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352230 + }, + { + "epoch": 1.7083003444584146, + "grad_norm": 2.2845853209219058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352240 + }, + { + "epoch": 1.7083488426512508, + "grad_norm": 2.345482563725909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352250 + }, + { + "epoch": 1.708397340844087, + "grad_norm": 2.7474490238432736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352260 + }, + { + "epoch": 1.7084458390369228, + "grad_norm": 2.4290400801874057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352270 + }, + { + "epoch": 1.708494337229759, + "grad_norm": 2.1604000366437504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352280 + }, + { + "epoch": 1.708542835422595, + "grad_norm": 2.2072210725809782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352290 + }, + { + "epoch": 1.708591333615431, + "grad_norm": 2.157436185257211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352300 + }, + { + "epoch": 1.7086398318082674, + "grad_norm": 2.1848533648949342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352310 + }, + { + "epoch": 1.7086883300011033, + "grad_norm": 2.1186259857586265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352320 + }, + { + "epoch": 1.7087368281939395, + "grad_norm": 2.2362963036925976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352330 + }, + { + "epoch": 1.7087853263867756, + "grad_norm": 2.3971651330612076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352340 + }, + { + "epoch": 1.7088338245796115, + "grad_norm": 2.222220096825822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352350 + }, + { + "epoch": 1.7088823227724477, + "grad_norm": 2.338262028445115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352360 + }, + { + "epoch": 1.7089308209652838, + "grad_norm": 2.288944145334426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352370 + }, + { + "epoch": 1.7089793191581197, + "grad_norm": 2.05226431404526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352380 + }, + { + "epoch": 1.709027817350956, + "grad_norm": 2.438039992114227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352390 + }, + { + "epoch": 1.709076315543792, + "grad_norm": 2.3161677020766547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352400 + }, + { + "epoch": 1.7091248137366282, + "grad_norm": 2.109596941579639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352410 + }, + { + "epoch": 1.7091733119294643, + "grad_norm": 2.0770801967273655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352420 + }, + { + "epoch": 1.7092218101223002, + "grad_norm": 2.0835193126345075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352430 + }, + { + "epoch": 1.7092703083151364, + "grad_norm": 2.4993047631483023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352440 + }, + { + "epoch": 1.7093188065079725, + "grad_norm": 2.347425720472529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352450 + }, + { + "epoch": 1.7093673047008084, + "grad_norm": 2.3856971509417235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352460 + }, + { + "epoch": 1.7094158028936448, + "grad_norm": 2.484327588092583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352470 + }, + { + "epoch": 1.7094643010864807, + "grad_norm": 2.2776051267214825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352480 + }, + { + "epoch": 1.7095127992793169, + "grad_norm": 2.215499605995319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352490 + }, + { + "epoch": 1.709561297472153, + "grad_norm": 2.1102851022192226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352500 + }, + { + "epoch": 1.709609795664989, + "grad_norm": 2.3312047403578617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352510 + }, + { + "epoch": 1.709658293857825, + "grad_norm": 2.1036235864357877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352520 + }, + { + "epoch": 1.7097067920506612, + "grad_norm": 2.0339705031346966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352530 + }, + { + "epoch": 1.7097552902434972, + "grad_norm": 2.3858570230572695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352540 + }, + { + "epoch": 1.7098037884363335, + "grad_norm": 2.4846283253054935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352550 + }, + { + "epoch": 1.7098522866291694, + "grad_norm": 2.3725879927383176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352560 + }, + { + "epoch": 1.7099007848220056, + "grad_norm": 2.0332253214405682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352570 + }, + { + "epoch": 1.7099492830148417, + "grad_norm": 2.1195409871666016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352580 + }, + { + "epoch": 1.7099977812076776, + "grad_norm": 2.232258289325273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352590 + }, + { + "epoch": 1.7100462794005138, + "grad_norm": 2.171223201230532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352600 + }, + { + "epoch": 1.71009477759335, + "grad_norm": 2.403650611881858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352610 + }, + { + "epoch": 1.7101432757861859, + "grad_norm": 2.273931087870551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352620 + }, + { + "epoch": 1.7101917739790222, + "grad_norm": 2.33495978108067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352630 + }, + { + "epoch": 1.7102402721718581, + "grad_norm": 2.402853382932335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352640 + }, + { + "epoch": 1.7102887703646943, + "grad_norm": 2.029043955076304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352650 + }, + { + "epoch": 1.7103372685575304, + "grad_norm": 2.458160075491378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352660 + }, + { + "epoch": 1.7103857667503664, + "grad_norm": 2.05319441448637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352670 + }, + { + "epoch": 1.7104342649432027, + "grad_norm": 2.42309763365256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352680 + }, + { + "epoch": 1.7104827631360386, + "grad_norm": 2.547418098686194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352690 + }, + { + "epoch": 1.7105312613288748, + "grad_norm": 2.2750020534090254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352700 + }, + { + "epoch": 1.710579759521711, + "grad_norm": 2.58423291654708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352710 + }, + { + "epoch": 1.7106282577145469, + "grad_norm": 2.3475168475783903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352720 + }, + { + "epoch": 1.710676755907383, + "grad_norm": 2.228135187465341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352730 + }, + { + "epoch": 1.7107252541002191, + "grad_norm": 2.2226016582749253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352740 + }, + { + "epoch": 1.710773752293055, + "grad_norm": 2.2912530539542786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352750 + }, + { + "epoch": 1.7108222504858914, + "grad_norm": 2.007439903195518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352760 + }, + { + "epoch": 1.7108707486787273, + "grad_norm": 2.2295088442092492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352770 + }, + { + "epoch": 1.7109192468715635, + "grad_norm": 2.053840653104544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352780 + }, + { + "epoch": 1.7109677450643996, + "grad_norm": 2.3675356786156954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352790 + }, + { + "epoch": 1.7110162432572356, + "grad_norm": 1.9995836098019026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352800 + }, + { + "epoch": 1.7110647414500717, + "grad_norm": 1.929206128181704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352810 + }, + { + "epoch": 1.7111132396429078, + "grad_norm": 2.469646354086308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352820 + }, + { + "epoch": 1.7111617378357438, + "grad_norm": 2.0808595735388735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352830 + }, + { + "epoch": 1.7112102360285801, + "grad_norm": 2.241718632944867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352840 + }, + { + "epoch": 1.711258734221416, + "grad_norm": 2.236390272969402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352850 + }, + { + "epoch": 1.7113072324142522, + "grad_norm": 2.1581961107131065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352860 + }, + { + "epoch": 1.7113557306070883, + "grad_norm": 1.93452596164434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352870 + }, + { + "epoch": 1.7114042287999243, + "grad_norm": 2.039454827240661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352880 + }, + { + "epoch": 1.7114527269927604, + "grad_norm": 3.163752992918489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352890 + }, + { + "epoch": 1.7115012251855966, + "grad_norm": 2.2682257849737653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352900 + }, + { + "epoch": 1.7115497233784325, + "grad_norm": 2.5553362092978205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352910 + }, + { + "epoch": 1.7115982215712688, + "grad_norm": 2.353702299728866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352920 + }, + { + "epoch": 1.7116467197641048, + "grad_norm": 2.2504991648020223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352930 + }, + { + "epoch": 1.711695217956941, + "grad_norm": 2.2680449518475143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352940 + }, + { + "epoch": 1.711743716149777, + "grad_norm": 2.2924201203977645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352950 + }, + { + "epoch": 1.711792214342613, + "grad_norm": 2.412136090867989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352960 + }, + { + "epoch": 1.7118407125354491, + "grad_norm": 2.0616701235098844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352970 + }, + { + "epoch": 1.7118892107282853, + "grad_norm": 2.113876540477122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352980 + }, + { + "epoch": 1.7119377089211212, + "grad_norm": 2.1578724584969677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 352990 + }, + { + "epoch": 1.7119862071139575, + "grad_norm": 2.052939862551284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353000 + }, + { + "epoch": 1.7120347053067935, + "grad_norm": 2.5726329511144286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353010 + }, + { + "epoch": 1.7120832034996296, + "grad_norm": 2.0582817228387285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353020 + }, + { + "epoch": 1.7121317016924658, + "grad_norm": 2.0498525543644064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353030 + }, + { + "epoch": 1.7121801998853017, + "grad_norm": 2.3751406175165357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353040 + }, + { + "epoch": 1.7122286980781378, + "grad_norm": 2.2354461393092606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353050 + }, + { + "epoch": 1.712277196270974, + "grad_norm": 2.677817789731307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353060 + }, + { + "epoch": 1.7123256944638099, + "grad_norm": 2.0560541713621205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353070 + }, + { + "epoch": 1.7123741926566463, + "grad_norm": 1.960640183540363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353080 + }, + { + "epoch": 1.7124226908494822, + "grad_norm": 2.1930739890763107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353090 + }, + { + "epoch": 1.7124711890423183, + "grad_norm": 2.1944932981909915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353100 + }, + { + "epoch": 1.7125196872351545, + "grad_norm": 2.105215912706626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353110 + }, + { + "epoch": 1.7125681854279904, + "grad_norm": 1.942993499426393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353120 + }, + { + "epoch": 1.7126166836208265, + "grad_norm": 2.4228860695529875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353130 + }, + { + "epoch": 1.7126651818136627, + "grad_norm": 2.511357166667949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353140 + }, + { + "epoch": 1.7127136800064986, + "grad_norm": 2.1073596201404143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353150 + }, + { + "epoch": 1.712762178199335, + "grad_norm": 2.146246202983093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353160 + }, + { + "epoch": 1.7128106763921709, + "grad_norm": 1.987844910900094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353170 + }, + { + "epoch": 1.712859174585007, + "grad_norm": 1.9982600463208655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353180 + }, + { + "epoch": 1.7129076727778432, + "grad_norm": 2.0508981180000774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353190 + }, + { + "epoch": 1.712956170970679, + "grad_norm": 2.2083805006900548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353200 + }, + { + "epoch": 1.7130046691635155, + "grad_norm": 2.199644910660936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353210 + }, + { + "epoch": 1.7130531673563514, + "grad_norm": 2.051565317628956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353220 + }, + { + "epoch": 1.7131016655491875, + "grad_norm": 2.0163637870496132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353230 + }, + { + "epoch": 1.7131501637420237, + "grad_norm": 2.6506862482733595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353240 + }, + { + "epoch": 1.7131986619348596, + "grad_norm": 1.914225045140938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353250 + }, + { + "epoch": 1.7132471601276957, + "grad_norm": 2.0446520920813782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353260 + }, + { + "epoch": 1.7132956583205319, + "grad_norm": 2.1844821063154995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353270 + }, + { + "epoch": 1.7133441565133678, + "grad_norm": 1.9524977190599202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353280 + }, + { + "epoch": 1.7133926547062042, + "grad_norm": 2.087561057351195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353290 + }, + { + "epoch": 1.71344115289904, + "grad_norm": 2.089882400468923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353300 + }, + { + "epoch": 1.7134896510918762, + "grad_norm": 2.965889045469794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353310 + }, + { + "epoch": 1.7135381492847124, + "grad_norm": 2.0140712209126832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353320 + }, + { + "epoch": 1.7135866474775483, + "grad_norm": 3.430553263683578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353330 + }, + { + "epoch": 1.7136351456703844, + "grad_norm": 2.1905778524455854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353340 + }, + { + "epoch": 1.7136836438632206, + "grad_norm": 1.9974107701159483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353350 + }, + { + "epoch": 1.7137321420560565, + "grad_norm": 1.9279669416505385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353360 + }, + { + "epoch": 1.7137806402488929, + "grad_norm": 1.980028407899681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353370 + }, + { + "epoch": 1.7138291384417288, + "grad_norm": 2.1098795599527875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353380 + }, + { + "epoch": 1.713877636634565, + "grad_norm": 2.0948192513969843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353390 + }, + { + "epoch": 1.713926134827401, + "grad_norm": 2.1159308971618884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353400 + }, + { + "epoch": 1.713974633020237, + "grad_norm": 2.0466176309241746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353410 + }, + { + "epoch": 1.7140231312130731, + "grad_norm": 1.9579591281626563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353420 + }, + { + "epoch": 1.7140716294059093, + "grad_norm": 2.150336442241496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353430 + }, + { + "epoch": 1.7141201275987452, + "grad_norm": 2.7696399840237973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353440 + }, + { + "epoch": 1.7141686257915816, + "grad_norm": 2.1063192079395776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353450 + }, + { + "epoch": 1.7142171239844175, + "grad_norm": 2.1860115495542232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353460 + }, + { + "epoch": 1.7142656221772536, + "grad_norm": 1.9400960837856474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353470 + }, + { + "epoch": 1.7143141203700898, + "grad_norm": 1.7832498144798592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353480 + }, + { + "epoch": 1.7143626185629257, + "grad_norm": 1.957302586674814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353490 + }, + { + "epoch": 1.7144111167557619, + "grad_norm": 1.991005049717387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353500 + }, + { + "epoch": 1.714459614948598, + "grad_norm": 1.929976534142952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353510 + }, + { + "epoch": 1.714508113141434, + "grad_norm": 2.0331318850708158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353520 + }, + { + "epoch": 1.7145566113342703, + "grad_norm": 1.763565826706781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353530 + }, + { + "epoch": 1.7146051095271062, + "grad_norm": 2.151944933359573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353540 + }, + { + "epoch": 1.7146536077199424, + "grad_norm": 1.824366435698721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353550 + }, + { + "epoch": 1.7147021059127785, + "grad_norm": 1.915948999453576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353560 + }, + { + "epoch": 1.7147506041056144, + "grad_norm": 2.0906551156940623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353570 + }, + { + "epoch": 1.7147991022984506, + "grad_norm": 1.9889268898509727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353580 + }, + { + "epoch": 1.7148476004912867, + "grad_norm": 2.024068379569144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353590 + }, + { + "epoch": 1.7148960986841226, + "grad_norm": 1.9019765318262216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353600 + }, + { + "epoch": 1.714944596876959, + "grad_norm": 2.0028032565733156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353610 + }, + { + "epoch": 1.714993095069795, + "grad_norm": 1.759677381585334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353620 + }, + { + "epoch": 1.715041593262631, + "grad_norm": 2.3427746853599274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353630 + }, + { + "epoch": 1.7150900914554672, + "grad_norm": 2.065278437157758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353640 + }, + { + "epoch": 1.7151385896483031, + "grad_norm": 2.0398262634557796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353650 + }, + { + "epoch": 1.7151870878411393, + "grad_norm": 2.427990608566688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353660 + }, + { + "epoch": 1.7152355860339754, + "grad_norm": 1.950818884211003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353670 + }, + { + "epoch": 1.7152840842268113, + "grad_norm": 2.0139244938377487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353680 + }, + { + "epoch": 1.7153325824196477, + "grad_norm": 2.2507510522018492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353690 + }, + { + "epoch": 1.7153810806124836, + "grad_norm": 1.8875734753009965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353700 + }, + { + "epoch": 1.7154295788053198, + "grad_norm": 1.9192558653458036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353710 + }, + { + "epoch": 1.715478076998156, + "grad_norm": 1.9184000166205806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353720 + }, + { + "epoch": 1.7155265751909918, + "grad_norm": 2.0615505746945928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353730 + }, + { + "epoch": 1.7155750733838282, + "grad_norm": 1.8961506143000406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353740 + }, + { + "epoch": 1.7156235715766641, + "grad_norm": 1.8788325562013597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353750 + }, + { + "epoch": 1.7156720697695003, + "grad_norm": 1.825986473136254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353760 + }, + { + "epoch": 1.7157205679623364, + "grad_norm": 2.0175413339984516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353770 + }, + { + "epoch": 1.7157690661551723, + "grad_norm": 1.8569128457102124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353780 + }, + { + "epoch": 1.7158175643480085, + "grad_norm": 1.8815759617041294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353790 + }, + { + "epoch": 1.7158660625408446, + "grad_norm": 2.1891409573981946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353800 + }, + { + "epoch": 1.7159145607336805, + "grad_norm": 2.1779779757480355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353810 + }, + { + "epoch": 1.715963058926517, + "grad_norm": 1.9161401354494956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353820 + }, + { + "epoch": 1.7160115571193528, + "grad_norm": 1.9722257604826154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353830 + }, + { + "epoch": 1.716060055312189, + "grad_norm": 2.1930427251959372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353840 + }, + { + "epoch": 1.7161085535050251, + "grad_norm": 2.1239813463580504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353850 + }, + { + "epoch": 1.716157051697861, + "grad_norm": 1.8039393978597218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353860 + }, + { + "epoch": 1.7162055498906972, + "grad_norm": 1.8132841006490708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353870 + }, + { + "epoch": 1.7162540480835333, + "grad_norm": 1.8167003901226053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353880 + }, + { + "epoch": 1.7163025462763692, + "grad_norm": 2.3053249975646395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353890 + }, + { + "epoch": 1.7163510444692056, + "grad_norm": 1.9597358402734244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353900 + }, + { + "epoch": 1.7163995426620415, + "grad_norm": 2.3428926354540636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353910 + }, + { + "epoch": 1.7164480408548777, + "grad_norm": 1.8039788329815565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353920 + }, + { + "epoch": 1.7164965390477138, + "grad_norm": 1.7929528084437152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353930 + }, + { + "epoch": 1.7165450372405497, + "grad_norm": 2.198338755476925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353940 + }, + { + "epoch": 1.7165935354333859, + "grad_norm": 2.0504662856524192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353950 + }, + { + "epoch": 1.716642033626222, + "grad_norm": 1.987062425712338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353960 + }, + { + "epoch": 1.716690531819058, + "grad_norm": 1.847918618125277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353970 + }, + { + "epoch": 1.7167390300118943, + "grad_norm": 2.023558565156236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353980 + }, + { + "epoch": 1.7167875282047302, + "grad_norm": 1.8596557183059303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 353990 + }, + { + "epoch": 1.7168360263975664, + "grad_norm": 1.925551096348954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354000 + }, + { + "epoch": 1.7168845245904025, + "grad_norm": 2.2599936144729327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354010 + }, + { + "epoch": 1.7169330227832384, + "grad_norm": 1.816458805592447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354020 + }, + { + "epoch": 1.7169815209760746, + "grad_norm": 1.9049029020834496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354030 + }, + { + "epoch": 1.7170300191689107, + "grad_norm": 1.974866314924384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354040 + }, + { + "epoch": 1.7170785173617467, + "grad_norm": 1.7303570132298773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354050 + }, + { + "epoch": 1.717127015554583, + "grad_norm": 2.0874132644621568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354060 + }, + { + "epoch": 1.717175513747419, + "grad_norm": 1.9680117091525062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354070 + }, + { + "epoch": 1.717224011940255, + "grad_norm": 2.343137595062217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354080 + }, + { + "epoch": 1.7172725101330912, + "grad_norm": 1.909833002855521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354090 + }, + { + "epoch": 1.7173210083259272, + "grad_norm": 2.0303316361491852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354100 + }, + { + "epoch": 1.7173695065187633, + "grad_norm": 1.7633789539672762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354110 + }, + { + "epoch": 1.7174180047115994, + "grad_norm": 1.786724190822042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354120 + }, + { + "epoch": 1.7174665029044354, + "grad_norm": 1.659336312798132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354130 + }, + { + "epoch": 1.7175150010972717, + "grad_norm": 1.6874603048222525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354140 + }, + { + "epoch": 1.7175634992901077, + "grad_norm": 2.3121232928247082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354150 + }, + { + "epoch": 1.7176119974829438, + "grad_norm": 2.0646206522201282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354160 + }, + { + "epoch": 1.71766049567578, + "grad_norm": 1.9954377705744264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354170 + }, + { + "epoch": 1.7177089938686159, + "grad_norm": 1.8872777118872364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354180 + }, + { + "epoch": 1.717757492061452, + "grad_norm": 1.6433851612873696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354190 + }, + { + "epoch": 1.7178059902542882, + "grad_norm": 1.7985463784953026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354200 + }, + { + "epoch": 1.717854488447124, + "grad_norm": 1.9275322671319373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354210 + }, + { + "epoch": 1.7179029866399604, + "grad_norm": 2.6173569978027444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354220 + }, + { + "epoch": 1.7179514848327964, + "grad_norm": 1.6635610222692776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354230 + }, + { + "epoch": 1.7179999830256325, + "grad_norm": 1.9818140017946462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354240 + }, + { + "epoch": 1.7180484812184686, + "grad_norm": 1.6750393072584302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354250 + }, + { + "epoch": 1.7180969794113046, + "grad_norm": 1.8449199501446856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354260 + }, + { + "epoch": 1.718145477604141, + "grad_norm": 1.830654916545882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354270 + }, + { + "epoch": 1.7181939757969769, + "grad_norm": 1.735894095133972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354280 + }, + { + "epoch": 1.718242473989813, + "grad_norm": 2.2338388916409713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354290 + }, + { + "epoch": 1.7182909721826491, + "grad_norm": 1.9685183261231032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354300 + }, + { + "epoch": 1.718339470375485, + "grad_norm": 2.026612477834533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354310 + }, + { + "epoch": 1.7183879685683212, + "grad_norm": 1.7374917504753284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354320 + }, + { + "epoch": 1.7184364667611574, + "grad_norm": 1.7012689923490143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354330 + }, + { + "epoch": 1.7184849649539933, + "grad_norm": 1.628924373164864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354340 + }, + { + "epoch": 1.7185334631468296, + "grad_norm": 2.006646404595358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354350 + }, + { + "epoch": 1.7185819613396656, + "grad_norm": 1.918849434900949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354360 + }, + { + "epoch": 1.7186304595325017, + "grad_norm": 1.6758734844302126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354370 + }, + { + "epoch": 1.7186789577253379, + "grad_norm": 1.8626709064051283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354380 + }, + { + "epoch": 1.7187274559181738, + "grad_norm": 2.0308911885535963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354390 + }, + { + "epoch": 1.71877595411101, + "grad_norm": 2.3077864952369964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354400 + }, + { + "epoch": 1.718824452303846, + "grad_norm": 1.7968369903087478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354410 + }, + { + "epoch": 1.718872950496682, + "grad_norm": 2.158428635823384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354420 + }, + { + "epoch": 1.7189214486895183, + "grad_norm": 1.87155766440128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354430 + }, + { + "epoch": 1.7189699468823543, + "grad_norm": 1.6114279688395072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354440 + }, + { + "epoch": 1.7190184450751904, + "grad_norm": 1.7792595841115144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354450 + }, + { + "epoch": 1.7190669432680266, + "grad_norm": 2.2164165613958176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354460 + }, + { + "epoch": 1.7191154414608625, + "grad_norm": 1.5784182849643003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354470 + }, + { + "epoch": 1.7191639396536986, + "grad_norm": 1.6300543137504064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354480 + }, + { + "epoch": 1.7192124378465348, + "grad_norm": 2.020830436322285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354490 + }, + { + "epoch": 1.7192609360393707, + "grad_norm": 1.6929080359773252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354500 + }, + { + "epoch": 1.719309434232207, + "grad_norm": 1.6776148470398766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354510 + }, + { + "epoch": 1.719357932425043, + "grad_norm": 1.6775270950120102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354520 + }, + { + "epoch": 1.7194064306178791, + "grad_norm": 1.6641527267324818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354530 + }, + { + "epoch": 1.7194549288107153, + "grad_norm": 2.4396852538188796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354540 + }, + { + "epoch": 1.7195034270035512, + "grad_norm": 1.7835620980122258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354550 + }, + { + "epoch": 1.7195519251963873, + "grad_norm": 1.738219168601063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354560 + }, + { + "epoch": 1.7196004233892235, + "grad_norm": 1.8483909514088737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354570 + }, + { + "epoch": 1.7196489215820594, + "grad_norm": 1.8527769540810368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354580 + }, + { + "epoch": 1.7196974197748958, + "grad_norm": 1.9004506413011768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354590 + }, + { + "epoch": 1.7197459179677317, + "grad_norm": 1.622168532833257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354600 + }, + { + "epoch": 1.7197944161605678, + "grad_norm": 1.606647792584681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354610 + }, + { + "epoch": 1.719842914353404, + "grad_norm": 1.866178678255892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354620 + }, + { + "epoch": 1.71989141254624, + "grad_norm": 1.6616297671134816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354630 + }, + { + "epoch": 1.719939910739076, + "grad_norm": 1.8403564894242663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354640 + }, + { + "epoch": 1.7199884089319122, + "grad_norm": 1.931884341388468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354650 + }, + { + "epoch": 1.720036907124748, + "grad_norm": 1.7274782493359453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354660 + }, + { + "epoch": 1.7200854053175845, + "grad_norm": 1.8194073803101674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354670 + }, + { + "epoch": 1.7201339035104204, + "grad_norm": 2.3729352705004203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354680 + }, + { + "epoch": 1.7201824017032565, + "grad_norm": 2.024153822333119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354690 + }, + { + "epoch": 1.7202308998960927, + "grad_norm": 1.6111664891127475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354700 + }, + { + "epoch": 1.7202793980889286, + "grad_norm": 1.8954141367544253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354710 + }, + { + "epoch": 1.7203278962817647, + "grad_norm": 1.8258228706713453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354720 + }, + { + "epoch": 1.720376394474601, + "grad_norm": 1.677749672523987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354730 + }, + { + "epoch": 1.720424892667437, + "grad_norm": 1.8909052101889756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354740 + }, + { + "epoch": 1.7204733908602732, + "grad_norm": 1.860794363039986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354750 + }, + { + "epoch": 1.720521889053109, + "grad_norm": 1.743899602502097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354760 + }, + { + "epoch": 1.7205703872459452, + "grad_norm": 2.3011399008510125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354770 + }, + { + "epoch": 1.7206188854387814, + "grad_norm": 1.938681037927381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354780 + }, + { + "epoch": 1.7206673836316173, + "grad_norm": 1.914744096609411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354790 + }, + { + "epoch": 1.7207158818244537, + "grad_norm": 3.7105632344491823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354800 + }, + { + "epoch": 1.7207643800172896, + "grad_norm": 1.6652766277047704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354810 + }, + { + "epoch": 1.7208128782101257, + "grad_norm": 1.4916805568532254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354820 + }, + { + "epoch": 1.7208613764029619, + "grad_norm": 1.568488450232053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354830 + }, + { + "epoch": 1.7209098745957978, + "grad_norm": 1.755127954083946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354840 + }, + { + "epoch": 1.720958372788634, + "grad_norm": 1.964194673576003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354850 + }, + { + "epoch": 1.72100687098147, + "grad_norm": 1.8391034473097534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354860 + }, + { + "epoch": 1.721055369174306, + "grad_norm": 1.942833804946531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354870 + }, + { + "epoch": 1.7211038673671424, + "grad_norm": 1.3977582646873543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354880 + }, + { + "epoch": 1.7211523655599783, + "grad_norm": 1.765500101669204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354890 + }, + { + "epoch": 1.7212008637528144, + "grad_norm": 1.713440056505533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354900 + }, + { + "epoch": 1.7212493619456506, + "grad_norm": 1.7178255262706443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354910 + }, + { + "epoch": 1.7212978601384865, + "grad_norm": 2.4304211976300394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354920 + }, + { + "epoch": 1.7213463583313227, + "grad_norm": 1.694783513528364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354930 + }, + { + "epoch": 1.7213948565241588, + "grad_norm": 1.8207991558938375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354940 + }, + { + "epoch": 1.7214433547169947, + "grad_norm": 1.7368513738347247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354950 + }, + { + "epoch": 1.721491852909831, + "grad_norm": 1.748235867182757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354960 + }, + { + "epoch": 1.721540351102667, + "grad_norm": 1.7083111814031327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354970 + }, + { + "epoch": 1.7215888492955032, + "grad_norm": 1.8239079579984718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354980 + }, + { + "epoch": 1.7216373474883393, + "grad_norm": 1.9700998166172212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 354990 + }, + { + "epoch": 1.7216858456811752, + "grad_norm": 2.0037335346501095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355000 + }, + { + "epoch": 1.7217343438740114, + "grad_norm": 2.984074498613154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355010 + }, + { + "epoch": 1.7217828420668475, + "grad_norm": 1.6215413012332647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355020 + }, + { + "epoch": 1.7218313402596834, + "grad_norm": 1.7135873164875193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355030 + }, + { + "epoch": 1.7218798384525198, + "grad_norm": 1.6709595485053796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355040 + }, + { + "epoch": 1.7219283366453557, + "grad_norm": 2.2335163052389362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355050 + }, + { + "epoch": 1.7219768348381919, + "grad_norm": 1.587653741808026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355060 + }, + { + "epoch": 1.722025333031028, + "grad_norm": 1.6487279097532337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355070 + }, + { + "epoch": 1.722073831223864, + "grad_norm": 1.7023193521481517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355080 + }, + { + "epoch": 1.7221223294167, + "grad_norm": 1.817853600982744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355090 + }, + { + "epoch": 1.7221708276095362, + "grad_norm": 1.839548779969391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355100 + }, + { + "epoch": 1.7222193258023721, + "grad_norm": 1.796519555341547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355110 + }, + { + "epoch": 1.7222678239952085, + "grad_norm": 1.4870817466317021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355120 + }, + { + "epoch": 1.7223163221880444, + "grad_norm": 1.5881118642369074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355130 + }, + { + "epoch": 1.7223648203808806, + "grad_norm": 1.757831569193513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355140 + }, + { + "epoch": 1.7224133185737167, + "grad_norm": 1.4729874209251648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355150 + }, + { + "epoch": 1.7224618167665526, + "grad_norm": 1.8455335037970144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355160 + }, + { + "epoch": 1.7225103149593888, + "grad_norm": 1.8135246193651255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355170 + }, + { + "epoch": 1.722558813152225, + "grad_norm": 1.6706250605125206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355180 + }, + { + "epoch": 1.7226073113450608, + "grad_norm": 1.554955275651082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355190 + }, + { + "epoch": 1.7226558095378972, + "grad_norm": 1.832245111188513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355200 + }, + { + "epoch": 1.7227043077307331, + "grad_norm": 1.5765220240382405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355210 + }, + { + "epoch": 1.7227528059235693, + "grad_norm": 1.637258861819646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355220 + }, + { + "epoch": 1.7228013041164054, + "grad_norm": 1.4852732377335087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355230 + }, + { + "epoch": 1.7228498023092413, + "grad_norm": 1.6373050470974704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355240 + }, + { + "epoch": 1.7228983005020777, + "grad_norm": 1.7419218067971087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355250 + }, + { + "epoch": 1.7229467986949136, + "grad_norm": 1.4812785664730654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355260 + }, + { + "epoch": 1.7229952968877498, + "grad_norm": 1.876482436102833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355270 + }, + { + "epoch": 1.723043795080586, + "grad_norm": 1.43774974148414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355280 + }, + { + "epoch": 1.7230922932734218, + "grad_norm": 1.9558326513902102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355290 + }, + { + "epoch": 1.723140791466258, + "grad_norm": 1.788925452217427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355300 + }, + { + "epoch": 1.7231892896590941, + "grad_norm": 1.836207985661531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355310 + }, + { + "epoch": 1.72323778785193, + "grad_norm": 1.6909840638845708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355320 + }, + { + "epoch": 1.7232862860447664, + "grad_norm": 1.6985826079007893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355330 + }, + { + "epoch": 1.7233347842376023, + "grad_norm": 2.108328800431991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355340 + }, + { + "epoch": 1.7233832824304385, + "grad_norm": 1.4734565567664504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355350 + }, + { + "epoch": 1.7234317806232746, + "grad_norm": 1.5654970653145028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355360 + }, + { + "epoch": 1.7234802788161105, + "grad_norm": 2.026970946644724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355370 + }, + { + "epoch": 1.7235287770089467, + "grad_norm": 1.836815322064922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355380 + }, + { + "epoch": 1.7235772752017828, + "grad_norm": 1.5690932997358686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355390 + }, + { + "epoch": 1.7236257733946188, + "grad_norm": 2.5817259441396345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355400 + }, + { + "epoch": 1.7236742715874551, + "grad_norm": 2.0093457564485107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355410 + }, + { + "epoch": 1.723722769780291, + "grad_norm": 1.8253601297146815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355420 + }, + { + "epoch": 1.7237712679731272, + "grad_norm": 2.1123341298334708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355430 + }, + { + "epoch": 1.7238197661659633, + "grad_norm": 1.9735709955170933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355440 + }, + { + "epoch": 1.7238682643587993, + "grad_norm": 1.8352231734297675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355450 + }, + { + "epoch": 1.7239167625516354, + "grad_norm": 1.5972348776926992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355460 + }, + { + "epoch": 1.7239652607444715, + "grad_norm": 1.895521783978893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355470 + }, + { + "epoch": 1.7240137589373075, + "grad_norm": 1.630289858667311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355480 + }, + { + "epoch": 1.7240622571301438, + "grad_norm": 1.9908350523678564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355490 + }, + { + "epoch": 1.7241107553229797, + "grad_norm": 1.966092888494586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355500 + }, + { + "epoch": 1.724159253515816, + "grad_norm": 1.7175176836303763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355510 + }, + { + "epoch": 1.724207751708652, + "grad_norm": 1.4338892739829134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355520 + }, + { + "epoch": 1.724256249901488, + "grad_norm": 1.6664760238427334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355530 + }, + { + "epoch": 1.724304748094324, + "grad_norm": 1.9471801948611756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355540 + }, + { + "epoch": 1.7243532462871602, + "grad_norm": 1.6326962892776464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355550 + }, + { + "epoch": 1.7244017444799962, + "grad_norm": 2.1836047636725198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355560 + }, + { + "epoch": 1.7244502426728325, + "grad_norm": 1.3876761073561283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355570 + }, + { + "epoch": 1.7244987408656685, + "grad_norm": 1.53497534682856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355580 + }, + { + "epoch": 1.7245472390585046, + "grad_norm": 1.7139027974621968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355590 + }, + { + "epoch": 1.7245957372513407, + "grad_norm": 1.560167817160618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355600 + }, + { + "epoch": 1.7246442354441767, + "grad_norm": 1.7895308346282945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355610 + }, + { + "epoch": 1.7246927336370128, + "grad_norm": 1.5289513655147857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355620 + }, + { + "epoch": 1.724741231829849, + "grad_norm": 1.767855017931197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355630 + }, + { + "epoch": 1.7247897300226849, + "grad_norm": 1.5725538204947043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355640 + }, + { + "epoch": 1.7248382282155212, + "grad_norm": 1.9562996556032886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355650 + }, + { + "epoch": 1.7248867264083572, + "grad_norm": 1.7196366997040968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355660 + }, + { + "epoch": 1.7249352246011933, + "grad_norm": 1.5618260462701983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355670 + }, + { + "epoch": 1.7249837227940295, + "grad_norm": 1.6115540901751046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355680 + }, + { + "epoch": 1.7250322209868654, + "grad_norm": 1.688833428659109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355690 + }, + { + "epoch": 1.7250807191797015, + "grad_norm": 1.8276253399562847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355700 + }, + { + "epoch": 1.7251292173725377, + "grad_norm": 2.0479681950291706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355710 + }, + { + "epoch": 1.7251777155653736, + "grad_norm": 1.7116251527227178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355720 + }, + { + "epoch": 1.72522621375821, + "grad_norm": 1.3341932891819397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355730 + }, + { + "epoch": 1.7252747119510459, + "grad_norm": 1.5994459090507007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355740 + }, + { + "epoch": 1.725323210143882, + "grad_norm": 1.8828348657962124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355750 + }, + { + "epoch": 1.7253717083367182, + "grad_norm": 1.5818649501397886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355760 + }, + { + "epoch": 1.725420206529554, + "grad_norm": 1.9899699665870685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355770 + }, + { + "epoch": 1.7254687047223904, + "grad_norm": 1.50168961710051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355780 + }, + { + "epoch": 1.7255172029152264, + "grad_norm": 1.9046010990564355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355790 + }, + { + "epoch": 1.7255657011080625, + "grad_norm": 1.6952942161196916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355800 + }, + { + "epoch": 1.7256141993008987, + "grad_norm": 1.7503877458580064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355810 + }, + { + "epoch": 1.7256626974937346, + "grad_norm": 1.593805798449921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355820 + }, + { + "epoch": 1.7257111956865707, + "grad_norm": 2.221628392362618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355830 + }, + { + "epoch": 1.7257596938794069, + "grad_norm": 1.855220332913632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355840 + }, + { + "epoch": 1.7258081920722428, + "grad_norm": 1.4855770835708881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355850 + }, + { + "epoch": 1.7258566902650792, + "grad_norm": 1.52924641838581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355860 + }, + { + "epoch": 1.725905188457915, + "grad_norm": 1.903796587043871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355870 + }, + { + "epoch": 1.7259536866507512, + "grad_norm": 1.4800800585135221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355880 + }, + { + "epoch": 1.7260021848435874, + "grad_norm": 1.9685993279949798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355890 + }, + { + "epoch": 1.7260506830364233, + "grad_norm": 1.4174440288172718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355900 + }, + { + "epoch": 1.7260991812292594, + "grad_norm": 1.9704428311229094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355910 + }, + { + "epoch": 1.7261476794220956, + "grad_norm": 1.3829247968999425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355920 + }, + { + "epoch": 1.7261961776149315, + "grad_norm": 1.6718074036248254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355930 + }, + { + "epoch": 1.7262446758077679, + "grad_norm": 1.894784418254858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355940 + }, + { + "epoch": 1.7262931740006038, + "grad_norm": 1.78766637048966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355950 + }, + { + "epoch": 1.72634167219344, + "grad_norm": 1.8290855052782717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355960 + }, + { + "epoch": 1.726390170386276, + "grad_norm": 1.66857141437049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355970 + }, + { + "epoch": 1.726438668579112, + "grad_norm": 1.3761901840325663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355980 + }, + { + "epoch": 1.7264871667719481, + "grad_norm": 1.8963058678878042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 355990 + }, + { + "epoch": 1.7265356649647843, + "grad_norm": 1.499986801434261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356000 + }, + { + "epoch": 1.7265841631576202, + "grad_norm": 1.3824121403160916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356010 + }, + { + "epoch": 1.7266326613504566, + "grad_norm": 1.4564305317321669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356020 + }, + { + "epoch": 1.7266811595432925, + "grad_norm": 1.3920796959610016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356030 + }, + { + "epoch": 1.7267296577361286, + "grad_norm": 1.839256036362258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356040 + }, + { + "epoch": 1.7267781559289648, + "grad_norm": 1.345367550698029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356050 + }, + { + "epoch": 1.7268266541218007, + "grad_norm": 1.4863065445069878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356060 + }, + { + "epoch": 1.7268751523146368, + "grad_norm": 1.5965682109708723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356070 + }, + { + "epoch": 1.726923650507473, + "grad_norm": 1.744580302442955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356080 + }, + { + "epoch": 1.726972148700309, + "grad_norm": 1.9198569845002567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356090 + }, + { + "epoch": 1.7270206468931453, + "grad_norm": 1.7327678847323114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356100 + }, + { + "epoch": 1.7270691450859812, + "grad_norm": 1.672395022467299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356110 + }, + { + "epoch": 1.7271176432788173, + "grad_norm": 1.494099066690069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356120 + }, + { + "epoch": 1.7271661414716535, + "grad_norm": 1.4687581817440787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356130 + }, + { + "epoch": 1.7272146396644894, + "grad_norm": 1.6720740347864194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356140 + }, + { + "epoch": 1.7272631378573255, + "grad_norm": 2.4003437459896304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356150 + }, + { + "epoch": 1.7273116360501617, + "grad_norm": 1.733737953202308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356160 + }, + { + "epoch": 1.7273601342429976, + "grad_norm": 1.642217206665464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356170 + }, + { + "epoch": 1.727408632435834, + "grad_norm": 1.4751805998969303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356180 + }, + { + "epoch": 1.72745713062867, + "grad_norm": 1.6206525899065127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356190 + }, + { + "epoch": 1.727505628821506, + "grad_norm": 1.536716354166856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356200 + }, + { + "epoch": 1.7275541270143422, + "grad_norm": 1.948638761462007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356210 + }, + { + "epoch": 1.727602625207178, + "grad_norm": 1.592144549533714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356220 + }, + { + "epoch": 1.7276511234000143, + "grad_norm": 1.305752661551196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356230 + }, + { + "epoch": 1.7276996215928504, + "grad_norm": 1.4499245359900215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356240 + }, + { + "epoch": 1.7277481197856863, + "grad_norm": 1.4172147011493053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356250 + }, + { + "epoch": 1.7277966179785227, + "grad_norm": 1.6764101218313954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356260 + }, + { + "epoch": 1.7278451161713586, + "grad_norm": 1.501302016038153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356270 + }, + { + "epoch": 1.7278936143641948, + "grad_norm": 2.336375182210304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356280 + }, + { + "epoch": 1.727942112557031, + "grad_norm": 1.991556253244653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356290 + }, + { + "epoch": 1.7279906107498668, + "grad_norm": 1.548652406313522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356300 + }, + { + "epoch": 1.7280391089427032, + "grad_norm": 1.556735007568477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356310 + }, + { + "epoch": 1.728087607135539, + "grad_norm": 1.886808043138899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356320 + }, + { + "epoch": 1.7281361053283752, + "grad_norm": 1.4901672784617404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356330 + }, + { + "epoch": 1.7281846035212114, + "grad_norm": 1.580776753939972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356340 + }, + { + "epoch": 1.7282331017140473, + "grad_norm": 1.4761021738252111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356350 + }, + { + "epoch": 1.7282815999068835, + "grad_norm": 1.8136935509005525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356360 + }, + { + "epoch": 1.7283300980997196, + "grad_norm": 1.6355812704205164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356370 + }, + { + "epoch": 1.7283785962925555, + "grad_norm": 1.49899488377514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356380 + }, + { + "epoch": 1.728427094485392, + "grad_norm": 1.6697427440703905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356390 + }, + { + "epoch": 1.7284755926782278, + "grad_norm": 1.461329102170339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356400 + }, + { + "epoch": 1.728524090871064, + "grad_norm": 1.6772297328770946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356410 + }, + { + "epoch": 1.7285725890639, + "grad_norm": 1.4240940870990926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356420 + }, + { + "epoch": 1.728621087256736, + "grad_norm": 1.5329092661886534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356430 + }, + { + "epoch": 1.7286695854495722, + "grad_norm": 1.6348076670169576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356440 + }, + { + "epoch": 1.7287180836424083, + "grad_norm": 1.8208675456321544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356450 + }, + { + "epoch": 1.7287665818352442, + "grad_norm": 1.5615910342603456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356460 + }, + { + "epoch": 1.7288150800280806, + "grad_norm": 1.51912935564269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356470 + }, + { + "epoch": 1.7288635782209165, + "grad_norm": 1.3568587142742672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356480 + }, + { + "epoch": 1.7289120764137527, + "grad_norm": 1.5692924293375654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356490 + }, + { + "epoch": 1.7289605746065888, + "grad_norm": 1.436550434164019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356500 + }, + { + "epoch": 1.7290090727994247, + "grad_norm": 1.4703169348706524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356510 + }, + { + "epoch": 1.7290575709922609, + "grad_norm": 1.7391419859791313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356520 + }, + { + "epoch": 1.729106069185097, + "grad_norm": 1.4249345703376548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356530 + }, + { + "epoch": 1.729154567377933, + "grad_norm": 1.643653035898751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356540 + }, + { + "epoch": 1.7292030655707693, + "grad_norm": 1.5426797617124066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356550 + }, + { + "epoch": 1.7292515637636052, + "grad_norm": 1.783973324620547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356560 + }, + { + "epoch": 1.7293000619564414, + "grad_norm": 1.7037766752991956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356570 + }, + { + "epoch": 1.7293485601492775, + "grad_norm": 1.540258942611672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356580 + }, + { + "epoch": 1.7293970583421134, + "grad_norm": 1.8866645135062754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356590 + }, + { + "epoch": 1.7294455565349496, + "grad_norm": 1.3144350496929746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356600 + }, + { + "epoch": 1.7294940547277857, + "grad_norm": 1.3273686150228059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356610 + }, + { + "epoch": 1.7295425529206216, + "grad_norm": 1.5469357350639257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356620 + }, + { + "epoch": 1.729591051113458, + "grad_norm": 1.3716442204270152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356630 + }, + { + "epoch": 1.729639549306294, + "grad_norm": 1.7341886149324637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356640 + }, + { + "epoch": 1.72968804749913, + "grad_norm": 1.6147803094668234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356650 + }, + { + "epoch": 1.7297365456919662, + "grad_norm": 1.686016659618872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356660 + }, + { + "epoch": 1.7297850438848021, + "grad_norm": 1.3875931514917283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356670 + }, + { + "epoch": 1.7298335420776383, + "grad_norm": 1.523101111899905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356680 + }, + { + "epoch": 1.7298820402704744, + "grad_norm": 1.5033682743137433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356690 + }, + { + "epoch": 1.7299305384633104, + "grad_norm": 1.7092038007149313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356700 + }, + { + "epoch": 1.7299790366561467, + "grad_norm": 1.6512393230527778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356710 + }, + { + "epoch": 1.7300275348489826, + "grad_norm": 1.5433670341735706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356720 + }, + { + "epoch": 1.7300760330418188, + "grad_norm": 1.40535938442099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356730 + }, + { + "epoch": 1.730124531234655, + "grad_norm": 1.6412526449016696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356740 + }, + { + "epoch": 1.7301730294274908, + "grad_norm": 1.808304972428232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356750 + }, + { + "epoch": 1.730221527620327, + "grad_norm": 1.4078192833721914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356760 + }, + { + "epoch": 1.7302700258131631, + "grad_norm": 1.4085514088435502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356770 + }, + { + "epoch": 1.7303185240059993, + "grad_norm": 1.7153372056100125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356780 + }, + { + "epoch": 1.7303670221988354, + "grad_norm": 1.381172864967084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356790 + }, + { + "epoch": 1.7304155203916713, + "grad_norm": 1.6380335310373084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356800 + }, + { + "epoch": 1.7304640185845075, + "grad_norm": 1.60102384683114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356810 + }, + { + "epoch": 1.7305125167773436, + "grad_norm": 1.530627002921392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356820 + }, + { + "epoch": 1.7305610149701796, + "grad_norm": 1.6975906902416682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356830 + }, + { + "epoch": 1.730609513163016, + "grad_norm": 2.142893684720093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356840 + }, + { + "epoch": 1.7306580113558518, + "grad_norm": 1.6644312594848998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356850 + }, + { + "epoch": 1.730706509548688, + "grad_norm": 1.5260383179338532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356860 + }, + { + "epoch": 1.7307550077415241, + "grad_norm": 1.7448430256195024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356870 + }, + { + "epoch": 1.73080350593436, + "grad_norm": 1.4091983580044598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356880 + }, + { + "epoch": 1.7308520041271962, + "grad_norm": 2.0972391823192993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356890 + }, + { + "epoch": 1.7309005023200323, + "grad_norm": 1.2924473935527203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356900 + }, + { + "epoch": 1.7309490005128683, + "grad_norm": 1.6884143860806944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356910 + }, + { + "epoch": 1.7309974987057046, + "grad_norm": 1.5939669140152546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356920 + }, + { + "epoch": 1.7310459968985406, + "grad_norm": 1.796590076708071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356930 + }, + { + "epoch": 1.7310944950913767, + "grad_norm": 1.5350158477644982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356940 + }, + { + "epoch": 1.7311429932842128, + "grad_norm": 1.4620355592853684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356950 + }, + { + "epoch": 1.7311914914770488, + "grad_norm": 1.4352274924078756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356960 + }, + { + "epoch": 1.731239989669885, + "grad_norm": 1.3818269195553512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356970 + }, + { + "epoch": 1.731288487862721, + "grad_norm": 1.6451149775775775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356980 + }, + { + "epoch": 1.731336986055557, + "grad_norm": 1.998273368997161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 356990 + }, + { + "epoch": 1.7313854842483933, + "grad_norm": 1.548407269069685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357000 + }, + { + "epoch": 1.7314339824412293, + "grad_norm": 1.3507275298252353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357010 + }, + { + "epoch": 1.7314824806340654, + "grad_norm": 1.7122097517585644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357020 + }, + { + "epoch": 1.7315309788269015, + "grad_norm": 1.6367156518981574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357030 + }, + { + "epoch": 1.7315794770197375, + "grad_norm": 1.8908055565702853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357040 + }, + { + "epoch": 1.7316279752125736, + "grad_norm": 1.5365500871666882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357050 + }, + { + "epoch": 1.7316764734054098, + "grad_norm": 1.6175375705529405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357060 + }, + { + "epoch": 1.7317249715982457, + "grad_norm": 1.418565886979195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357070 + }, + { + "epoch": 1.731773469791082, + "grad_norm": 1.780759895098072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357080 + }, + { + "epoch": 1.731821967983918, + "grad_norm": 1.3998016967775584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357090 + }, + { + "epoch": 1.731870466176754, + "grad_norm": 1.3330622827822936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357100 + }, + { + "epoch": 1.7319189643695903, + "grad_norm": 1.7291625908910646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357110 + }, + { + "epoch": 1.7319674625624262, + "grad_norm": 1.5327998426073464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357120 + }, + { + "epoch": 1.7320159607552623, + "grad_norm": 1.4707087991894241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357130 + }, + { + "epoch": 1.7320644589480985, + "grad_norm": 1.621821432706838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357140 + }, + { + "epoch": 1.7321129571409344, + "grad_norm": 1.9828743091920842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357150 + }, + { + "epoch": 1.7321614553337707, + "grad_norm": 1.9875830759019664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357160 + }, + { + "epoch": 1.7322099535266067, + "grad_norm": 1.4575341822364862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357170 + }, + { + "epoch": 1.7322584517194428, + "grad_norm": 1.655977754921878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357180 + }, + { + "epoch": 1.732306949912279, + "grad_norm": 1.7882253899870193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357190 + }, + { + "epoch": 1.7323554481051149, + "grad_norm": 1.9980081589210386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357200 + }, + { + "epoch": 1.732403946297951, + "grad_norm": 1.6961111626301317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357210 + }, + { + "epoch": 1.7324524444907872, + "grad_norm": 1.689301498686291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357220 + }, + { + "epoch": 1.732500942683623, + "grad_norm": 1.3287429823094499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357230 + }, + { + "epoch": 1.7325494408764595, + "grad_norm": 1.4708497531046305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357240 + }, + { + "epoch": 1.7325979390692954, + "grad_norm": 1.5111801587863738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357250 + }, + { + "epoch": 1.7326464372621315, + "grad_norm": 1.5512965134689694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357260 + }, + { + "epoch": 1.7326949354549677, + "grad_norm": 1.522304415857434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357270 + }, + { + "epoch": 1.7327434336478036, + "grad_norm": 1.473217903225077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357280 + }, + { + "epoch": 1.73279193184064, + "grad_norm": 1.3591088254827355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357290 + }, + { + "epoch": 1.7328404300334759, + "grad_norm": 1.4932949099488724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357300 + }, + { + "epoch": 1.732888928226312, + "grad_norm": 1.553285322586362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357310 + }, + { + "epoch": 1.7329374264191482, + "grad_norm": 1.6070053732164524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357320 + }, + { + "epoch": 1.732985924611984, + "grad_norm": 1.4419964777800942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357330 + }, + { + "epoch": 1.7330344228048202, + "grad_norm": 1.7775004579334563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357340 + }, + { + "epoch": 1.7330829209976564, + "grad_norm": 1.5783223616949726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357350 + }, + { + "epoch": 1.7331314191904923, + "grad_norm": 1.631006618652009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357360 + }, + { + "epoch": 1.7331799173833287, + "grad_norm": 1.606476907056731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357370 + }, + { + "epoch": 1.7332284155761646, + "grad_norm": 1.4317780738792862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357380 + }, + { + "epoch": 1.7332769137690007, + "grad_norm": 2.2978454694566608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357390 + }, + { + "epoch": 1.7333254119618369, + "grad_norm": 1.7373279703747357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357400 + }, + { + "epoch": 1.7333739101546728, + "grad_norm": 1.238021685168178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357410 + }, + { + "epoch": 1.733422408347509, + "grad_norm": 1.5782767093242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357420 + }, + { + "epoch": 1.733470906540345, + "grad_norm": 1.562600360216493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357430 + }, + { + "epoch": 1.733519404733181, + "grad_norm": 1.776420610610785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357440 + }, + { + "epoch": 1.7335679029260174, + "grad_norm": 1.8820594860358142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357450 + }, + { + "epoch": 1.7336164011188533, + "grad_norm": 1.6693757487473704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357460 + }, + { + "epoch": 1.7336648993116894, + "grad_norm": 1.4303316753228046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357470 + }, + { + "epoch": 1.7337133975045256, + "grad_norm": 1.4093264333325806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357480 + }, + { + "epoch": 1.7337618956973615, + "grad_norm": 1.3478310911807512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357490 + }, + { + "epoch": 1.7338103938901976, + "grad_norm": 1.5229280947437474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357500 + }, + { + "epoch": 1.7338588920830338, + "grad_norm": 1.781719483062716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357510 + }, + { + "epoch": 1.7339073902758697, + "grad_norm": 1.5160825483917506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357520 + }, + { + "epoch": 1.733955888468706, + "grad_norm": 2.4087309924425426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357530 + }, + { + "epoch": 1.734004386661542, + "grad_norm": 1.9675960416520866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357540 + }, + { + "epoch": 1.7340528848543781, + "grad_norm": 1.7741269786597513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357550 + }, + { + "epoch": 1.7341013830472143, + "grad_norm": 1.8313633276534347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357560 + }, + { + "epoch": 1.7341498812400502, + "grad_norm": 1.7815663611031596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357570 + }, + { + "epoch": 1.7341983794328863, + "grad_norm": 1.6044408468474103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357580 + }, + { + "epoch": 1.7342468776257225, + "grad_norm": 1.5535615460748886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357590 + }, + { + "epoch": 1.7342953758185584, + "grad_norm": 1.4380357349352835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357600 + }, + { + "epoch": 1.7343438740113948, + "grad_norm": 1.5853546031507904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357610 + }, + { + "epoch": 1.7343923722042307, + "grad_norm": 1.3751953353846602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357620 + }, + { + "epoch": 1.7344408703970668, + "grad_norm": 1.7070403757202257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357630 + }, + { + "epoch": 1.734489368589903, + "grad_norm": 1.6923896950515882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357640 + }, + { + "epoch": 1.734537866782739, + "grad_norm": 1.5270325448568656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357650 + }, + { + "epoch": 1.734586364975575, + "grad_norm": 1.4646745150059814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357660 + }, + { + "epoch": 1.7346348631684112, + "grad_norm": 1.4433265249635951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357670 + }, + { + "epoch": 1.7346833613612471, + "grad_norm": 1.7830791065875928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357680 + }, + { + "epoch": 1.7347318595540835, + "grad_norm": 1.5096762950861375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357690 + }, + { + "epoch": 1.7347803577469194, + "grad_norm": 1.7466625479301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357700 + }, + { + "epoch": 1.7348288559397556, + "grad_norm": 1.4981409890424402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357710 + }, + { + "epoch": 1.7348773541325917, + "grad_norm": 1.5184520307798266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357720 + }, + { + "epoch": 1.7349258523254276, + "grad_norm": 1.5550137177910983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357730 + }, + { + "epoch": 1.7349743505182638, + "grad_norm": 1.6722459861284733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357740 + }, + { + "epoch": 1.7350228487111, + "grad_norm": 1.4837014283841654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357750 + }, + { + "epoch": 1.7350713469039358, + "grad_norm": 1.5458279989388757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357760 + }, + { + "epoch": 1.7351198450967722, + "grad_norm": 1.4273861204117111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357770 + }, + { + "epoch": 1.7351683432896081, + "grad_norm": 1.4914579793412486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357780 + }, + { + "epoch": 1.7352168414824443, + "grad_norm": 1.241315050748426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357790 + }, + { + "epoch": 1.7352653396752804, + "grad_norm": 1.9888096502995722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357800 + }, + { + "epoch": 1.7353138378681163, + "grad_norm": 1.6230204735734333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357810 + }, + { + "epoch": 1.7353623360609527, + "grad_norm": 1.4900577660625913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357820 + }, + { + "epoch": 1.7354108342537886, + "grad_norm": 1.6271702207859562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357830 + }, + { + "epoch": 1.7354593324466248, + "grad_norm": 1.6337748931505303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357840 + }, + { + "epoch": 1.735507830639461, + "grad_norm": 1.6172645445067246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357850 + }, + { + "epoch": 1.7355563288322968, + "grad_norm": 1.4081662058629263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357860 + }, + { + "epoch": 1.735604827025133, + "grad_norm": 1.3922643482544572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357870 + }, + { + "epoch": 1.7356533252179691, + "grad_norm": 1.471212485171236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357880 + }, + { + "epoch": 1.735701823410805, + "grad_norm": 1.813680228224257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357890 + }, + { + "epoch": 1.7357503216036414, + "grad_norm": 1.4809844905983027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357900 + }, + { + "epoch": 1.7357988197964773, + "grad_norm": 1.666204774153357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357910 + }, + { + "epoch": 1.7358473179893135, + "grad_norm": 3.3807186383683074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357920 + }, + { + "epoch": 1.7358958161821496, + "grad_norm": 1.5401052877450638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357930 + }, + { + "epoch": 1.7359443143749855, + "grad_norm": 1.6674645664238597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357940 + }, + { + "epoch": 1.7359928125678217, + "grad_norm": 1.7483587910760434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357950 + }, + { + "epoch": 1.7360413107606578, + "grad_norm": 1.6275183867264786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357960 + }, + { + "epoch": 1.7360898089534937, + "grad_norm": 1.7934791429752295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357970 + }, + { + "epoch": 1.73613830714633, + "grad_norm": 1.501138946480296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357980 + }, + { + "epoch": 1.736186805339166, + "grad_norm": 1.2491980783124745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 357990 + }, + { + "epoch": 1.7362353035320022, + "grad_norm": 1.5314110868303032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358000 + }, + { + "epoch": 1.7362838017248383, + "grad_norm": 1.4548323434837585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358010 + }, + { + "epoch": 1.7363322999176742, + "grad_norm": 1.1923906306776644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358020 + }, + { + "epoch": 1.7363807981105104, + "grad_norm": 1.5337949577087784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358030 + }, + { + "epoch": 1.7364292963033465, + "grad_norm": 1.735988774953512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358040 + }, + { + "epoch": 1.7364777944961824, + "grad_norm": 1.2869545429339269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358050 + }, + { + "epoch": 1.7365262926890188, + "grad_norm": 1.3120263098187479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358060 + }, + { + "epoch": 1.7365747908818547, + "grad_norm": 1.3759233752352884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358070 + }, + { + "epoch": 1.7366232890746909, + "grad_norm": 1.3201171711330062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358080 + }, + { + "epoch": 1.736671787267527, + "grad_norm": 1.4345840071428029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358090 + }, + { + "epoch": 1.736720285460363, + "grad_norm": 1.738364119319158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358100 + }, + { + "epoch": 1.736768783653199, + "grad_norm": 2.6534321406757044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358110 + }, + { + "epoch": 1.7368172818460352, + "grad_norm": 2.1761476176607175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358120 + }, + { + "epoch": 1.7368657800388712, + "grad_norm": 1.4656258429113223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358130 + }, + { + "epoch": 1.7369142782317075, + "grad_norm": 1.4733555708801305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358140 + }, + { + "epoch": 1.7369627764245434, + "grad_norm": 1.4827205241374486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358150 + }, + { + "epoch": 1.7370112746173796, + "grad_norm": 1.5173741374496785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358160 + }, + { + "epoch": 1.7370597728102157, + "grad_norm": 1.6611867437177352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358170 + }, + { + "epoch": 1.7371082710030517, + "grad_norm": 1.9098310488629977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358180 + }, + { + "epoch": 1.7371567691958878, + "grad_norm": 1.5887309245954384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358190 + }, + { + "epoch": 1.737205267388724, + "grad_norm": 1.6399258839783215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358200 + }, + { + "epoch": 1.7372537655815599, + "grad_norm": 1.3431040279954232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358210 + }, + { + "epoch": 1.7373022637743962, + "grad_norm": 1.6301498817483662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358220 + }, + { + "epoch": 1.7373507619672321, + "grad_norm": 1.2965313267443435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358230 + }, + { + "epoch": 1.7373992601600683, + "grad_norm": 1.4601218900622825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358240 + }, + { + "epoch": 1.7374477583529044, + "grad_norm": 2.315130309682445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358250 + }, + { + "epoch": 1.7374962565457404, + "grad_norm": 1.713345554321677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358260 + }, + { + "epoch": 1.7375447547385765, + "grad_norm": 1.349419065377333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358270 + }, + { + "epoch": 1.7375932529314126, + "grad_norm": 1.3815088628632566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358280 + }, + { + "epoch": 1.7376417511242486, + "grad_norm": 1.9330931522176797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358290 + }, + { + "epoch": 1.737690249317085, + "grad_norm": 1.8491201458914475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358300 + }, + { + "epoch": 1.7377387475099209, + "grad_norm": 1.3448561375639656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358310 + }, + { + "epoch": 1.737787245702757, + "grad_norm": 1.7864717705151634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358320 + }, + { + "epoch": 1.7378357438955931, + "grad_norm": 1.2146005978763696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358330 + }, + { + "epoch": 1.737884242088429, + "grad_norm": 2.417174549407264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358340 + }, + { + "epoch": 1.7379327402812654, + "grad_norm": 1.3789055230972735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358350 + }, + { + "epoch": 1.7379812384741014, + "grad_norm": 1.33197692875342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358360 + }, + { + "epoch": 1.7380297366669375, + "grad_norm": 1.3019326061680658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358370 + }, + { + "epoch": 1.7380782348597736, + "grad_norm": 1.3691519917813366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358380 + }, + { + "epoch": 1.7381267330526096, + "grad_norm": 1.4892782118636205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358390 + }, + { + "epoch": 1.7381752312454457, + "grad_norm": 1.7943344587934007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358400 + }, + { + "epoch": 1.7382237294382819, + "grad_norm": 1.4348158217103446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358410 + }, + { + "epoch": 1.7382722276311178, + "grad_norm": 1.4081412480493327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358420 + }, + { + "epoch": 1.7383207258239541, + "grad_norm": 1.424703111041481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358430 + }, + { + "epoch": 1.73836922401679, + "grad_norm": 2.6499392902223917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358440 + }, + { + "epoch": 1.7384177222096262, + "grad_norm": 1.3482495120342719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358450 + }, + { + "epoch": 1.7384662204024623, + "grad_norm": 1.506055546940388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358460 + }, + { + "epoch": 1.7385147185952983, + "grad_norm": 1.8032144666335626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358470 + }, + { + "epoch": 1.7385632167881344, + "grad_norm": 2.1570379260538175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358480 + }, + { + "epoch": 1.7386117149809706, + "grad_norm": 1.2666695248242377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358490 + }, + { + "epoch": 1.7386602131738065, + "grad_norm": 2.1706769715024166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358500 + }, + { + "epoch": 1.7387087113666428, + "grad_norm": 1.0871121780553494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358510 + }, + { + "epoch": 1.7387572095594788, + "grad_norm": 1.5801033370621553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358520 + }, + { + "epoch": 1.738805707752315, + "grad_norm": 1.5688240040390156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358530 + }, + { + "epoch": 1.738854205945151, + "grad_norm": 1.6496191079795608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358540 + }, + { + "epoch": 1.738902704137987, + "grad_norm": 1.4443775953054683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358550 + }, + { + "epoch": 1.7389512023308231, + "grad_norm": 1.3293580458650922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358560 + }, + { + "epoch": 1.7389997005236593, + "grad_norm": 1.465594223759581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358570 + }, + { + "epoch": 1.7390481987164952, + "grad_norm": 1.2897214851648187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358580 + }, + { + "epoch": 1.7390966969093316, + "grad_norm": 1.2690156481198756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358590 + }, + { + "epoch": 1.7391451951021675, + "grad_norm": 1.760811407791607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358600 + }, + { + "epoch": 1.7391936932950036, + "grad_norm": 1.648480996152557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358610 + }, + { + "epoch": 1.7392421914878398, + "grad_norm": 1.7386849293643536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358620 + }, + { + "epoch": 1.7392906896806757, + "grad_norm": 1.2892783729512303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358630 + }, + { + "epoch": 1.7393391878735118, + "grad_norm": 1.4748176013767988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358640 + }, + { + "epoch": 1.739387686066348, + "grad_norm": 1.614805356098259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358650 + }, + { + "epoch": 1.739436184259184, + "grad_norm": 1.6696620974698817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358660 + }, + { + "epoch": 1.7394846824520203, + "grad_norm": 1.4097638612042829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358670 + }, + { + "epoch": 1.7395331806448562, + "grad_norm": 1.529042670256331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358680 + }, + { + "epoch": 1.7395816788376923, + "grad_norm": 1.2704526319851084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358690 + }, + { + "epoch": 1.7396301770305285, + "grad_norm": 1.630153079190677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358700 + }, + { + "epoch": 1.7396786752233644, + "grad_norm": 1.3108540031225857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358710 + }, + { + "epoch": 1.7397271734162005, + "grad_norm": 1.500412949440033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358720 + }, + { + "epoch": 1.7397756716090367, + "grad_norm": 1.509328129145615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358730 + }, + { + "epoch": 1.7398241698018726, + "grad_norm": 1.688072259753426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358740 + }, + { + "epoch": 1.739872667994709, + "grad_norm": 2.016452782527267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358750 + }, + { + "epoch": 1.7399211661875449, + "grad_norm": 1.5155912080899725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358760 + }, + { + "epoch": 1.739969664380381, + "grad_norm": 1.3978366908418138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358770 + }, + { + "epoch": 1.7400181625732172, + "grad_norm": 1.4142233162317552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358780 + }, + { + "epoch": 1.740066660766053, + "grad_norm": 1.2762312096015194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358790 + }, + { + "epoch": 1.7401151589588892, + "grad_norm": 1.597380716589214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358800 + }, + { + "epoch": 1.7401636571517254, + "grad_norm": 1.5719967549898684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358810 + }, + { + "epoch": 1.7402121553445615, + "grad_norm": 1.4783342550117595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358820 + }, + { + "epoch": 1.7402606535373977, + "grad_norm": 1.4911000434381094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358830 + }, + { + "epoch": 1.7403091517302336, + "grad_norm": 2.0543325263133738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358840 + }, + { + "epoch": 1.7403576499230697, + "grad_norm": 1.883822164927551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358850 + }, + { + "epoch": 1.7404061481159059, + "grad_norm": 2.0174015347151908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358860 + }, + { + "epoch": 1.7404546463087418, + "grad_norm": 1.5746079995437867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358870 + }, + { + "epoch": 1.7405031445015782, + "grad_norm": 1.2707783270116124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358880 + }, + { + "epoch": 1.740551642694414, + "grad_norm": 1.690212947380587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358890 + }, + { + "epoch": 1.7406001408872502, + "grad_norm": 1.642613689512018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358900 + }, + { + "epoch": 1.7406486390800864, + "grad_norm": 1.3166497225824969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358910 + }, + { + "epoch": 1.7406971372729223, + "grad_norm": 1.6237391875506546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358920 + }, + { + "epoch": 1.7407456354657584, + "grad_norm": 1.6668277424969347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358930 + }, + { + "epoch": 1.7407941336585946, + "grad_norm": 1.590488807323709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358940 + }, + { + "epoch": 1.7408426318514305, + "grad_norm": 1.2353127409880926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358950 + }, + { + "epoch": 1.7408911300442669, + "grad_norm": 1.7983548872280153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358960 + }, + { + "epoch": 1.7409396282371028, + "grad_norm": 1.3588892677773856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358970 + }, + { + "epoch": 1.740988126429939, + "grad_norm": 1.7244664363147422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358980 + }, + { + "epoch": 1.741036624622775, + "grad_norm": 1.2646298230833963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 358990 + }, + { + "epoch": 1.741085122815611, + "grad_norm": 1.4527036462652632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359000 + }, + { + "epoch": 1.7411336210084472, + "grad_norm": 1.556361972632203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359010 + }, + { + "epoch": 1.7411821192012833, + "grad_norm": 1.3623727035394495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359020 + }, + { + "epoch": 1.7412306173941192, + "grad_norm": 1.5985605728019436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359030 + }, + { + "epoch": 1.7412791155869556, + "grad_norm": 1.4313155105583064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359040 + }, + { + "epoch": 1.7413276137797915, + "grad_norm": 1.2409998362272745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359050 + }, + { + "epoch": 1.7413761119726276, + "grad_norm": 1.13726015271709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359060 + }, + { + "epoch": 1.7414246101654638, + "grad_norm": 1.6395619084619284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359070 + }, + { + "epoch": 1.7414731083582997, + "grad_norm": 1.62921409696537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359080 + }, + { + "epoch": 1.7415216065511359, + "grad_norm": 1.4117217617126698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359090 + }, + { + "epoch": 1.741570104743972, + "grad_norm": 1.695606677287742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359100 + }, + { + "epoch": 1.741618602936808, + "grad_norm": 1.2500421142647156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359110 + }, + { + "epoch": 1.7416671011296443, + "grad_norm": 1.1173463931868355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359120 + }, + { + "epoch": 1.7417155993224802, + "grad_norm": 1.1501008145842206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359130 + }, + { + "epoch": 1.7417640975153164, + "grad_norm": 1.3094681783343276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359140 + }, + { + "epoch": 1.7418125957081525, + "grad_norm": 1.3930181452792567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359150 + }, + { + "epoch": 1.7418610939009884, + "grad_norm": 1.6337692088086442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359160 + }, + { + "epoch": 1.7419095920938246, + "grad_norm": 1.3623071559720756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359170 + }, + { + "epoch": 1.7419580902866607, + "grad_norm": 1.2983882413664105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359180 + }, + { + "epoch": 1.7420065884794966, + "grad_norm": 1.6491069843027617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359190 + }, + { + "epoch": 1.742055086672333, + "grad_norm": 1.4802797210222707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359200 + }, + { + "epoch": 1.742103584865169, + "grad_norm": 1.1089757556703717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359210 + }, + { + "epoch": 1.742152083058005, + "grad_norm": 1.5919210838433173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359220 + }, + { + "epoch": 1.7422005812508412, + "grad_norm": 1.3157123390783454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359230 + }, + { + "epoch": 1.7422490794436771, + "grad_norm": 1.3632305950750379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359240 + }, + { + "epoch": 1.7422975776365133, + "grad_norm": 1.834001750466996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359250 + }, + { + "epoch": 1.7423460758293494, + "grad_norm": 1.340711452968435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359260 + }, + { + "epoch": 1.7423945740221853, + "grad_norm": 1.4241641643764069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359270 + }, + { + "epoch": 1.7424430722150217, + "grad_norm": 1.8375098775891274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359280 + }, + { + "epoch": 1.7424915704078576, + "grad_norm": 2.076273020179542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359290 + }, + { + "epoch": 1.7425400686006938, + "grad_norm": 1.4036321438481991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359300 + }, + { + "epoch": 1.74258856679353, + "grad_norm": 1.1961730272957993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359310 + }, + { + "epoch": 1.7426370649863658, + "grad_norm": 1.2126778692334028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359320 + }, + { + "epoch": 1.742685563179202, + "grad_norm": 1.2828321516167307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359330 + }, + { + "epoch": 1.7427340613720381, + "grad_norm": 1.2592595410865215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359340 + }, + { + "epoch": 1.7427825595648743, + "grad_norm": 1.4733607223149647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359350 + }, + { + "epoch": 1.7428310577577104, + "grad_norm": 1.0539436878787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359360 + }, + { + "epoch": 1.7428795559505463, + "grad_norm": 1.3153781175390122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359370 + }, + { + "epoch": 1.7429280541433825, + "grad_norm": 1.2014242045665924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359380 + }, + { + "epoch": 1.7429765523362186, + "grad_norm": 2.277992194876788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359390 + }, + { + "epoch": 1.7430250505290545, + "grad_norm": 1.2920347458589276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359400 + }, + { + "epoch": 1.743073548721891, + "grad_norm": 1.5586202550821326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359410 + }, + { + "epoch": 1.7431220469147268, + "grad_norm": 1.2268145610505599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359420 + }, + { + "epoch": 1.743170545107563, + "grad_norm": 1.4936972547729965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359430 + }, + { + "epoch": 1.7432190433003991, + "grad_norm": 1.5583550450060102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359440 + }, + { + "epoch": 1.743267541493235, + "grad_norm": 1.6703044281030088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359450 + }, + { + "epoch": 1.7433160396860712, + "grad_norm": 1.2618741607184347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359460 + }, + { + "epoch": 1.7433645378789073, + "grad_norm": 1.7108696681589208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359470 + }, + { + "epoch": 1.7434130360717432, + "grad_norm": 1.2550382066933707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359480 + }, + { + "epoch": 1.7434615342645796, + "grad_norm": 1.616167111251343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359490 + }, + { + "epoch": 1.7435100324574155, + "grad_norm": 1.2307427077473676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359500 + }, + { + "epoch": 1.7435585306502517, + "grad_norm": 1.6151499693251026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359510 + }, + { + "epoch": 1.7436070288430878, + "grad_norm": 1.5941161279897642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359520 + }, + { + "epoch": 1.7436555270359237, + "grad_norm": 1.274265848394407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359530 + }, + { + "epoch": 1.74370402522876, + "grad_norm": 1.6706348304751373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359540 + }, + { + "epoch": 1.743752523421596, + "grad_norm": 1.3096378204124903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359550 + }, + { + "epoch": 1.743801021614432, + "grad_norm": 1.1968763757863599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359560 + }, + { + "epoch": 1.7438495198072683, + "grad_norm": 1.500463575609956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359570 + }, + { + "epoch": 1.7438980180001042, + "grad_norm": 1.693873841190907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359580 + }, + { + "epoch": 1.7439465161929404, + "grad_norm": 1.413471650835163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359590 + }, + { + "epoch": 1.7439950143857765, + "grad_norm": 1.1556913648291811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359600 + }, + { + "epoch": 1.7440435125786125, + "grad_norm": 1.2141963878775641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359610 + }, + { + "epoch": 1.7440920107714486, + "grad_norm": 1.7028735754820445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359620 + }, + { + "epoch": 1.7441405089642847, + "grad_norm": 1.532845139706751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359630 + }, + { + "epoch": 1.7441890071571207, + "grad_norm": 1.8879934060578307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359640 + }, + { + "epoch": 1.744237505349957, + "grad_norm": 1.2350098721469749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359650 + }, + { + "epoch": 1.744286003542793, + "grad_norm": 1.0698226304839409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359660 + }, + { + "epoch": 1.744334501735629, + "grad_norm": 1.343747069171286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359670 + }, + { + "epoch": 1.7443829999284652, + "grad_norm": 1.2200128907124963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359680 + }, + { + "epoch": 1.7444314981213012, + "grad_norm": 1.5225365856963435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359690 + }, + { + "epoch": 1.7444799963141373, + "grad_norm": 1.381474934447624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359700 + }, + { + "epoch": 1.7445284945069734, + "grad_norm": 1.2080188405150238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359710 + }, + { + "epoch": 1.7445769926998094, + "grad_norm": 1.1853717118981422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359720 + }, + { + "epoch": 1.7446254908926457, + "grad_norm": 1.5071444536829404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359730 + }, + { + "epoch": 1.7446739890854817, + "grad_norm": 1.2694423290326995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359740 + }, + { + "epoch": 1.7447224872783178, + "grad_norm": 1.163570040318973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359750 + }, + { + "epoch": 1.744770985471154, + "grad_norm": 1.1227545115843895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359760 + }, + { + "epoch": 1.7448194836639899, + "grad_norm": 1.2384588465863544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359770 + }, + { + "epoch": 1.744867981856826, + "grad_norm": 1.3852502256384014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359780 + }, + { + "epoch": 1.7449164800496622, + "grad_norm": 1.3888740824086199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359790 + }, + { + "epoch": 1.744964978242498, + "grad_norm": 1.583315167863475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359800 + }, + { + "epoch": 1.7450134764353344, + "grad_norm": 1.9948929619317823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359810 + }, + { + "epoch": 1.7450619746281704, + "grad_norm": 1.319670683841423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359820 + }, + { + "epoch": 1.7451104728210065, + "grad_norm": 1.2067668642146145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359830 + }, + { + "epoch": 1.7451589710138427, + "grad_norm": 1.276282546314178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359840 + }, + { + "epoch": 1.7452074692066786, + "grad_norm": 1.1626564599964695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359850 + }, + { + "epoch": 1.745255967399515, + "grad_norm": 1.30907604756203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359860 + }, + { + "epoch": 1.7453044655923509, + "grad_norm": 1.4867716835453848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359870 + }, + { + "epoch": 1.745352963785187, + "grad_norm": 1.3445987434579365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359880 + }, + { + "epoch": 1.7454014619780231, + "grad_norm": 1.50312331470559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359890 + }, + { + "epoch": 1.745449960170859, + "grad_norm": 1.566837859456882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359900 + }, + { + "epoch": 1.7454984583636952, + "grad_norm": 1.2266846205477577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359910 + }, + { + "epoch": 1.7455469565565314, + "grad_norm": 1.2205967792056072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359920 + }, + { + "epoch": 1.7455954547493673, + "grad_norm": 1.4302731443649463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359930 + }, + { + "epoch": 1.7456439529422036, + "grad_norm": 1.2878059507670514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359940 + }, + { + "epoch": 1.7456924511350396, + "grad_norm": 1.3547488464382695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359950 + }, + { + "epoch": 1.7457409493278757, + "grad_norm": 1.1630287843900078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359960 + }, + { + "epoch": 1.7457894475207119, + "grad_norm": 1.8634812803952627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359970 + }, + { + "epoch": 1.7458379457135478, + "grad_norm": 1.3630005568643355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359980 + }, + { + "epoch": 1.745886443906384, + "grad_norm": 1.772783519982113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 359990 + }, + { + "epoch": 1.74593494209922, + "grad_norm": 1.342230770973174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360000 + }, + { + "epoch": 1.745983440292056, + "grad_norm": 1.4274647242018546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360010 + }, + { + "epoch": 1.7460319384848924, + "grad_norm": 1.2647514147090533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360020 + }, + { + "epoch": 1.7460804366777283, + "grad_norm": 2.345631422429051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360030 + }, + { + "epoch": 1.7461289348705644, + "grad_norm": 1.72367915496352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360040 + }, + { + "epoch": 1.7461774330634006, + "grad_norm": 1.6746390940625133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360050 + }, + { + "epoch": 1.7462259312562365, + "grad_norm": 1.5298443400979522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360060 + }, + { + "epoch": 1.7462744294490726, + "grad_norm": 1.5394846286653774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360070 + }, + { + "epoch": 1.7463229276419088, + "grad_norm": 1.5594414648489874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360080 + }, + { + "epoch": 1.7463714258347447, + "grad_norm": 1.2704259866325174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360090 + }, + { + "epoch": 1.746419924027581, + "grad_norm": 1.37504203578942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360100 + }, + { + "epoch": 1.746468422220417, + "grad_norm": 1.4079250654219777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360110 + }, + { + "epoch": 1.7465169204132531, + "grad_norm": 1.200074883911384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360120 + }, + { + "epoch": 1.7465654186060893, + "grad_norm": 1.6467440744349915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360130 + }, + { + "epoch": 1.7466139167989252, + "grad_norm": 1.4116752211634775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360140 + }, + { + "epoch": 1.7466624149917613, + "grad_norm": 1.641772584548562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360150 + }, + { + "epoch": 1.7467109131845975, + "grad_norm": 1.0725520027676794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360160 + }, + { + "epoch": 1.7467594113774334, + "grad_norm": 2.452686764797818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360170 + }, + { + "epoch": 1.7468079095702698, + "grad_norm": 1.4152465865890917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360180 + }, + { + "epoch": 1.7468564077631057, + "grad_norm": 1.4716087903821062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360190 + }, + { + "epoch": 1.7469049059559418, + "grad_norm": 1.628708190537509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360200 + }, + { + "epoch": 1.746953404148778, + "grad_norm": 1.6172704064842947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360210 + }, + { + "epoch": 1.747001902341614, + "grad_norm": 1.5550909893136122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360220 + }, + { + "epoch": 1.74705040053445, + "grad_norm": 1.1669322397267479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360230 + }, + { + "epoch": 1.7470988987272862, + "grad_norm": 1.37454430060302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360240 + }, + { + "epoch": 1.747147396920122, + "grad_norm": 1.1707149027984087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360250 + }, + { + "epoch": 1.7471958951129585, + "grad_norm": 1.1941663657921708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360260 + }, + { + "epoch": 1.7472443933057944, + "grad_norm": 1.0886896717465788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360270 + }, + { + "epoch": 1.7472928914986305, + "grad_norm": 1.4064707620775607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360280 + }, + { + "epoch": 1.7473413896914667, + "grad_norm": 1.7202127722271143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360290 + }, + { + "epoch": 1.7473898878843026, + "grad_norm": 1.7561479381811296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360300 + }, + { + "epoch": 1.7474383860771387, + "grad_norm": 1.0820409457323876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360310 + }, + { + "epoch": 1.747486884269975, + "grad_norm": 1.4488856336924982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360320 + }, + { + "epoch": 1.7475353824628108, + "grad_norm": 1.1806978506001542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360330 + }, + { + "epoch": 1.7475838806556472, + "grad_norm": 1.7062319557226147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360340 + }, + { + "epoch": 1.747632378848483, + "grad_norm": 1.770378332821565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360350 + }, + { + "epoch": 1.7476808770413192, + "grad_norm": 1.3254505937254635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360360 + }, + { + "epoch": 1.7477293752341554, + "grad_norm": 1.4718784413503272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360370 + }, + { + "epoch": 1.7477778734269913, + "grad_norm": 1.2251456737999433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360380 + }, + { + "epoch": 1.7478263716198277, + "grad_norm": 1.4059790665044147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360390 + }, + { + "epoch": 1.7478748698126636, + "grad_norm": 1.6504571931363898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360400 + }, + { + "epoch": 1.7479233680054997, + "grad_norm": 1.5891236770926298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360410 + }, + { + "epoch": 1.7479718661983359, + "grad_norm": 1.2749858946392578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360420 + }, + { + "epoch": 1.7480203643911718, + "grad_norm": 2.0515647847219043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360430 + }, + { + "epoch": 1.748068862584008, + "grad_norm": 1.2458783338331614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360440 + }, + { + "epoch": 1.748117360776844, + "grad_norm": 2.1881763956344003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360450 + }, + { + "epoch": 1.74816585896968, + "grad_norm": 1.3325478498416032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360460 + }, + { + "epoch": 1.7482143571625164, + "grad_norm": 1.535603288971288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360470 + }, + { + "epoch": 1.7482628553553523, + "grad_norm": 1.4119035718351824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360480 + }, + { + "epoch": 1.7483113535481885, + "grad_norm": 1.4341535070627742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360490 + }, + { + "epoch": 1.7483598517410246, + "grad_norm": 1.0603945277409821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360500 + }, + { + "epoch": 1.7484083499338605, + "grad_norm": 1.494439239024814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360510 + }, + { + "epoch": 1.7484568481266967, + "grad_norm": 1.2075546784728886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360520 + }, + { + "epoch": 1.7485053463195328, + "grad_norm": 1.5046531132156815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360530 + }, + { + "epoch": 1.7485538445123687, + "grad_norm": 1.4728276376274607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360540 + }, + { + "epoch": 1.748602342705205, + "grad_norm": 1.2673108784611031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360550 + }, + { + "epoch": 1.748650840898041, + "grad_norm": 1.4798763992018849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360560 + }, + { + "epoch": 1.7486993390908772, + "grad_norm": 1.3667423637286902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360570 + }, + { + "epoch": 1.7487478372837133, + "grad_norm": 1.2411810246248933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360580 + }, + { + "epoch": 1.7487963354765492, + "grad_norm": 1.3430182299600801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360590 + }, + { + "epoch": 1.7488448336693854, + "grad_norm": 1.2052362663439453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360600 + }, + { + "epoch": 1.7488933318622215, + "grad_norm": 1.0193025978821879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360610 + }, + { + "epoch": 1.7489418300550574, + "grad_norm": 1.5268081909880493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360620 + }, + { + "epoch": 1.7489903282478938, + "grad_norm": 1.3815532717842416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360630 + }, + { + "epoch": 1.7490388264407297, + "grad_norm": 1.3588795866326109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360640 + }, + { + "epoch": 1.7490873246335659, + "grad_norm": 1.5361935723490205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360650 + }, + { + "epoch": 1.749135822826402, + "grad_norm": 1.6989261553135293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360660 + }, + { + "epoch": 1.749184321019238, + "grad_norm": 1.207974076322671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360670 + }, + { + "epoch": 1.749232819212074, + "grad_norm": 1.5051362822759984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360680 + }, + { + "epoch": 1.7492813174049102, + "grad_norm": 1.420254402972887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360690 + }, + { + "epoch": 1.7493298155977461, + "grad_norm": 1.1424575063756492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360700 + }, + { + "epoch": 1.7493783137905825, + "grad_norm": 1.60830371243037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360710 + }, + { + "epoch": 1.7494268119834184, + "grad_norm": 1.2033553709045464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360720 + }, + { + "epoch": 1.7494753101762546, + "grad_norm": 1.4551350346891923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360730 + }, + { + "epoch": 1.7495238083690907, + "grad_norm": 1.2121063264203258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360740 + }, + { + "epoch": 1.7495723065619266, + "grad_norm": 1.63943845166159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360750 + }, + { + "epoch": 1.7496208047547628, + "grad_norm": 1.4629301325896904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360760 + }, + { + "epoch": 1.749669302947599, + "grad_norm": 1.5324950197737053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360770 + }, + { + "epoch": 1.7497178011404348, + "grad_norm": 1.5691648869164965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360780 + }, + { + "epoch": 1.7497662993332712, + "grad_norm": 1.5515025708623398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360790 + }, + { + "epoch": 1.7498147975261071, + "grad_norm": 1.834975549286355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360800 + }, + { + "epoch": 1.7498632957189433, + "grad_norm": 1.3809788868002215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360810 + }, + { + "epoch": 1.7499117939117794, + "grad_norm": 1.2363730483855306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360820 + }, + { + "epoch": 1.7499602921046153, + "grad_norm": 1.5993876445463684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360830 + }, + { + "epoch": 1.7500087902974515, + "grad_norm": 1.106048497234724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360840 + }, + { + "epoch": 1.7500572884902876, + "grad_norm": 2.006445498636822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360850 + }, + { + "epoch": 1.7501057866831236, + "grad_norm": 1.0777943870721174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360860 + }, + { + "epoch": 1.75015428487596, + "grad_norm": 1.2799016069209301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360870 + }, + { + "epoch": 1.7502027830687958, + "grad_norm": 1.3002062537736947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360880 + }, + { + "epoch": 1.750251281261632, + "grad_norm": 1.3226387984843768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360890 + }, + { + "epoch": 1.7502997794544681, + "grad_norm": 1.4421207339410103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360900 + }, + { + "epoch": 1.750348277647304, + "grad_norm": 1.7905586346955715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360910 + }, + { + "epoch": 1.7503967758401404, + "grad_norm": 1.6374409383956845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360920 + }, + { + "epoch": 1.7504452740329763, + "grad_norm": 1.721143405575276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360930 + }, + { + "epoch": 1.7504937722258125, + "grad_norm": 1.4175061124888089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360940 + }, + { + "epoch": 1.7505422704186486, + "grad_norm": 1.6903360489095576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360950 + }, + { + "epoch": 1.7505907686114845, + "grad_norm": 1.1818706902033682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360960 + }, + { + "epoch": 1.7506392668043207, + "grad_norm": 1.424160789298412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360970 + }, + { + "epoch": 1.7506877649971568, + "grad_norm": 1.1776553066056294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360980 + }, + { + "epoch": 1.7507362631899928, + "grad_norm": 1.2023950723971666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 360990 + }, + { + "epoch": 1.7507847613828291, + "grad_norm": 1.3743393978415952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361000 + }, + { + "epoch": 1.750833259575665, + "grad_norm": 1.5943957265562858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361010 + }, + { + "epoch": 1.7508817577685012, + "grad_norm": 1.540590410797904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361020 + }, + { + "epoch": 1.7509302559613373, + "grad_norm": 1.0372499303912264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361030 + }, + { + "epoch": 1.7509787541541733, + "grad_norm": 1.4969886663607213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361040 + }, + { + "epoch": 1.7510272523470094, + "grad_norm": 2.02705372487344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361050 + }, + { + "epoch": 1.7510757505398455, + "grad_norm": 1.1261009014162937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361060 + }, + { + "epoch": 1.7511242487326815, + "grad_norm": 1.1990561432639879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361070 + }, + { + "epoch": 1.7511727469255178, + "grad_norm": 1.6646838574274625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361080 + }, + { + "epoch": 1.7512212451183538, + "grad_norm": 1.3418824273969676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361090 + }, + { + "epoch": 1.75126974331119, + "grad_norm": 1.321089282413368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361100 + }, + { + "epoch": 1.751318241504026, + "grad_norm": 1.3917219376935464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361110 + }, + { + "epoch": 1.751366739696862, + "grad_norm": 1.0237322989326003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361120 + }, + { + "epoch": 1.751415237889698, + "grad_norm": 1.2291826223531643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361130 + }, + { + "epoch": 1.7514637360825343, + "grad_norm": 1.3293665723779213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361140 + }, + { + "epoch": 1.7515122342753702, + "grad_norm": 1.4824927063727955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361150 + }, + { + "epoch": 1.7515607324682065, + "grad_norm": 1.2428984952350675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361160 + }, + { + "epoch": 1.7516092306610425, + "grad_norm": 2.149803535189676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361170 + }, + { + "epoch": 1.7516577288538786, + "grad_norm": 1.606103516849089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361180 + }, + { + "epoch": 1.7517062270467147, + "grad_norm": 1.6197464702827347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361190 + }, + { + "epoch": 1.7517547252395507, + "grad_norm": 1.3368123497059514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361200 + }, + { + "epoch": 1.7518032234323868, + "grad_norm": 1.0763416824488559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361210 + }, + { + "epoch": 1.751851721625223, + "grad_norm": 1.117691539320731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361220 + }, + { + "epoch": 1.7519002198180589, + "grad_norm": 1.1684804235301272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361230 + }, + { + "epoch": 1.7519487180108952, + "grad_norm": 1.928353121627424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361240 + }, + { + "epoch": 1.7519972162037312, + "grad_norm": 1.688501249930141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361250 + }, + { + "epoch": 1.7520457143965673, + "grad_norm": 1.1879442318729616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361260 + }, + { + "epoch": 1.7520942125894035, + "grad_norm": 1.881509348322652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361270 + }, + { + "epoch": 1.7521427107822394, + "grad_norm": 1.4454901275939847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361280 + }, + { + "epoch": 1.7521912089750755, + "grad_norm": 1.1251574782988882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361290 + }, + { + "epoch": 1.7522397071679117, + "grad_norm": 1.5452743085120346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361300 + }, + { + "epoch": 1.7522882053607476, + "grad_norm": 1.4161084749275687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361310 + }, + { + "epoch": 1.752336703553584, + "grad_norm": 1.6197970964526576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361320 + }, + { + "epoch": 1.7523852017464199, + "grad_norm": 1.3454291014625142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361330 + }, + { + "epoch": 1.752433699939256, + "grad_norm": 1.1938924515675353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361340 + }, + { + "epoch": 1.7524821981320922, + "grad_norm": 1.1299738034153961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361350 + }, + { + "epoch": 1.752530696324928, + "grad_norm": 1.1848609204889726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361360 + }, + { + "epoch": 1.7525791945177642, + "grad_norm": 1.5195009694934924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361370 + }, + { + "epoch": 1.7526276927106004, + "grad_norm": 1.2591025111419185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361380 + }, + { + "epoch": 1.7526761909034365, + "grad_norm": 1.4630448852415157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361390 + }, + { + "epoch": 1.7527246890962727, + "grad_norm": 1.567270402347276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361400 + }, + { + "epoch": 1.7527731872891086, + "grad_norm": 1.5137910480689243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361410 + }, + { + "epoch": 1.7528216854819447, + "grad_norm": 1.765091717231826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361420 + }, + { + "epoch": 1.7528701836747809, + "grad_norm": 1.753599399023642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361430 + }, + { + "epoch": 1.7529186818676168, + "grad_norm": 1.2341612176669514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361440 + }, + { + "epoch": 1.7529671800604532, + "grad_norm": 1.2509899782742195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361450 + }, + { + "epoch": 1.753015678253289, + "grad_norm": 1.4260843173019566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361460 + }, + { + "epoch": 1.7530641764461252, + "grad_norm": 1.064398968964042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361470 + }, + { + "epoch": 1.7531126746389614, + "grad_norm": 1.4052959684818234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361480 + }, + { + "epoch": 1.7531611728317973, + "grad_norm": 1.5222147098370442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361490 + }, + { + "epoch": 1.7532096710246334, + "grad_norm": 1.3802829990083865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361500 + }, + { + "epoch": 1.7532581692174696, + "grad_norm": 1.2511243596691202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361510 + }, + { + "epoch": 1.7533066674103055, + "grad_norm": 1.2481711664236173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361520 + }, + { + "epoch": 1.7533551656031419, + "grad_norm": 1.0393147675813452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361530 + }, + { + "epoch": 1.7534036637959778, + "grad_norm": 1.4573497963965565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361540 + }, + { + "epoch": 1.753452161988814, + "grad_norm": 1.1364894803023162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361550 + }, + { + "epoch": 1.75350066018165, + "grad_norm": 1.1055785620328606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361560 + }, + { + "epoch": 1.753549158374486, + "grad_norm": 1.9433958442505173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361570 + }, + { + "epoch": 1.7535976565673221, + "grad_norm": 1.460529119867715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361580 + }, + { + "epoch": 1.7536461547601583, + "grad_norm": 1.8541351565204423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361590 + }, + { + "epoch": 1.7536946529529942, + "grad_norm": 1.1639183838951794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361600 + }, + { + "epoch": 1.7537431511458306, + "grad_norm": 1.4650487045742011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361610 + }, + { + "epoch": 1.7537916493386665, + "grad_norm": 1.5578471845856257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361620 + }, + { + "epoch": 1.7538401475315026, + "grad_norm": 1.4600417763688256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361630 + }, + { + "epoch": 1.7538886457243388, + "grad_norm": 2.3905652568601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361640 + }, + { + "epoch": 1.7539371439171747, + "grad_norm": 1.0688111728995864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361650 + }, + { + "epoch": 1.7539856421100108, + "grad_norm": 1.1898555918321563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361660 + }, + { + "epoch": 1.754034140302847, + "grad_norm": 1.7956198306023907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361670 + }, + { + "epoch": 1.754082638495683, + "grad_norm": 1.092362023058513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361680 + }, + { + "epoch": 1.7541311366885193, + "grad_norm": 1.4564826678054033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361690 + }, + { + "epoch": 1.7541796348813552, + "grad_norm": 1.9285000263380425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361700 + }, + { + "epoch": 1.7542281330741913, + "grad_norm": 1.2573350360867153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361710 + }, + { + "epoch": 1.7542766312670275, + "grad_norm": 1.2919877612205255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361720 + }, + { + "epoch": 1.7543251294598634, + "grad_norm": 1.623823564500526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361730 + }, + { + "epoch": 1.7543736276526996, + "grad_norm": 1.3077366745051222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361740 + }, + { + "epoch": 1.7544221258455357, + "grad_norm": 1.445589781212675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361750 + }, + { + "epoch": 1.7544706240383716, + "grad_norm": 1.126245141591653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361760 + }, + { + "epoch": 1.754519122231208, + "grad_norm": 1.217907819039965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361770 + }, + { + "epoch": 1.754567620424044, + "grad_norm": 1.4436659867556045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361780 + }, + { + "epoch": 1.75461611861688, + "grad_norm": 1.3589965597304854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361790 + }, + { + "epoch": 1.7546646168097162, + "grad_norm": 1.749942057927001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361800 + }, + { + "epoch": 1.7547131150025521, + "grad_norm": 1.4951510252103617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361810 + }, + { + "epoch": 1.7547616131953883, + "grad_norm": 1.610759348125157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361820 + }, + { + "epoch": 1.7548101113882244, + "grad_norm": 1.1315838044367865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361830 + }, + { + "epoch": 1.7548586095810603, + "grad_norm": 1.2523033277034301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361840 + }, + { + "epoch": 1.7549071077738967, + "grad_norm": 1.1050897974484997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361850 + }, + { + "epoch": 1.7549556059667326, + "grad_norm": 1.5257377583566267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361860 + }, + { + "epoch": 1.7550041041595688, + "grad_norm": 1.688143136391318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361870 + }, + { + "epoch": 1.755052602352405, + "grad_norm": 1.0298752073367723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361880 + }, + { + "epoch": 1.7551011005452408, + "grad_norm": 1.5263287522770952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361890 + }, + { + "epoch": 1.7551495987380772, + "grad_norm": 1.2601516274912683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361900 + }, + { + "epoch": 1.755198096930913, + "grad_norm": 1.2993087494805877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361910 + }, + { + "epoch": 1.7552465951237493, + "grad_norm": 1.1117312403996493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361920 + }, + { + "epoch": 1.7552950933165854, + "grad_norm": 1.1167520241883722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361930 + }, + { + "epoch": 1.7553435915094213, + "grad_norm": 1.4461564390444437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361940 + }, + { + "epoch": 1.7553920897022575, + "grad_norm": 1.684985662109284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361950 + }, + { + "epoch": 1.7554405878950936, + "grad_norm": 1.2233729584920638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361960 + }, + { + "epoch": 1.7554890860879295, + "grad_norm": 1.511161684675244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361970 + }, + { + "epoch": 1.755537584280766, + "grad_norm": 1.252146564212353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361980 + }, + { + "epoch": 1.7555860824736018, + "grad_norm": 1.578205655050624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 361990 + }, + { + "epoch": 1.755634580666438, + "grad_norm": 1.494673007584879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362000 + }, + { + "epoch": 1.755683078859274, + "grad_norm": 1.3508481444546305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362010 + }, + { + "epoch": 1.75573157705211, + "grad_norm": 1.3437423618256616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362020 + }, + { + "epoch": 1.7557800752449462, + "grad_norm": 1.1881225780996374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362030 + }, + { + "epoch": 1.7558285734377823, + "grad_norm": 1.582085396023558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362040 + }, + { + "epoch": 1.7558770716306182, + "grad_norm": 1.3616115346337665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362050 + }, + { + "epoch": 1.7559255698234546, + "grad_norm": 1.4075987486705799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362060 + }, + { + "epoch": 1.7559740680162905, + "grad_norm": 1.1833478197331715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362070 + }, + { + "epoch": 1.7560225662091267, + "grad_norm": 1.5556166133023908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362080 + }, + { + "epoch": 1.7560710644019628, + "grad_norm": 1.2707878305207032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362090 + }, + { + "epoch": 1.7561195625947987, + "grad_norm": 1.1195170124267406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362100 + }, + { + "epoch": 1.7561680607876349, + "grad_norm": 1.160809759426229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362110 + }, + { + "epoch": 1.756216558980471, + "grad_norm": 1.6699088334348744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362120 + }, + { + "epoch": 1.756265057173307, + "grad_norm": 1.1085350415385165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362130 + }, + { + "epoch": 1.7563135553661433, + "grad_norm": 1.9508108906052257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362140 + }, + { + "epoch": 1.7563620535589792, + "grad_norm": 1.1129832167000586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362150 + }, + { + "epoch": 1.7564105517518154, + "grad_norm": 1.3887482275265484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362160 + }, + { + "epoch": 1.7564590499446515, + "grad_norm": 1.5678832454568692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362170 + }, + { + "epoch": 1.7565075481374874, + "grad_norm": 1.0937001526656331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362180 + }, + { + "epoch": 1.7565560463303236, + "grad_norm": 1.9243870497120952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362190 + }, + { + "epoch": 1.7566045445231597, + "grad_norm": 1.023824847123933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362200 + }, + { + "epoch": 1.7566530427159956, + "grad_norm": 1.3429410472554082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362210 + }, + { + "epoch": 1.756701540908832, + "grad_norm": 1.3813077792690365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362220 + }, + { + "epoch": 1.756750039101668, + "grad_norm": 1.3068136794913698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362230 + }, + { + "epoch": 1.756798537294504, + "grad_norm": 1.5897342109383317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362240 + }, + { + "epoch": 1.7568470354873402, + "grad_norm": 1.183055964304458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362250 + }, + { + "epoch": 1.7568955336801761, + "grad_norm": 1.4289984306969927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362260 + }, + { + "epoch": 1.7569440318730123, + "grad_norm": 1.5059180569210184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362270 + }, + { + "epoch": 1.7569925300658484, + "grad_norm": 2.3807510629580975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362280 + }, + { + "epoch": 1.7570410282586844, + "grad_norm": 1.391933324157435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362290 + }, + { + "epoch": 1.7570895264515207, + "grad_norm": 1.4779939938591724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362300 + }, + { + "epoch": 1.7571380246443566, + "grad_norm": 1.3650058861003345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362310 + }, + { + "epoch": 1.7571865228371928, + "grad_norm": 1.2330145793271186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362320 + }, + { + "epoch": 1.757235021030029, + "grad_norm": 2.0879332041090493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362330 + }, + { + "epoch": 1.7572835192228649, + "grad_norm": 1.7193952928096223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362340 + }, + { + "epoch": 1.757332017415701, + "grad_norm": 1.125341775320976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362350 + }, + { + "epoch": 1.7573805156085371, + "grad_norm": 1.4800187742025628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362360 + }, + { + "epoch": 1.757429013801373, + "grad_norm": 1.0175228659647928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362370 + }, + { + "epoch": 1.7574775119942094, + "grad_norm": 1.2101196489311405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362380 + }, + { + "epoch": 1.7575260101870454, + "grad_norm": 1.2313380537420926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362390 + }, + { + "epoch": 1.7575745083798815, + "grad_norm": 1.4093763489597677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362400 + }, + { + "epoch": 1.7576230065727176, + "grad_norm": 1.5795780683447447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362410 + }, + { + "epoch": 1.7576715047655536, + "grad_norm": 1.211936684342163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362420 + }, + { + "epoch": 1.75772000295839, + "grad_norm": 1.1296578783515088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362430 + }, + { + "epoch": 1.7577685011512258, + "grad_norm": 9.504510067870342e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362440 + }, + { + "epoch": 1.757816999344062, + "grad_norm": 1.2190711551340883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362450 + }, + { + "epoch": 1.7578654975368981, + "grad_norm": 1.59435984414813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362460 + }, + { + "epoch": 1.757913995729734, + "grad_norm": 1.1770580954362231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362470 + }, + { + "epoch": 1.7579624939225702, + "grad_norm": 1.4281487992207076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362480 + }, + { + "epoch": 1.7580109921154063, + "grad_norm": 1.5299058020445955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362490 + }, + { + "epoch": 1.7580594903082423, + "grad_norm": 1.4455665997559208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362500 + }, + { + "epoch": 1.7581079885010786, + "grad_norm": 1.4152313987381149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362510 + }, + { + "epoch": 1.7581564866939146, + "grad_norm": 1.3170687651609114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362520 + }, + { + "epoch": 1.7582049848867507, + "grad_norm": 1.5539908915229717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362530 + }, + { + "epoch": 1.7582534830795868, + "grad_norm": 1.133871929681618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362540 + }, + { + "epoch": 1.7583019812724228, + "grad_norm": 9.570363168620588e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362550 + }, + { + "epoch": 1.758350479465259, + "grad_norm": 1.2396824899951753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362560 + }, + { + "epoch": 1.758398977658095, + "grad_norm": 2.0499328456935473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362570 + }, + { + "epoch": 1.758447475850931, + "grad_norm": 1.5639718853321938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362580 + }, + { + "epoch": 1.7584959740437673, + "grad_norm": 1.1023650436925436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362590 + }, + { + "epoch": 1.7585444722366033, + "grad_norm": 1.3962999645400487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362600 + }, + { + "epoch": 1.7585929704294394, + "grad_norm": 1.1408573641347175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362610 + }, + { + "epoch": 1.7586414686222755, + "grad_norm": 1.4558568572908825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362620 + }, + { + "epoch": 1.7586899668151115, + "grad_norm": 1.1023436385926288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362630 + }, + { + "epoch": 1.7587384650079476, + "grad_norm": 1.1688468859460954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362640 + }, + { + "epoch": 1.7587869632007838, + "grad_norm": 9.546583967789957e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362650 + }, + { + "epoch": 1.7588354613936197, + "grad_norm": 1.562196771942581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362660 + }, + { + "epoch": 1.758883959586456, + "grad_norm": 1.4920027879838926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362670 + }, + { + "epoch": 1.758932457779292, + "grad_norm": 1.1531289700883463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362680 + }, + { + "epoch": 1.7589809559721281, + "grad_norm": 1.1704520908040195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362690 + }, + { + "epoch": 1.7590294541649643, + "grad_norm": 1.425807560906378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362700 + }, + { + "epoch": 1.7590779523578002, + "grad_norm": 2.1525352167373057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362710 + }, + { + "epoch": 1.7591264505506363, + "grad_norm": 1.032997776206912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362720 + }, + { + "epoch": 1.7591749487434725, + "grad_norm": 1.5459828972552714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362730 + }, + { + "epoch": 1.7592234469363084, + "grad_norm": 1.4795048741689243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362740 + }, + { + "epoch": 1.7592719451291448, + "grad_norm": 1.4634093048471186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362750 + }, + { + "epoch": 1.7593204433219807, + "grad_norm": 1.2786070868742172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362760 + }, + { + "epoch": 1.7593689415148168, + "grad_norm": 9.764139718981824e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362770 + }, + { + "epoch": 1.759417439707653, + "grad_norm": 1.4423372718397331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362780 + }, + { + "epoch": 1.7594659379004889, + "grad_norm": 1.3139060506262012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362790 + }, + { + "epoch": 1.759514436093325, + "grad_norm": 1.2810789762340846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362800 + }, + { + "epoch": 1.7595629342861612, + "grad_norm": 1.2732932930248353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362810 + }, + { + "epoch": 1.759611432478997, + "grad_norm": 1.508415969908583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362820 + }, + { + "epoch": 1.7596599306718335, + "grad_norm": 1.4186230856694237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362830 + }, + { + "epoch": 1.7597084288646694, + "grad_norm": 1.2792627401836398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362840 + }, + { + "epoch": 1.7597569270575055, + "grad_norm": 1.308498553953541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362850 + }, + { + "epoch": 1.7598054252503417, + "grad_norm": 1.4289781802290236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362860 + }, + { + "epoch": 1.7598539234431776, + "grad_norm": 1.012125849797485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362870 + }, + { + "epoch": 1.7599024216360137, + "grad_norm": 2.0595098959574898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362880 + }, + { + "epoch": 1.7599509198288499, + "grad_norm": 1.76071370816544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362890 + }, + { + "epoch": 1.7599994180216858, + "grad_norm": 1.0833294261658466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362900 + }, + { + "epoch": 1.7600479162145222, + "grad_norm": 1.353050294028435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362910 + }, + { + "epoch": 1.760096414407358, + "grad_norm": 1.4787933544369025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362920 + }, + { + "epoch": 1.7601449126001942, + "grad_norm": 1.0518840021234155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362930 + }, + { + "epoch": 1.7601934107930304, + "grad_norm": 1.2240237268201781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362940 + }, + { + "epoch": 1.7602419089858663, + "grad_norm": 1.6181534334691605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362950 + }, + { + "epoch": 1.7602904071787027, + "grad_norm": 1.2142513661217436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362960 + }, + { + "epoch": 1.7603389053715386, + "grad_norm": 1.555074291559322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362970 + }, + { + "epoch": 1.7603874035643747, + "grad_norm": 1.1572254265956872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362980 + }, + { + "epoch": 1.7604359017572109, + "grad_norm": 1.5400113184682596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 362990 + }, + { + "epoch": 1.7604843999500468, + "grad_norm": 1.5077889159442748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363000 + }, + { + "epoch": 1.760532898142883, + "grad_norm": 1.229204116270921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363010 + }, + { + "epoch": 1.760581396335719, + "grad_norm": 1.1226003238107296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363020 + }, + { + "epoch": 1.760629894528555, + "grad_norm": 1.1483485273799943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363030 + }, + { + "epoch": 1.7606783927213914, + "grad_norm": 1.3879240867709086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363040 + }, + { + "epoch": 1.7607268909142273, + "grad_norm": 1.6366508148735193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363050 + }, + { + "epoch": 1.7607753891070634, + "grad_norm": 1.224915813224925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363060 + }, + { + "epoch": 1.7608238872998996, + "grad_norm": 1.1277357714334357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363070 + }, + { + "epoch": 1.7608723854927355, + "grad_norm": 1.255603176986142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363080 + }, + { + "epoch": 1.7609208836855716, + "grad_norm": 2.9393522282816775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363090 + }, + { + "epoch": 1.7609693818784078, + "grad_norm": 1.916693115333601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363100 + }, + { + "epoch": 1.7610178800712437, + "grad_norm": 1.1250144815733165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363110 + }, + { + "epoch": 1.76106637826408, + "grad_norm": 1.4996249575460752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363120 + }, + { + "epoch": 1.761114876456916, + "grad_norm": 1.2816663286230323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363130 + }, + { + "epoch": 1.7611633746497521, + "grad_norm": 1.0983495002392374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363140 + }, + { + "epoch": 1.7612118728425883, + "grad_norm": 1.6064367613921604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363150 + }, + { + "epoch": 1.7612603710354242, + "grad_norm": 1.2220342071600498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363160 + }, + { + "epoch": 1.7613088692282604, + "grad_norm": 1.2171473606770178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363170 + }, + { + "epoch": 1.7613573674210965, + "grad_norm": 1.4528318104112259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363180 + }, + { + "epoch": 1.7614058656139324, + "grad_norm": 1.3974209345235522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363190 + }, + { + "epoch": 1.7614543638067688, + "grad_norm": 1.6353343568198397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363200 + }, + { + "epoch": 1.7615028619996047, + "grad_norm": 1.4167379269736102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363210 + }, + { + "epoch": 1.7615513601924409, + "grad_norm": 1.4308292328735206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363220 + }, + { + "epoch": 1.761599858385277, + "grad_norm": 1.399572013838224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363230 + }, + { + "epoch": 1.761648356578113, + "grad_norm": 1.2235884305766831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363240 + }, + { + "epoch": 1.761696854770949, + "grad_norm": 1.7885687597640754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363250 + }, + { + "epoch": 1.7617453529637852, + "grad_norm": 1.908648883386377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363260 + }, + { + "epoch": 1.7617938511566211, + "grad_norm": 1.5697255051350112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363270 + }, + { + "epoch": 1.7618423493494575, + "grad_norm": 1.8094826970127542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363280 + }, + { + "epoch": 1.7618908475422934, + "grad_norm": 1.3328484094188298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363290 + }, + { + "epoch": 1.7619393457351296, + "grad_norm": 1.500439950063992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363300 + }, + { + "epoch": 1.7619878439279657, + "grad_norm": 1.4831884165289466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363310 + }, + { + "epoch": 1.7620363421208016, + "grad_norm": 1.176258646040651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363320 + }, + { + "epoch": 1.7620848403136378, + "grad_norm": 1.0032393582548593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363330 + }, + { + "epoch": 1.762133338506474, + "grad_norm": 1.6076441511359008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363340 + }, + { + "epoch": 1.7621818366993098, + "grad_norm": 1.0494630053869969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363350 + }, + { + "epoch": 1.7622303348921462, + "grad_norm": 9.91724746768341e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363360 + }, + { + "epoch": 1.7622788330849821, + "grad_norm": 1.3334717330337753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363370 + }, + { + "epoch": 1.7623273312778183, + "grad_norm": 1.07609254840213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363380 + }, + { + "epoch": 1.7623758294706544, + "grad_norm": 1.6308243644402864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363390 + }, + { + "epoch": 1.7624243276634903, + "grad_norm": 1.3504031670663608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363400 + }, + { + "epoch": 1.7624728258563265, + "grad_norm": 1.3951065191974976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363410 + }, + { + "epoch": 1.7625213240491626, + "grad_norm": 1.554712802942504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363420 + }, + { + "epoch": 1.7625698222419988, + "grad_norm": 5.102673412693548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363430 + }, + { + "epoch": 1.762618320434835, + "grad_norm": 2.452461345114898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363440 + }, + { + "epoch": 1.7626668186276708, + "grad_norm": 1.0525470273137216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363450 + }, + { + "epoch": 1.762715316820507, + "grad_norm": 1.1144794420658855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363460 + }, + { + "epoch": 1.7627638150133431, + "grad_norm": 1.4277719451172288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363470 + }, + { + "epoch": 1.762812313206179, + "grad_norm": 1.1280706146976627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363480 + }, + { + "epoch": 1.7628608113990154, + "grad_norm": 1.30623885041814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363490 + }, + { + "epoch": 1.7629093095918513, + "grad_norm": 1.7153581666207174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363500 + }, + { + "epoch": 1.7629578077846875, + "grad_norm": 1.6921630319188807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363510 + }, + { + "epoch": 1.7630063059775236, + "grad_norm": 1.1248924458584497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363520 + }, + { + "epoch": 1.7630548041703595, + "grad_norm": 1.4366369427420977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363530 + }, + { + "epoch": 1.7631033023631957, + "grad_norm": 1.5200996017483703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363540 + }, + { + "epoch": 1.7631518005560318, + "grad_norm": 1.1275198552596066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363550 + }, + { + "epoch": 1.7632002987488677, + "grad_norm": 1.3172908985836784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363560 + }, + { + "epoch": 1.763248796941704, + "grad_norm": 1.1402143229588546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363570 + }, + { + "epoch": 1.76329729513454, + "grad_norm": 1.3461266767933466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363580 + }, + { + "epoch": 1.7633457933273762, + "grad_norm": 1.3405361265483862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363590 + }, + { + "epoch": 1.7633942915202123, + "grad_norm": 1.1272989652866272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363600 + }, + { + "epoch": 1.7634427897130482, + "grad_norm": 9.337237649731378e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363610 + }, + { + "epoch": 1.7634912879058844, + "grad_norm": 9.197442807362677e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363620 + }, + { + "epoch": 1.7635397860987205, + "grad_norm": 1.19710721335764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363630 + }, + { + "epoch": 1.7635882842915565, + "grad_norm": 1.3627494688250863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363640 + }, + { + "epoch": 1.7636367824843928, + "grad_norm": 1.5657146690273294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363650 + }, + { + "epoch": 1.7636852806772287, + "grad_norm": 1.153091311323351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363660 + }, + { + "epoch": 1.7637337788700649, + "grad_norm": 1.5099468342327782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363670 + }, + { + "epoch": 1.763782277062901, + "grad_norm": 1.5317096924150064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363680 + }, + { + "epoch": 1.763830775255737, + "grad_norm": 1.82359318756653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363690 + }, + { + "epoch": 1.763879273448573, + "grad_norm": 1.0823889340372261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363700 + }, + { + "epoch": 1.7639277716414092, + "grad_norm": 1.6769241995007178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363710 + }, + { + "epoch": 1.7639762698342452, + "grad_norm": 1.2395368287343445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363720 + }, + { + "epoch": 1.7640247680270815, + "grad_norm": 1.0711051601219879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363730 + }, + { + "epoch": 1.7640732662199174, + "grad_norm": 1.4637722145494081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363740 + }, + { + "epoch": 1.7641217644127536, + "grad_norm": 1.7565517040907253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363750 + }, + { + "epoch": 1.7641702626055897, + "grad_norm": 1.3884378091688632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363760 + }, + { + "epoch": 1.7642187607984257, + "grad_norm": 1.4809634407697558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363770 + }, + { + "epoch": 1.7642672589912618, + "grad_norm": 1.3068968129914538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363780 + }, + { + "epoch": 1.764315757184098, + "grad_norm": 1.0180795761982608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363790 + }, + { + "epoch": 1.7643642553769339, + "grad_norm": 1.3084135552787757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363800 + }, + { + "epoch": 1.7644127535697702, + "grad_norm": 1.8828503201007152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363810 + }, + { + "epoch": 1.7644612517626062, + "grad_norm": 9.683446933195228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363820 + }, + { + "epoch": 1.7645097499554423, + "grad_norm": 1.588009368447274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363830 + }, + { + "epoch": 1.7645582481482784, + "grad_norm": 1.3815951938056514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363840 + }, + { + "epoch": 1.7646067463411144, + "grad_norm": 1.0155138063794311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363850 + }, + { + "epoch": 1.7646552445339505, + "grad_norm": 1.3198516057855159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363860 + }, + { + "epoch": 1.7647037427267867, + "grad_norm": 1.0387013915647003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363870 + }, + { + "epoch": 1.7647522409196226, + "grad_norm": 1.560875695361119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363880 + }, + { + "epoch": 1.764800739112459, + "grad_norm": 1.4169515338835481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363890 + }, + { + "epoch": 1.7648492373052949, + "grad_norm": 1.4879344867324562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363900 + }, + { + "epoch": 1.764897735498131, + "grad_norm": 1.4415663329714334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363910 + }, + { + "epoch": 1.7649462336909671, + "grad_norm": 1.1222690332601815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363920 + }, + { + "epoch": 1.764994731883803, + "grad_norm": 1.3417813526928057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363930 + }, + { + "epoch": 1.7650432300766394, + "grad_norm": 1.5909872530528446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363940 + }, + { + "epoch": 1.7650917282694754, + "grad_norm": 1.445053854354228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363950 + }, + { + "epoch": 1.7651402264623115, + "grad_norm": 1.5395844599197517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363960 + }, + { + "epoch": 1.7651887246551476, + "grad_norm": 1.0874021683093815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363970 + }, + { + "epoch": 1.7652372228479836, + "grad_norm": 1.1623279227990224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363980 + }, + { + "epoch": 1.7652857210408197, + "grad_norm": 1.2383701175622264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 363990 + }, + { + "epoch": 1.7653342192336559, + "grad_norm": 1.0931582750117741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364000 + }, + { + "epoch": 1.7653827174264918, + "grad_norm": 1.6860916218774946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364010 + }, + { + "epoch": 1.7654312156193281, + "grad_norm": 1.2592939135913639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364020 + }, + { + "epoch": 1.765479713812164, + "grad_norm": 1.517058301203633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364030 + }, + { + "epoch": 1.7655282120050002, + "grad_norm": 1.2664633786130253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364040 + }, + { + "epoch": 1.7655767101978364, + "grad_norm": 1.3056507874864565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364050 + }, + { + "epoch": 1.7656252083906723, + "grad_norm": 1.8098571530344998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364060 + }, + { + "epoch": 1.7656737065835084, + "grad_norm": 9.382712384820024e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364070 + }, + { + "epoch": 1.7657222047763446, + "grad_norm": 1.2700891893757671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364080 + }, + { + "epoch": 1.7657707029691805, + "grad_norm": 1.1712331549063038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364090 + }, + { + "epoch": 1.7658192011620168, + "grad_norm": 1.2367998181161965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364100 + }, + { + "epoch": 1.7658676993548528, + "grad_norm": 1.173912878016381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364110 + }, + { + "epoch": 1.765916197547689, + "grad_norm": 9.223819930070931e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364120 + }, + { + "epoch": 1.765964695740525, + "grad_norm": 9.401792233632023e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364130 + }, + { + "epoch": 1.766013193933361, + "grad_norm": 1.0349978651902347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364140 + }, + { + "epoch": 1.7660616921261971, + "grad_norm": 1.215470568638466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364150 + }, + { + "epoch": 1.7661101903190333, + "grad_norm": 1.5556432586549818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364160 + }, + { + "epoch": 1.7661586885118692, + "grad_norm": 1.4545164184198711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364170 + }, + { + "epoch": 1.7662071867047056, + "grad_norm": 1.243418079610592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364180 + }, + { + "epoch": 1.7662556848975415, + "grad_norm": 1.3883559191185668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364190 + }, + { + "epoch": 1.7663041830903776, + "grad_norm": 1.0415209139580384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364200 + }, + { + "epoch": 1.7663526812832138, + "grad_norm": 1.2338714050486033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364210 + }, + { + "epoch": 1.7664011794760497, + "grad_norm": 1.543398475689628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364220 + }, + { + "epoch": 1.7664496776688858, + "grad_norm": 1.5195958269487164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364230 + }, + { + "epoch": 1.766498175861722, + "grad_norm": 1.64527964585659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364240 + }, + { + "epoch": 1.766546674054558, + "grad_norm": 1.1749357042845077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364250 + }, + { + "epoch": 1.7665951722473943, + "grad_norm": 1.5655929885838304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364260 + }, + { + "epoch": 1.7666436704402302, + "grad_norm": 1.3292590139712956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364270 + }, + { + "epoch": 1.7666921686330663, + "grad_norm": 1.617325473546316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364280 + }, + { + "epoch": 1.7667406668259025, + "grad_norm": 1.2565458007429697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364290 + }, + { + "epoch": 1.7667891650187384, + "grad_norm": 1.1117227138868202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364300 + }, + { + "epoch": 1.7668376632115745, + "grad_norm": 1.597614662784963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364310 + }, + { + "epoch": 1.7668861614044107, + "grad_norm": 1.1350540951582389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364320 + }, + { + "epoch": 1.7669346595972466, + "grad_norm": 1.4758474442544411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364330 + }, + { + "epoch": 1.766983157790083, + "grad_norm": 1.3000678755759054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364340 + }, + { + "epoch": 1.767031655982919, + "grad_norm": 1.1762653073787988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364350 + }, + { + "epoch": 1.767080154175755, + "grad_norm": 1.2346584199462995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364360 + }, + { + "epoch": 1.7671286523685912, + "grad_norm": 1.2995216458477898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364370 + }, + { + "epoch": 1.767177150561427, + "grad_norm": 1.3894460693109068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364380 + }, + { + "epoch": 1.7672256487542632, + "grad_norm": 1.394817861211095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364390 + }, + { + "epoch": 1.7672741469470994, + "grad_norm": 1.5086362381566687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364400 + }, + { + "epoch": 1.7673226451399353, + "grad_norm": 1.0120295712567895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364410 + }, + { + "epoch": 1.7673711433327717, + "grad_norm": 1.7365080040576686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364420 + }, + { + "epoch": 1.7674196415256076, + "grad_norm": 1.2352661116210584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364430 + }, + { + "epoch": 1.7674681397184437, + "grad_norm": 1.65242379779329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364440 + }, + { + "epoch": 1.7675166379112799, + "grad_norm": 1.5207834991315394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364450 + }, + { + "epoch": 1.7675651361041158, + "grad_norm": 1.6560475657456664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364460 + }, + { + "epoch": 1.7676136342969522, + "grad_norm": 1.1796088550397599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364470 + }, + { + "epoch": 1.767662132489788, + "grad_norm": 2.4922099939317377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364480 + }, + { + "epoch": 1.7677106306826242, + "grad_norm": 1.1337635719144146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364490 + }, + { + "epoch": 1.7677591288754604, + "grad_norm": 1.858323628312064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364500 + }, + { + "epoch": 1.7678076270682963, + "grad_norm": 1.8537074097935147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364510 + }, + { + "epoch": 1.7678561252611324, + "grad_norm": 1.1138257427489862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364520 + }, + { + "epoch": 1.7679046234539686, + "grad_norm": 1.2354068879005808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364530 + }, + { + "epoch": 1.7679531216468045, + "grad_norm": 1.1066101812673423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364540 + }, + { + "epoch": 1.7680016198396409, + "grad_norm": 1.4897248767908877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364550 + }, + { + "epoch": 1.7680501180324768, + "grad_norm": 1.4935768177792852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364560 + }, + { + "epoch": 1.768098616225313, + "grad_norm": 1.1925460619011119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364570 + }, + { + "epoch": 1.768147114418149, + "grad_norm": 1.8019937542135267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364580 + }, + { + "epoch": 1.768195612610985, + "grad_norm": 2.0318806193131422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364590 + }, + { + "epoch": 1.7682441108038212, + "grad_norm": 9.15108167021117e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364600 + }, + { + "epoch": 1.7682926089966573, + "grad_norm": 1.0511857162498472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364610 + }, + { + "epoch": 1.7683411071894932, + "grad_norm": 1.2318276176870313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364620 + }, + { + "epoch": 1.7683896053823296, + "grad_norm": 1.2373599034276594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364630 + }, + { + "epoch": 1.7684381035751655, + "grad_norm": 1.925027071081331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364640 + }, + { + "epoch": 1.7684866017680017, + "grad_norm": 1.32631381433157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364650 + }, + { + "epoch": 1.7685350999608378, + "grad_norm": 1.2525060100188057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364660 + }, + { + "epoch": 1.7685835981536737, + "grad_norm": 1.521036629981154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364670 + }, + { + "epoch": 1.7686320963465099, + "grad_norm": 1.3200631698850884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364680 + }, + { + "epoch": 1.768680594539346, + "grad_norm": 1.2056260878523517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364690 + }, + { + "epoch": 1.768729092732182, + "grad_norm": 1.2366985657763507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364700 + }, + { + "epoch": 1.7687775909250183, + "grad_norm": 1.3626567429980696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364710 + }, + { + "epoch": 1.7688260891178542, + "grad_norm": 1.5111689677382856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364720 + }, + { + "epoch": 1.7688745873106904, + "grad_norm": 1.218287160043019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364730 + }, + { + "epoch": 1.7689230855035265, + "grad_norm": 1.2631274692864736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364740 + }, + { + "epoch": 1.7689715836963624, + "grad_norm": 1.7396327933738576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364750 + }, + { + "epoch": 1.7690200818891986, + "grad_norm": 1.2218338341085655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364760 + }, + { + "epoch": 1.7690685800820347, + "grad_norm": 1.1838241498196567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364770 + }, + { + "epoch": 1.7691170782748706, + "grad_norm": 1.0445427633953841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364780 + }, + { + "epoch": 1.769165576467707, + "grad_norm": 1.2746554034492874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364790 + }, + { + "epoch": 1.769214074660543, + "grad_norm": 1.5990194057735607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364800 + }, + { + "epoch": 1.769262572853379, + "grad_norm": 1.1068665095592678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364810 + }, + { + "epoch": 1.7693110710462152, + "grad_norm": 1.3647713181796917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364820 + }, + { + "epoch": 1.7693595692390511, + "grad_norm": 1.203587540743456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364830 + }, + { + "epoch": 1.7694080674318873, + "grad_norm": 1.1643000341621246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364840 + }, + { + "epoch": 1.7694565656247234, + "grad_norm": 1.312985720147708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364850 + }, + { + "epoch": 1.7695050638175593, + "grad_norm": 1.1360416607431034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364860 + }, + { + "epoch": 1.7695535620103957, + "grad_norm": 1.4359233801997107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364870 + }, + { + "epoch": 1.7696020602032316, + "grad_norm": 1.3183808711403344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364880 + }, + { + "epoch": 1.7696505583960678, + "grad_norm": 1.3245711194542764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364890 + }, + { + "epoch": 1.769699056588904, + "grad_norm": 9.095920461277274e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364900 + }, + { + "epoch": 1.7697475547817398, + "grad_norm": 1.24779573340561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364910 + }, + { + "epoch": 1.769796052974576, + "grad_norm": 1.5296281574705972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364920 + }, + { + "epoch": 1.7698445511674121, + "grad_norm": 1.6935565838593902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364930 + }, + { + "epoch": 1.769893049360248, + "grad_norm": 1.0856846088813654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364940 + }, + { + "epoch": 1.7699415475530844, + "grad_norm": 1.3631378692480212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364950 + }, + { + "epoch": 1.7699900457459203, + "grad_norm": 1.1948257494509562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364960 + }, + { + "epoch": 1.7700385439387565, + "grad_norm": 1.490618473098948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364970 + }, + { + "epoch": 1.7700870421315926, + "grad_norm": 1.712953867638589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364980 + }, + { + "epoch": 1.7701355403244285, + "grad_norm": 1.6664786883779925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 364990 + }, + { + "epoch": 1.770184038517265, + "grad_norm": 1.510743885546617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365000 + }, + { + "epoch": 1.7702325367101008, + "grad_norm": 1.4064414521897106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365010 + }, + { + "epoch": 1.770281034902937, + "grad_norm": 1.3078702565394451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365020 + }, + { + "epoch": 1.7703295330957731, + "grad_norm": 1.1009062106381862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365030 + }, + { + "epoch": 1.770378031288609, + "grad_norm": 1.3920466557237887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365040 + }, + { + "epoch": 1.7704265294814452, + "grad_norm": 1.884888334302559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365050 + }, + { + "epoch": 1.7704750276742813, + "grad_norm": 1.568369611959497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365060 + }, + { + "epoch": 1.7705235258671173, + "grad_norm": 1.361710921798931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365070 + }, + { + "epoch": 1.7705720240599536, + "grad_norm": 1.1672105948434819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365080 + }, + { + "epoch": 1.7706205222527895, + "grad_norm": 1.670073146442519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365090 + }, + { + "epoch": 1.7706690204456257, + "grad_norm": 1.1247282216686472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365100 + }, + { + "epoch": 1.7707175186384618, + "grad_norm": 1.511801883680164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365110 + }, + { + "epoch": 1.7707660168312978, + "grad_norm": 1.6673293856683813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365120 + }, + { + "epoch": 1.770814515024134, + "grad_norm": 1.1189713156056769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365130 + }, + { + "epoch": 1.77086301321697, + "grad_norm": 1.2604509436187072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365140 + }, + { + "epoch": 1.770911511409806, + "grad_norm": 1.5859470181567303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365150 + }, + { + "epoch": 1.7709600096026423, + "grad_norm": 9.698838177030211e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365160 + }, + { + "epoch": 1.7710085077954782, + "grad_norm": 1.0391746130267165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365170 + }, + { + "epoch": 1.7710570059883144, + "grad_norm": 1.8818722580249414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365180 + }, + { + "epoch": 1.7711055041811505, + "grad_norm": 1.3559774636462407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365190 + }, + { + "epoch": 1.7711540023739865, + "grad_norm": 1.6884250442217308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365200 + }, + { + "epoch": 1.7712025005668226, + "grad_norm": 1.91618045874975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365210 + }, + { + "epoch": 1.7712509987596587, + "grad_norm": 1.504166391441686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365220 + }, + { + "epoch": 1.7712994969524947, + "grad_norm": 1.0578084186363412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365230 + }, + { + "epoch": 1.771347995145331, + "grad_norm": 2.2596442050826226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365240 + }, + { + "epoch": 1.771396493338167, + "grad_norm": 1.2549747019363622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365250 + }, + { + "epoch": 1.771444991531003, + "grad_norm": 1.2445591224263808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365260 + }, + { + "epoch": 1.7714934897238392, + "grad_norm": 1.0762070346004293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365270 + }, + { + "epoch": 1.7715419879166752, + "grad_norm": 1.1655599152504692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365280 + }, + { + "epoch": 1.7715904861095113, + "grad_norm": 1.064185806143314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365290 + }, + { + "epoch": 1.7716389843023475, + "grad_norm": 1.0233728531261477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365300 + }, + { + "epoch": 1.7716874824951834, + "grad_norm": 1.3175119661923418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365310 + }, + { + "epoch": 1.7717359806880197, + "grad_norm": 1.2138661631411196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365320 + }, + { + "epoch": 1.7717844788808557, + "grad_norm": 1.394939719290278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365330 + }, + { + "epoch": 1.7718329770736918, + "grad_norm": 1.545803662850176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365340 + }, + { + "epoch": 1.771881475266528, + "grad_norm": 1.240604508012666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365350 + }, + { + "epoch": 1.7719299734593639, + "grad_norm": 1.3954971400664817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365360 + }, + { + "epoch": 1.7719784716522, + "grad_norm": 1.1933351196091735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365370 + }, + { + "epoch": 1.7720269698450362, + "grad_norm": 9.601179407070504e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365380 + }, + { + "epoch": 1.772075468037872, + "grad_norm": 9.378234189227896e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365390 + }, + { + "epoch": 1.7721239662307084, + "grad_norm": 1.1561149371175361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365400 + }, + { + "epoch": 1.7721724644235444, + "grad_norm": 1.959307915910813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365410 + }, + { + "epoch": 1.7722209626163805, + "grad_norm": 1.0359749502697468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365420 + }, + { + "epoch": 1.7722694608092167, + "grad_norm": 2.877815674651174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365430 + }, + { + "epoch": 1.7723179590020526, + "grad_norm": 1.0475891265571136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365440 + }, + { + "epoch": 1.7723664571948887, + "grad_norm": 1.3311113100655803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365450 + }, + { + "epoch": 1.7724149553877249, + "grad_norm": 1.2200320753663618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365460 + }, + { + "epoch": 1.7724634535805608, + "grad_norm": 1.2672485283360402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365470 + }, + { + "epoch": 1.7725119517733972, + "grad_norm": 9.761406793984406e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365480 + }, + { + "epoch": 1.772560449966233, + "grad_norm": 1.0282934503891283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365490 + }, + { + "epoch": 1.7726089481590692, + "grad_norm": 1.3108212293388988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365500 + }, + { + "epoch": 1.7726574463519054, + "grad_norm": 9.680904966558046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365510 + }, + { + "epoch": 1.7727059445447413, + "grad_norm": 1.5477070292035933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365520 + }, + { + "epoch": 1.7727544427375777, + "grad_norm": 1.2835174700853713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365530 + }, + { + "epoch": 1.7728029409304136, + "grad_norm": 1.726697007597977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365540 + }, + { + "epoch": 1.7728514391232497, + "grad_norm": 9.223743546726837e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365550 + }, + { + "epoch": 1.7728999373160859, + "grad_norm": 9.817329171823985e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365560 + }, + { + "epoch": 1.7729484355089218, + "grad_norm": 1.1862480775448603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365570 + }, + { + "epoch": 1.772996933701758, + "grad_norm": 9.331032835291353e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365580 + }, + { + "epoch": 1.773045431894594, + "grad_norm": 1.3840110391072358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365590 + }, + { + "epoch": 1.77309393008743, + "grad_norm": 1.2886323119687404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365600 + }, + { + "epoch": 1.7731424282802664, + "grad_norm": 1.3033824686203843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365610 + }, + { + "epoch": 1.7731909264731023, + "grad_norm": 1.6064515051539274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365620 + }, + { + "epoch": 1.7732394246659384, + "grad_norm": 1.1694536894424346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365630 + }, + { + "epoch": 1.7732879228587746, + "grad_norm": 9.650461763044405e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365640 + }, + { + "epoch": 1.7733364210516105, + "grad_norm": 1.3480813798594227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365650 + }, + { + "epoch": 1.7733849192444466, + "grad_norm": 1.675513416898866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365660 + }, + { + "epoch": 1.7734334174372828, + "grad_norm": 1.0326411725714024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365670 + }, + { + "epoch": 1.7734819156301187, + "grad_norm": 8.40991454253981e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365680 + }, + { + "epoch": 1.773530413822955, + "grad_norm": 1.2400116489175161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365690 + }, + { + "epoch": 1.773578912015791, + "grad_norm": 1.2339278931960962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365700 + }, + { + "epoch": 1.7736274102086271, + "grad_norm": 1.2959147532853876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365710 + }, + { + "epoch": 1.7736759084014633, + "grad_norm": 1.3420266675723269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365720 + }, + { + "epoch": 1.7737244065942992, + "grad_norm": 1.1259694510101781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365730 + }, + { + "epoch": 1.7737729047871353, + "grad_norm": 1.3441693091920115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365740 + }, + { + "epoch": 1.7738214029799715, + "grad_norm": 1.1998960047776563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365750 + }, + { + "epoch": 1.7738699011728074, + "grad_norm": 1.0416425944015373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365760 + }, + { + "epoch": 1.7739183993656438, + "grad_norm": 8.325907963069312e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365770 + }, + { + "epoch": 1.7739668975584797, + "grad_norm": 1.2098775314939303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365780 + }, + { + "epoch": 1.7740153957513158, + "grad_norm": 1.7318143363809213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365790 + }, + { + "epoch": 1.774063893944152, + "grad_norm": 1.1852264947265212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365800 + }, + { + "epoch": 1.774112392136988, + "grad_norm": 9.46325862116737e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365810 + }, + { + "epoch": 1.774160890329824, + "grad_norm": 9.878621476389071e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365820 + }, + { + "epoch": 1.7742093885226602, + "grad_norm": 1.0796574301252804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365830 + }, + { + "epoch": 1.7742578867154961, + "grad_norm": 1.44966838533378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365840 + }, + { + "epoch": 1.7743063849083325, + "grad_norm": 1.1458684667786656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365850 + }, + { + "epoch": 1.7743548831011684, + "grad_norm": 1.180982955872878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365860 + }, + { + "epoch": 1.7744033812940045, + "grad_norm": 1.765375401419078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365870 + }, + { + "epoch": 1.7744518794868407, + "grad_norm": 1.907063484907212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365880 + }, + { + "epoch": 1.7745003776796766, + "grad_norm": 1.0614435552724899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365890 + }, + { + "epoch": 1.7745488758725128, + "grad_norm": 1.3843166613014546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365900 + }, + { + "epoch": 1.774597374065349, + "grad_norm": 1.404292859774614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365910 + }, + { + "epoch": 1.7746458722581848, + "grad_norm": 9.89165904741185e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365920 + }, + { + "epoch": 1.7746943704510212, + "grad_norm": 9.508946519076744e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365930 + }, + { + "epoch": 1.774742868643857, + "grad_norm": 1.1853272141593152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365940 + }, + { + "epoch": 1.7747913668366933, + "grad_norm": 1.0090786872751778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365950 + }, + { + "epoch": 1.7748398650295294, + "grad_norm": 1.2476279565021287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365960 + }, + { + "epoch": 1.7748883632223653, + "grad_norm": 1.88469044815065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365970 + }, + { + "epoch": 1.7749368614152015, + "grad_norm": 1.392051718340781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365980 + }, + { + "epoch": 1.7749853596080376, + "grad_norm": 1.120753800876173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 365990 + }, + { + "epoch": 1.7750338578008737, + "grad_norm": 1.777359237564724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366000 + }, + { + "epoch": 1.77508235599371, + "grad_norm": 1.2089726553199398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366010 + }, + { + "epoch": 1.7751308541865458, + "grad_norm": 1.5494604710397653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366020 + }, + { + "epoch": 1.775179352379382, + "grad_norm": 1.0168889730266528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366030 + }, + { + "epoch": 1.775227850572218, + "grad_norm": 1.1243323605469868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366040 + }, + { + "epoch": 1.775276348765054, + "grad_norm": 1.072796163015255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366050 + }, + { + "epoch": 1.7753248469578904, + "grad_norm": 1.1358263662941681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366060 + }, + { + "epoch": 1.7753733451507263, + "grad_norm": 1.739471855444208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366070 + }, + { + "epoch": 1.7754218433435625, + "grad_norm": 1.163253138258824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366080 + }, + { + "epoch": 1.7754703415363986, + "grad_norm": 1.204043620361972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366090 + }, + { + "epoch": 1.7755188397292345, + "grad_norm": 1.2632263235445862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366100 + }, + { + "epoch": 1.7755673379220707, + "grad_norm": 1.0282125373350937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366110 + }, + { + "epoch": 1.7756158361149068, + "grad_norm": 1.219442058442155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366120 + }, + { + "epoch": 1.7756643343077427, + "grad_norm": 1.566192686652812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366130 + }, + { + "epoch": 1.775712832500579, + "grad_norm": 1.4026809047607003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366140 + }, + { + "epoch": 1.775761330693415, + "grad_norm": 1.2914164848609744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366150 + }, + { + "epoch": 1.7758098288862512, + "grad_norm": 1.606822586097678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366160 + }, + { + "epoch": 1.7758583270790873, + "grad_norm": 1.530073134858867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366170 + }, + { + "epoch": 1.7759068252719232, + "grad_norm": 9.98163951493325e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366180 + }, + { + "epoch": 1.7759553234647594, + "grad_norm": 1.2936603788205048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366190 + }, + { + "epoch": 1.7760038216575955, + "grad_norm": 1.2924468606456685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366200 + }, + { + "epoch": 1.7760523198504314, + "grad_norm": 1.1621322570931625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366210 + }, + { + "epoch": 1.7761008180432678, + "grad_norm": 1.238812341597395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366220 + }, + { + "epoch": 1.7761493162361037, + "grad_norm": 2.209261928953765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366230 + }, + { + "epoch": 1.7761978144289399, + "grad_norm": 1.1701758673154927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366240 + }, + { + "epoch": 1.776246312621776, + "grad_norm": 1.4666645675731615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366250 + }, + { + "epoch": 1.776294810814612, + "grad_norm": 1.4371419609915392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366260 + }, + { + "epoch": 1.776343309007448, + "grad_norm": 1.7282737019286287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366270 + }, + { + "epoch": 1.7763918072002842, + "grad_norm": 1.1040978797893786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366280 + }, + { + "epoch": 1.7764403053931201, + "grad_norm": 1.3722689651274322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366290 + }, + { + "epoch": 1.7764888035859565, + "grad_norm": 1.7038416899595177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366300 + }, + { + "epoch": 1.7765373017787924, + "grad_norm": 1.1638654484613653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366310 + }, + { + "epoch": 1.7765857999716286, + "grad_norm": 1.6373649103229582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366320 + }, + { + "epoch": 1.7766342981644647, + "grad_norm": 1.4935098491264398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366330 + }, + { + "epoch": 1.7766827963573006, + "grad_norm": 1.043408737189111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366340 + }, + { + "epoch": 1.7767312945501368, + "grad_norm": 1.5612840797984973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366350 + }, + { + "epoch": 1.776779792742973, + "grad_norm": 1.155407147734877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366360 + }, + { + "epoch": 1.7768282909358089, + "grad_norm": 1.3049997527048163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366370 + }, + { + "epoch": 1.7768767891286452, + "grad_norm": 1.682379213718832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366380 + }, + { + "epoch": 1.7769252873214811, + "grad_norm": 1.465214971574369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366390 + }, + { + "epoch": 1.7769737855143173, + "grad_norm": 1.611905275922254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366400 + }, + { + "epoch": 1.7770222837071534, + "grad_norm": 1.840008323483744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366410 + }, + { + "epoch": 1.7770707818999893, + "grad_norm": 1.96349301262444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366420 + }, + { + "epoch": 1.7771192800928255, + "grad_norm": 1.1178852510340676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366430 + }, + { + "epoch": 1.7771677782856616, + "grad_norm": 1.2762844114888594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366440 + }, + { + "epoch": 1.7772162764784976, + "grad_norm": 9.213032114985253e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366450 + }, + { + "epoch": 1.777264774671334, + "grad_norm": 1.3309902513469751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366460 + }, + { + "epoch": 1.7773132728641698, + "grad_norm": 1.3293087519627989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366470 + }, + { + "epoch": 1.777361771057006, + "grad_norm": 9.915183341036027e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366480 + }, + { + "epoch": 1.7774102692498421, + "grad_norm": 1.4001273029862205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366490 + }, + { + "epoch": 1.777458767442678, + "grad_norm": 1.2583711850311374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366500 + }, + { + "epoch": 1.7775072656355144, + "grad_norm": 1.3828602263288303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366510 + }, + { + "epoch": 1.7775557638283503, + "grad_norm": 1.1533415111841805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366520 + }, + { + "epoch": 1.7776042620211865, + "grad_norm": 1.3263993459133872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366530 + }, + { + "epoch": 1.7776527602140226, + "grad_norm": 1.1664098131802803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366540 + }, + { + "epoch": 1.7777012584068586, + "grad_norm": 1.884589728717856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366550 + }, + { + "epoch": 1.7777497565996947, + "grad_norm": 1.050928677415186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366560 + }, + { + "epoch": 1.7777982547925308, + "grad_norm": 1.0212488632532768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366570 + }, + { + "epoch": 1.7778467529853668, + "grad_norm": 1.4799046432756313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366580 + }, + { + "epoch": 1.7778952511782031, + "grad_norm": 1.1376400266271958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366590 + }, + { + "epoch": 1.777943749371039, + "grad_norm": 1.3756237038364816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366600 + }, + { + "epoch": 1.7779922475638752, + "grad_norm": 2.1917882619959528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366610 + }, + { + "epoch": 1.7780407457567113, + "grad_norm": 1.0267434902289096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366620 + }, + { + "epoch": 1.7780892439495473, + "grad_norm": 1.1699599511416636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366630 + }, + { + "epoch": 1.7781377421423834, + "grad_norm": 1.4707335793673337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366640 + }, + { + "epoch": 1.7781862403352195, + "grad_norm": 1.3362305040232059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366650 + }, + { + "epoch": 1.7782347385280555, + "grad_norm": 1.6919630141387643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366660 + }, + { + "epoch": 1.7782832367208918, + "grad_norm": 1.2027248530444012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366670 + }, + { + "epoch": 1.7783317349137278, + "grad_norm": 1.650353276261285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366680 + }, + { + "epoch": 1.778380233106564, + "grad_norm": 1.3874582371897759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366690 + }, + { + "epoch": 1.7784287312994, + "grad_norm": 1.2739760357760588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366700 + }, + { + "epoch": 1.778477229492236, + "grad_norm": 1.2166570861893433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366710 + }, + { + "epoch": 1.778525727685072, + "grad_norm": 1.0750865975239776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366720 + }, + { + "epoch": 1.7785742258779083, + "grad_norm": 1.1735788341127318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366730 + }, + { + "epoch": 1.7786227240707442, + "grad_norm": 1.3083766070565161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366740 + }, + { + "epoch": 1.7786712222635805, + "grad_norm": 1.2888552447520851e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366750 + }, + { + "epoch": 1.7787197204564165, + "grad_norm": 1.4145236093554558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366760 + }, + { + "epoch": 1.7787682186492526, + "grad_norm": 1.002283855910946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366770 + }, + { + "epoch": 1.7788167168420888, + "grad_norm": 1.196159171712452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366780 + }, + { + "epoch": 1.7788652150349247, + "grad_norm": 1.5801891350974984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366790 + }, + { + "epoch": 1.7789137132277608, + "grad_norm": 1.2947663385887154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366800 + }, + { + "epoch": 1.778962211420597, + "grad_norm": 1.6738260555371198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366810 + }, + { + "epoch": 1.7790107096134329, + "grad_norm": 1.1989572001880333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366820 + }, + { + "epoch": 1.7790592078062692, + "grad_norm": 1.369654167859835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366830 + }, + { + "epoch": 1.7791077059991052, + "grad_norm": 1.0309721965029439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366840 + }, + { + "epoch": 1.7791562041919413, + "grad_norm": 1.2147785000138356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366850 + }, + { + "epoch": 1.7792047023847775, + "grad_norm": 1.2643016411573171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366860 + }, + { + "epoch": 1.7792532005776134, + "grad_norm": 1.3214810579142977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366870 + }, + { + "epoch": 1.7793016987704495, + "grad_norm": 1.0761051605356897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366880 + }, + { + "epoch": 1.7793501969632857, + "grad_norm": 1.5801830954842444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366890 + }, + { + "epoch": 1.7793986951561216, + "grad_norm": 1.3073111482242439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366900 + }, + { + "epoch": 1.779447193348958, + "grad_norm": 1.7974036481405165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366910 + }, + { + "epoch": 1.7794956915417939, + "grad_norm": 1.5039512746284345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366920 + }, + { + "epoch": 1.77954418973463, + "grad_norm": 1.4107654600081787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366930 + }, + { + "epoch": 1.7795926879274662, + "grad_norm": 1.1689177625839875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366940 + }, + { + "epoch": 1.779641186120302, + "grad_norm": 1.3858504566144347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366950 + }, + { + "epoch": 1.7796896843131382, + "grad_norm": 1.3094674677915918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366960 + }, + { + "epoch": 1.7797381825059744, + "grad_norm": 9.795741995333174e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366970 + }, + { + "epoch": 1.7797866806988103, + "grad_norm": 1.5750794446489635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366980 + }, + { + "epoch": 1.7798351788916467, + "grad_norm": 1.4452909979922879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 366990 + }, + { + "epoch": 1.7798836770844826, + "grad_norm": 1.1721824400012792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367000 + }, + { + "epoch": 1.7799321752773187, + "grad_norm": 1.610569455579025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367010 + }, + { + "epoch": 1.7799806734701549, + "grad_norm": 1.2804278526346025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367020 + }, + { + "epoch": 1.7800291716629908, + "grad_norm": 2.0150540791519234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367030 + }, + { + "epoch": 1.7800776698558272, + "grad_norm": 1.5281937493227815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367040 + }, + { + "epoch": 1.780126168048663, + "grad_norm": 9.244366161453854e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367050 + }, + { + "epoch": 1.7801746662414992, + "grad_norm": 1.1429828639109019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367060 + }, + { + "epoch": 1.7802231644343354, + "grad_norm": 1.5203781345007883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367070 + }, + { + "epoch": 1.7802716626271713, + "grad_norm": 1.3133504062068369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367080 + }, + { + "epoch": 1.7803201608200074, + "grad_norm": 1.5873473202532296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367090 + }, + { + "epoch": 1.7803686590128436, + "grad_norm": 1.644692915192536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367100 + }, + { + "epoch": 1.7804171572056795, + "grad_norm": 1.2325728881990017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367110 + }, + { + "epoch": 1.7804656553985159, + "grad_norm": 1.335566501836638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367120 + }, + { + "epoch": 1.7805141535913518, + "grad_norm": 1.3133879761539902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367130 + }, + { + "epoch": 1.780562651784188, + "grad_norm": 1.54717803013682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367140 + }, + { + "epoch": 1.780611149977024, + "grad_norm": 1.0336616007577959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367150 + }, + { + "epoch": 1.78065964816986, + "grad_norm": 1.4886093246957444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367160 + }, + { + "epoch": 1.7807081463626961, + "grad_norm": 1.3444688917729763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367170 + }, + { + "epoch": 1.7807566445555323, + "grad_norm": 1.0540164296912735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367180 + }, + { + "epoch": 1.7808051427483682, + "grad_norm": 1.2947315219946631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367190 + }, + { + "epoch": 1.7808536409412046, + "grad_norm": 1.043515229781633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367200 + }, + { + "epoch": 1.7809021391340405, + "grad_norm": 1.6413366665801732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367210 + }, + { + "epoch": 1.7809506373268766, + "grad_norm": 1.474070909779357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367220 + }, + { + "epoch": 1.7809991355197128, + "grad_norm": 1.11219113918537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367230 + }, + { + "epoch": 1.7810476337125487, + "grad_norm": 1.3533598242077005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367240 + }, + { + "epoch": 1.7810961319053848, + "grad_norm": 1.5869630942688673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367250 + }, + { + "epoch": 1.781144630098221, + "grad_norm": 1.9070794721187667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367260 + }, + { + "epoch": 1.781193128291057, + "grad_norm": 1.3411217913983364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367270 + }, + { + "epoch": 1.7812416264838933, + "grad_norm": 1.600405141743977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367280 + }, + { + "epoch": 1.7812901246767292, + "grad_norm": 1.2424084872009189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367290 + }, + { + "epoch": 1.7813386228695653, + "grad_norm": 1.7270695096271993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367300 + }, + { + "epoch": 1.7813871210624015, + "grad_norm": 1.650207615000454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367310 + }, + { + "epoch": 1.7814356192552374, + "grad_norm": 1.268765448259046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367320 + }, + { + "epoch": 1.7814841174480736, + "grad_norm": 9.146766011269847e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367330 + }, + { + "epoch": 1.7815326156409097, + "grad_norm": 1.838805374632102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367340 + }, + { + "epoch": 1.7815811138337456, + "grad_norm": 1.1251042764115482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367350 + }, + { + "epoch": 1.781629612026582, + "grad_norm": 9.947571655288812e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367360 + }, + { + "epoch": 1.781678110219418, + "grad_norm": 1.7807726848673155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367370 + }, + { + "epoch": 1.781726608412254, + "grad_norm": 9.97944482605817e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367380 + }, + { + "epoch": 1.7817751066050902, + "grad_norm": 1.3890391059590002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367390 + }, + { + "epoch": 1.7818236047979261, + "grad_norm": 1.2257825865447103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367400 + }, + { + "epoch": 1.7818721029907623, + "grad_norm": 1.1367601970846408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367410 + }, + { + "epoch": 1.7819206011835984, + "grad_norm": 1.1294491564228792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367420 + }, + { + "epoch": 1.7819690993764343, + "grad_norm": 1.3083735872498892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367430 + }, + { + "epoch": 1.7820175975692707, + "grad_norm": 1.0195561728210123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367440 + }, + { + "epoch": 1.7820660957621066, + "grad_norm": 1.09005444670629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367450 + }, + { + "epoch": 1.7821145939549428, + "grad_norm": 2.3955244898843375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367460 + }, + { + "epoch": 1.782163092147779, + "grad_norm": 9.990594129760666e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367470 + }, + { + "epoch": 1.7822115903406148, + "grad_norm": 1.2475048549731582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367480 + }, + { + "epoch": 1.782260088533451, + "grad_norm": 1.7106541960743016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367490 + }, + { + "epoch": 1.7823085867262871, + "grad_norm": 1.562919393904849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367500 + }, + { + "epoch": 1.782357084919123, + "grad_norm": 1.325567566823338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367510 + }, + { + "epoch": 1.7824055831119594, + "grad_norm": 1.2378984948213656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367520 + }, + { + "epoch": 1.7824540813047953, + "grad_norm": 9.842233694712377e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367530 + }, + { + "epoch": 1.7825025794976315, + "grad_norm": 1.1437593983032457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367540 + }, + { + "epoch": 1.7825510776904676, + "grad_norm": 1.2626509615643045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367550 + }, + { + "epoch": 1.7825995758833035, + "grad_norm": 1.2737078058933093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367560 + }, + { + "epoch": 1.78264807407614, + "grad_norm": 1.5671780317916273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367570 + }, + { + "epoch": 1.7826965722689758, + "grad_norm": 1.021224349528893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367580 + }, + { + "epoch": 1.782745070461812, + "grad_norm": 1.2888026645896389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367590 + }, + { + "epoch": 1.782793568654648, + "grad_norm": 1.336179877853283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367600 + }, + { + "epoch": 1.782842066847484, + "grad_norm": 1.393174198227598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367610 + }, + { + "epoch": 1.7828905650403202, + "grad_norm": 1.3074287430470122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367620 + }, + { + "epoch": 1.7829390632331563, + "grad_norm": 1.4148374916089779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367630 + }, + { + "epoch": 1.7829875614259922, + "grad_norm": 1.6306557881762274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367640 + }, + { + "epoch": 1.7830360596188286, + "grad_norm": 1.2655482883872082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367650 + }, + { + "epoch": 1.7830845578116645, + "grad_norm": 1.2462335163831995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367660 + }, + { + "epoch": 1.7831330560045007, + "grad_norm": 1.1626007712095543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367670 + }, + { + "epoch": 1.7831815541973368, + "grad_norm": 1.3335893278565436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367680 + }, + { + "epoch": 1.7832300523901727, + "grad_norm": 1.0279514128797018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367690 + }, + { + "epoch": 1.7832785505830089, + "grad_norm": 1.1458447524148596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367700 + }, + { + "epoch": 1.783327048775845, + "grad_norm": 1.8254107558846044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367710 + }, + { + "epoch": 1.783375546968681, + "grad_norm": 1.659854653723869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367720 + }, + { + "epoch": 1.7834240451615173, + "grad_norm": 1.0131333993967928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367730 + }, + { + "epoch": 1.7834725433543532, + "grad_norm": 1.2367584290018385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367740 + }, + { + "epoch": 1.7835210415471894, + "grad_norm": 9.207158591095777e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367750 + }, + { + "epoch": 1.7835695397400255, + "grad_norm": 8.86055495641358e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367760 + }, + { + "epoch": 1.7836180379328614, + "grad_norm": 1.2568921903266528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367770 + }, + { + "epoch": 1.7836665361256976, + "grad_norm": 1.4685298310723738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367780 + }, + { + "epoch": 1.7837150343185337, + "grad_norm": 1.076980815639672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367790 + }, + { + "epoch": 1.7837635325113697, + "grad_norm": 1.0600078148570447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367800 + }, + { + "epoch": 1.783812030704206, + "grad_norm": 1.33797186663287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367810 + }, + { + "epoch": 1.783860528897042, + "grad_norm": 9.16973785791697e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367820 + }, + { + "epoch": 1.783909027089878, + "grad_norm": 1.964242102303615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367830 + }, + { + "epoch": 1.7839575252827142, + "grad_norm": 1.3494558359639086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367840 + }, + { + "epoch": 1.7840060234755502, + "grad_norm": 1.6197953200958182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367850 + }, + { + "epoch": 1.7840545216683863, + "grad_norm": 9.718384319512552e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367860 + }, + { + "epoch": 1.7841030198612224, + "grad_norm": 1.4554935923172252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367870 + }, + { + "epoch": 1.7841515180540584, + "grad_norm": 1.3128694575925692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367880 + }, + { + "epoch": 1.7842000162468947, + "grad_norm": 1.0918804527193515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367890 + }, + { + "epoch": 1.7842485144397306, + "grad_norm": 1.0929344540500097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367900 + }, + { + "epoch": 1.7842970126325668, + "grad_norm": 1.2560541939876657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367910 + }, + { + "epoch": 1.784345510825403, + "grad_norm": 1.2106800895139713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367920 + }, + { + "epoch": 1.7843940090182389, + "grad_norm": 1.1529659005304893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367930 + }, + { + "epoch": 1.784442507211075, + "grad_norm": 1.3023688794078225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367940 + }, + { + "epoch": 1.7844910054039111, + "grad_norm": 1.1861507331900611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367950 + }, + { + "epoch": 1.784539503596747, + "grad_norm": 1.5428096133973668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367960 + }, + { + "epoch": 1.7845880017895834, + "grad_norm": 1.4080027810337015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367970 + }, + { + "epoch": 1.7846364999824194, + "grad_norm": 9.504946163474415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367980 + }, + { + "epoch": 1.7846849981752555, + "grad_norm": 1.9868926059984915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 367990 + }, + { + "epoch": 1.7847334963680916, + "grad_norm": 1.0754168222604221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368000 + }, + { + "epoch": 1.7847819945609276, + "grad_norm": 1.4230822742433702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368010 + }, + { + "epoch": 1.7848304927537637, + "grad_norm": 1.0358971458401811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368020 + }, + { + "epoch": 1.7848789909465999, + "grad_norm": 9.418064550459349e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368030 + }, + { + "epoch": 1.784927489139436, + "grad_norm": 1.7400360263764014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368040 + }, + { + "epoch": 1.7849759873322721, + "grad_norm": 9.846798043611216e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368050 + }, + { + "epoch": 1.785024485525108, + "grad_norm": 1.0776552095137504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368060 + }, + { + "epoch": 1.7850729837179442, + "grad_norm": 1.6168739236377405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368070 + }, + { + "epoch": 1.7851214819107803, + "grad_norm": 9.145495027951256e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368080 + }, + { + "epoch": 1.7851699801036163, + "grad_norm": 1.226612855731446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368090 + }, + { + "epoch": 1.7852184782964526, + "grad_norm": 1.6383262746444416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368100 + }, + { + "epoch": 1.7852669764892886, + "grad_norm": 1.2729101328545767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368110 + }, + { + "epoch": 1.7853154746821247, + "grad_norm": 1.5084147264587955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368120 + }, + { + "epoch": 1.7853639728749608, + "grad_norm": 1.7187257839168524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368130 + }, + { + "epoch": 1.7854124710677968, + "grad_norm": 1.3598570269834909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368140 + }, + { + "epoch": 1.785460969260633, + "grad_norm": 1.0587606347201017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368150 + }, + { + "epoch": 1.785509467453469, + "grad_norm": 9.289998992301207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368160 + }, + { + "epoch": 1.785557965646305, + "grad_norm": 1.0325768684538161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368170 + }, + { + "epoch": 1.7856064638391413, + "grad_norm": 1.3102925855434933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368180 + }, + { + "epoch": 1.7856549620319773, + "grad_norm": 1.7124197171369815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368190 + }, + { + "epoch": 1.7857034602248134, + "grad_norm": 1.3025196032856456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368200 + }, + { + "epoch": 1.7857519584176496, + "grad_norm": 1.1258521226409357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368210 + }, + { + "epoch": 1.7858004566104855, + "grad_norm": 1.2943867311321355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368220 + }, + { + "epoch": 1.7858489548033216, + "grad_norm": 1.428941232006764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368230 + }, + { + "epoch": 1.7858974529961578, + "grad_norm": 1.1927394183430806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368240 + }, + { + "epoch": 1.7859459511889937, + "grad_norm": 1.295291873759652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368250 + }, + { + "epoch": 1.78599444938183, + "grad_norm": 1.5099614003588613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368260 + }, + { + "epoch": 1.786042947574666, + "grad_norm": 2.0479919982108186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368270 + }, + { + "epoch": 1.7860914457675021, + "grad_norm": 9.80523306992609e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368280 + }, + { + "epoch": 1.7861399439603383, + "grad_norm": 1.1442717884335707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368290 + }, + { + "epoch": 1.7861884421531742, + "grad_norm": 1.5529403540881503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368300 + }, + { + "epoch": 1.7862369403460103, + "grad_norm": 1.4914766310880623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368310 + }, + { + "epoch": 1.7862854385388465, + "grad_norm": 1.1421163570446424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368320 + }, + { + "epoch": 1.7863339367316824, + "grad_norm": 1.3868967307928415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368330 + }, + { + "epoch": 1.7863824349245188, + "grad_norm": 1.0574904507620886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368340 + }, + { + "epoch": 1.7864309331173547, + "grad_norm": 1.9313111110363934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368350 + }, + { + "epoch": 1.7864794313101908, + "grad_norm": 1.735799060043064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368360 + }, + { + "epoch": 1.786527929503027, + "grad_norm": 1.3167548829073894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368370 + }, + { + "epoch": 1.786576427695863, + "grad_norm": 1.0153745400032221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368380 + }, + { + "epoch": 1.786624925888699, + "grad_norm": 1.4891329058741576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368390 + }, + { + "epoch": 1.7866734240815352, + "grad_norm": 1.2085480172174812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368400 + }, + { + "epoch": 1.786721922274371, + "grad_norm": 1.3867861525795888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368410 + }, + { + "epoch": 1.7867704204672075, + "grad_norm": 1.0220133184191127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368420 + }, + { + "epoch": 1.7868189186600434, + "grad_norm": 9.718945648273802e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368430 + }, + { + "epoch": 1.7868674168528795, + "grad_norm": 9.857942906421613e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368440 + }, + { + "epoch": 1.7869159150457157, + "grad_norm": 1.5417228382830217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368450 + }, + { + "epoch": 1.7869644132385516, + "grad_norm": 1.8237781063135117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368460 + }, + { + "epoch": 1.7870129114313877, + "grad_norm": 1.0199489253182037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368470 + }, + { + "epoch": 1.7870614096242239, + "grad_norm": 1.792462001048989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368480 + }, + { + "epoch": 1.7871099078170598, + "grad_norm": 1.2793015535805807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368490 + }, + { + "epoch": 1.7871584060098962, + "grad_norm": 1.3145000643532967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368500 + }, + { + "epoch": 1.787206904202732, + "grad_norm": 1.5023051247453623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368510 + }, + { + "epoch": 1.7872554023955682, + "grad_norm": 1.2403486238099504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368520 + }, + { + "epoch": 1.7873039005884044, + "grad_norm": 1.1270068434043878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368530 + }, + { + "epoch": 1.7873523987812403, + "grad_norm": 1.0621623580675532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368540 + }, + { + "epoch": 1.7874008969740767, + "grad_norm": 1.750560407742796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368550 + }, + { + "epoch": 1.7874493951669126, + "grad_norm": 1.2619931766266745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368560 + }, + { + "epoch": 1.7874978933597487, + "grad_norm": 1.904961344223466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368570 + }, + { + "epoch": 1.7875463915525849, + "grad_norm": 1.1035520941504728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368580 + }, + { + "epoch": 1.7875948897454208, + "grad_norm": 1.53118957513243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368590 + }, + { + "epoch": 1.787643387938257, + "grad_norm": 2.02553120942639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368600 + }, + { + "epoch": 1.787691886131093, + "grad_norm": 1.0447643639110993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368610 + }, + { + "epoch": 1.787740384323929, + "grad_norm": 1.327596343969617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368620 + }, + { + "epoch": 1.7877888825167654, + "grad_norm": 1.782077951872907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368630 + }, + { + "epoch": 1.7878373807096013, + "grad_norm": 1.507320313010041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368640 + }, + { + "epoch": 1.7878858789024374, + "grad_norm": 1.171777253006212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368650 + }, + { + "epoch": 1.7879343770952736, + "grad_norm": 1.0771503688999928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368660 + }, + { + "epoch": 1.7879828752881095, + "grad_norm": 1.0119646454143094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368670 + }, + { + "epoch": 1.7880313734809457, + "grad_norm": 1.4316294816296704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368680 + }, + { + "epoch": 1.7880798716737818, + "grad_norm": 1.5285674948017913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368690 + }, + { + "epoch": 1.7881283698666177, + "grad_norm": 1.4752783883409393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368700 + }, + { + "epoch": 1.788176868059454, + "grad_norm": 1.3422861044887213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368710 + }, + { + "epoch": 1.78822536625229, + "grad_norm": 1.1388527454414543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368720 + }, + { + "epoch": 1.7882738644451261, + "grad_norm": 1.3789866137869922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368730 + }, + { + "epoch": 1.7883223626379623, + "grad_norm": 1.3172437363095924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368740 + }, + { + "epoch": 1.7883708608307982, + "grad_norm": 1.6488739262854324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368750 + }, + { + "epoch": 1.7884193590236344, + "grad_norm": 2.5524451885416966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368760 + }, + { + "epoch": 1.7884678572164705, + "grad_norm": 1.579793718065048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368770 + }, + { + "epoch": 1.7885163554093064, + "grad_norm": 1.0646462378360866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368780 + }, + { + "epoch": 1.7885648536021428, + "grad_norm": 1.0051217635975718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368790 + }, + { + "epoch": 1.7886133517949787, + "grad_norm": 1.272879579516939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368800 + }, + { + "epoch": 1.7886618499878149, + "grad_norm": 1.150505291036552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368810 + }, + { + "epoch": 1.788710348180651, + "grad_norm": 1.4468475306728124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368820 + }, + { + "epoch": 1.788758846373487, + "grad_norm": 1.2192441722902458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368830 + }, + { + "epoch": 1.788807344566323, + "grad_norm": 1.823110906684633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368840 + }, + { + "epoch": 1.7888558427591592, + "grad_norm": 1.491560119859514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368850 + }, + { + "epoch": 1.7889043409519951, + "grad_norm": 1.177745634350913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368860 + }, + { + "epoch": 1.7889528391448315, + "grad_norm": 9.842772819013135e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368870 + }, + { + "epoch": 1.7890013373376674, + "grad_norm": 7.691419945388134e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368880 + }, + { + "epoch": 1.7890498355305036, + "grad_norm": 1.319688092138449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368890 + }, + { + "epoch": 1.7890983337233397, + "grad_norm": 1.6727401686011945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368900 + }, + { + "epoch": 1.7891468319161756, + "grad_norm": 1.3612361016157593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368910 + }, + { + "epoch": 1.7891953301090118, + "grad_norm": 1.6739868158310856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368920 + }, + { + "epoch": 1.789243828301848, + "grad_norm": 1.1407497169102498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368930 + }, + { + "epoch": 1.7892923264946838, + "grad_norm": 1.4433314099449035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368940 + }, + { + "epoch": 1.7893408246875202, + "grad_norm": 1.7569421473240254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368950 + }, + { + "epoch": 1.7893893228803561, + "grad_norm": 1.3754986483149878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368960 + }, + { + "epoch": 1.7894378210731923, + "grad_norm": 2.0383316368111082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368970 + }, + { + "epoch": 1.7894863192660284, + "grad_norm": 1.0852758691726194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368980 + }, + { + "epoch": 1.7895348174588643, + "grad_norm": 1.2073417821056864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 368990 + }, + { + "epoch": 1.7895833156517005, + "grad_norm": 1.2792342296563675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369000 + }, + { + "epoch": 1.7896318138445366, + "grad_norm": 1.129743054661958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369010 + }, + { + "epoch": 1.7896803120373725, + "grad_norm": 1.542657912523282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369020 + }, + { + "epoch": 1.789728810230209, + "grad_norm": 1.0451841170322496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369030 + }, + { + "epoch": 1.7897773084230448, + "grad_norm": 1.1433945346084329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369040 + }, + { + "epoch": 1.789825806615881, + "grad_norm": 1.74538978825467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369050 + }, + { + "epoch": 1.7898743048087171, + "grad_norm": 1.5467204406149904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369060 + }, + { + "epoch": 1.789922803001553, + "grad_norm": 1.1791547294137672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369070 + }, + { + "epoch": 1.7899713011943894, + "grad_norm": 1.3598287829097444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369080 + }, + { + "epoch": 1.7900197993872253, + "grad_norm": 1.505478408603267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369090 + }, + { + "epoch": 1.7900682975800615, + "grad_norm": 1.4317864227564314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369100 + }, + { + "epoch": 1.7901167957728976, + "grad_norm": 1.1394293508715236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369110 + }, + { + "epoch": 1.7901652939657335, + "grad_norm": 1.438798236108596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369120 + }, + { + "epoch": 1.7902137921585697, + "grad_norm": 1.4958921212837595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369130 + }, + { + "epoch": 1.7902622903514058, + "grad_norm": 1.784106729019186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369140 + }, + { + "epoch": 1.7903107885442417, + "grad_norm": 8.997254496989626e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369150 + }, + { + "epoch": 1.7903592867370781, + "grad_norm": 1.0864680710653829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369160 + }, + { + "epoch": 1.790407784929914, + "grad_norm": 1.217748035742261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369170 + }, + { + "epoch": 1.7904562831227502, + "grad_norm": 1.5418541110534534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369180 + }, + { + "epoch": 1.7905047813155863, + "grad_norm": 1.271422345183737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369190 + }, + { + "epoch": 1.7905532795084222, + "grad_norm": 1.5139383080509106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369200 + }, + { + "epoch": 1.7906017777012584, + "grad_norm": 1.5083147175687373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369210 + }, + { + "epoch": 1.7906502758940945, + "grad_norm": 1.6233604682724945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369220 + }, + { + "epoch": 1.7906987740869305, + "grad_norm": 1.4327960151661046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369230 + }, + { + "epoch": 1.7907472722797668, + "grad_norm": 8.868795475791558e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369240 + }, + { + "epoch": 1.7907957704726027, + "grad_norm": 1.0270556849434342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369250 + }, + { + "epoch": 1.7908442686654389, + "grad_norm": 1.0876338940590813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369260 + }, + { + "epoch": 1.790892766858275, + "grad_norm": 1.1837231639333368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369270 + }, + { + "epoch": 1.790941265051111, + "grad_norm": 1.2093049228667496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369280 + }, + { + "epoch": 1.790989763243947, + "grad_norm": 1.0860271792978438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369290 + }, + { + "epoch": 1.7910382614367832, + "grad_norm": 9.649905763353672e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369300 + }, + { + "epoch": 1.7910867596296192, + "grad_norm": 1.5317366930389653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369310 + }, + { + "epoch": 1.7911352578224555, + "grad_norm": 1.4826921024280182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369320 + }, + { + "epoch": 1.7911837560152915, + "grad_norm": 1.2432914253679428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369330 + }, + { + "epoch": 1.7912322542081276, + "grad_norm": 1.0736493472052189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369340 + }, + { + "epoch": 1.7912807524009637, + "grad_norm": 1.0726037835695479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369350 + }, + { + "epoch": 1.7913292505937997, + "grad_norm": 1.6176681327806364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369360 + }, + { + "epoch": 1.7913777487866358, + "grad_norm": 1.44205039021017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369370 + }, + { + "epoch": 1.791426246979472, + "grad_norm": 1.30648727392213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369380 + }, + { + "epoch": 1.7914747451723079, + "grad_norm": 1.1594840643169846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369390 + }, + { + "epoch": 1.7915232433651442, + "grad_norm": 1.0908563830014373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369400 + }, + { + "epoch": 1.7915717415579802, + "grad_norm": 1.3104370033545365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369410 + }, + { + "epoch": 1.7916202397508163, + "grad_norm": 1.0651329596100823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369420 + }, + { + "epoch": 1.7916687379436524, + "grad_norm": 1.1797955501435808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369430 + }, + { + "epoch": 1.7917172361364884, + "grad_norm": 1.139682925810348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369440 + }, + { + "epoch": 1.7917657343293245, + "grad_norm": 1.1856728043824205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369450 + }, + { + "epoch": 1.7918142325221607, + "grad_norm": 1.324874521202446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369460 + }, + { + "epoch": 1.7918627307149966, + "grad_norm": 1.559803664008541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369470 + }, + { + "epoch": 1.791911228907833, + "grad_norm": 1.8448396588155447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369480 + }, + { + "epoch": 1.7919597271006689, + "grad_norm": 1.1675425071189238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369490 + }, + { + "epoch": 1.792008225293505, + "grad_norm": 8.709663212869145e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369500 + }, + { + "epoch": 1.7920567234863412, + "grad_norm": 1.0450568410647065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369510 + }, + { + "epoch": 1.792105221679177, + "grad_norm": 1.482692546517228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369520 + }, + { + "epoch": 1.7921537198720132, + "grad_norm": 1.1126967791597053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369530 + }, + { + "epoch": 1.7922022180648494, + "grad_norm": 2.1550480511223213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369540 + }, + { + "epoch": 1.7922507162576853, + "grad_norm": 1.3087220196439375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369550 + }, + { + "epoch": 1.7922992144505216, + "grad_norm": 1.8118605282779754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369560 + }, + { + "epoch": 1.7923477126433576, + "grad_norm": 1.2016164063766155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369570 + }, + { + "epoch": 1.7923962108361937, + "grad_norm": 1.464894872071909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369580 + }, + { + "epoch": 1.7924447090290299, + "grad_norm": 1.1751472683840802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369590 + }, + { + "epoch": 1.7924932072218658, + "grad_norm": 1.057595877540507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369600 + }, + { + "epoch": 1.7925417054147021, + "grad_norm": 1.1582089953776631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369610 + }, + { + "epoch": 1.792590203607538, + "grad_norm": 1.2281489603083173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369620 + }, + { + "epoch": 1.7926387018003742, + "grad_norm": 1.1322889292841865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369630 + }, + { + "epoch": 1.7926871999932104, + "grad_norm": 1.0211066658882828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369640 + }, + { + "epoch": 1.7927356981860463, + "grad_norm": 1.2237557633909546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369650 + }, + { + "epoch": 1.7927841963788824, + "grad_norm": 1.2579365993303782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369660 + }, + { + "epoch": 1.7928326945717186, + "grad_norm": 1.2327383558385918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369670 + }, + { + "epoch": 1.7928811927645545, + "grad_norm": 1.0340726497304331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369680 + }, + { + "epoch": 1.7929296909573909, + "grad_norm": 1.5391634633488138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369690 + }, + { + "epoch": 1.7929781891502268, + "grad_norm": 1.0293224939061929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369700 + }, + { + "epoch": 1.793026687343063, + "grad_norm": 1.4499660139222215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369710 + }, + { + "epoch": 1.793075185535899, + "grad_norm": 1.0890160773158186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369720 + }, + { + "epoch": 1.793123683728735, + "grad_norm": 1.1651390963152153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369730 + }, + { + "epoch": 1.7931721819215711, + "grad_norm": 1.04958983726533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369740 + }, + { + "epoch": 1.7932206801144073, + "grad_norm": 1.2681178773732427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369750 + }, + { + "epoch": 1.7932691783072432, + "grad_norm": 1.4676174941996578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369760 + }, + { + "epoch": 1.7933176765000796, + "grad_norm": 1.3190236458626714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369770 + }, + { + "epoch": 1.7933661746929155, + "grad_norm": 1.467261334653358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369780 + }, + { + "epoch": 1.7934146728857516, + "grad_norm": 1.564107066087672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369790 + }, + { + "epoch": 1.7934631710785878, + "grad_norm": 1.4931888614455602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369800 + }, + { + "epoch": 1.7935116692714237, + "grad_norm": 9.894396413301365e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369810 + }, + { + "epoch": 1.7935601674642598, + "grad_norm": 1.0281484996710333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369820 + }, + { + "epoch": 1.793608665657096, + "grad_norm": 1.2663891268971383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369830 + }, + { + "epoch": 1.793657163849932, + "grad_norm": 1.280621919619307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369840 + }, + { + "epoch": 1.7937056620427683, + "grad_norm": 9.884891127853734e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369850 + }, + { + "epoch": 1.7937541602356042, + "grad_norm": 1.3539528609385343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369860 + }, + { + "epoch": 1.7938026584284403, + "grad_norm": 1.1220413931312123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369870 + }, + { + "epoch": 1.7938511566212765, + "grad_norm": 1.2922460435049743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369880 + }, + { + "epoch": 1.7938996548141124, + "grad_norm": 1.1091897178516774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369890 + }, + { + "epoch": 1.7939481530069485, + "grad_norm": 1.1771523311665533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369900 + }, + { + "epoch": 1.7939966511997847, + "grad_norm": 1.3869204451566475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369910 + }, + { + "epoch": 1.7940451493926206, + "grad_norm": 1.471532673491538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369920 + }, + { + "epoch": 1.794093647585457, + "grad_norm": 1.6462488261481667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369930 + }, + { + "epoch": 1.794142145778293, + "grad_norm": 1.1288571855061491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369940 + }, + { + "epoch": 1.794190643971129, + "grad_norm": 9.414314661171375e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369950 + }, + { + "epoch": 1.7942391421639652, + "grad_norm": 1.1640460151340903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369960 + }, + { + "epoch": 1.794287640356801, + "grad_norm": 2.822023681403607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369970 + }, + { + "epoch": 1.7943361385496372, + "grad_norm": 1.1012931011578075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369980 + }, + { + "epoch": 1.7943846367424734, + "grad_norm": 1.0902772906717928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 369990 + }, + { + "epoch": 1.7944331349353093, + "grad_norm": 1.2358671419576694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370000 + }, + { + "epoch": 1.7944816331281457, + "grad_norm": 1.3494182660167553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370010 + }, + { + "epoch": 1.7945301313209816, + "grad_norm": 1.351282108430496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370020 + }, + { + "epoch": 1.7945786295138177, + "grad_norm": 1.4223759947640247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370030 + }, + { + "epoch": 1.794627127706654, + "grad_norm": 9.826925051470425e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370040 + }, + { + "epoch": 1.7946756258994898, + "grad_norm": 1.0672200012606936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370050 + }, + { + "epoch": 1.794724124092326, + "grad_norm": 1.4226914757387021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370060 + }, + { + "epoch": 1.794772622285162, + "grad_norm": 1.5437995770639645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370070 + }, + { + "epoch": 1.794821120477998, + "grad_norm": 1.4986119012405652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370080 + }, + { + "epoch": 1.7948696186708344, + "grad_norm": 1.2920881253819516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370090 + }, + { + "epoch": 1.7949181168636703, + "grad_norm": 1.0387244842036125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370100 + }, + { + "epoch": 1.7949666150565065, + "grad_norm": 1.2680018812716298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370110 + }, + { + "epoch": 1.7950151132493426, + "grad_norm": 1.0792977178653018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370120 + }, + { + "epoch": 1.7950636114421785, + "grad_norm": 1.6056434404276843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370130 + }, + { + "epoch": 1.7951121096350149, + "grad_norm": 9.81752190654106e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370140 + }, + { + "epoch": 1.7951606078278508, + "grad_norm": 9.81631043117659e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370150 + }, + { + "epoch": 1.795209106020687, + "grad_norm": 1.386092307598119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370160 + }, + { + "epoch": 1.795257604213523, + "grad_norm": 1.0475369016660352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370170 + }, + { + "epoch": 1.795306102406359, + "grad_norm": 1.185908260481483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370180 + }, + { + "epoch": 1.7953546005991952, + "grad_norm": 1.9266495954184393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370190 + }, + { + "epoch": 1.7954030987920313, + "grad_norm": 1.4655445745859197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370200 + }, + { + "epoch": 1.7954515969848672, + "grad_norm": 9.956944602151907e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370210 + }, + { + "epoch": 1.7955000951777036, + "grad_norm": 1.314709585642504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370220 + }, + { + "epoch": 1.7955485933705395, + "grad_norm": 1.1496054774795539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370230 + }, + { + "epoch": 1.7955970915633757, + "grad_norm": 1.1263675325778877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370240 + }, + { + "epoch": 1.7956455897562118, + "grad_norm": 1.810546201852503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370250 + }, + { + "epoch": 1.7956940879490477, + "grad_norm": 9.70541869094177e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370260 + }, + { + "epoch": 1.7957425861418839, + "grad_norm": 9.965592795424527e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370270 + }, + { + "epoch": 1.79579108433472, + "grad_norm": 1.1288936896391988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370280 + }, + { + "epoch": 1.795839582527556, + "grad_norm": 1.1390354437423866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370290 + }, + { + "epoch": 1.7958880807203923, + "grad_norm": 1.330975951674418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370300 + }, + { + "epoch": 1.7959365789132282, + "grad_norm": 1.7271526431272832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370310 + }, + { + "epoch": 1.7959850771060644, + "grad_norm": 1.7314413014446473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370320 + }, + { + "epoch": 1.7960335752989005, + "grad_norm": 1.5959219723526985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370330 + }, + { + "epoch": 1.7960820734917364, + "grad_norm": 1.1099183794271994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370340 + }, + { + "epoch": 1.7961305716845726, + "grad_norm": 1.1615759909489043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370350 + }, + { + "epoch": 1.7961790698774087, + "grad_norm": 2.0776138143219214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370360 + }, + { + "epoch": 1.7962275680702446, + "grad_norm": 1.2832773066406844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370370 + }, + { + "epoch": 1.796276066263081, + "grad_norm": 1.5929092711530757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370380 + }, + { + "epoch": 1.796324564455917, + "grad_norm": 1.3869438930669276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370390 + }, + { + "epoch": 1.796373062648753, + "grad_norm": 1.3112875230092413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370400 + }, + { + "epoch": 1.7964215608415892, + "grad_norm": 1.1596493543208908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370410 + }, + { + "epoch": 1.7964700590344251, + "grad_norm": 1.3693412626025747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370420 + }, + { + "epoch": 1.7965185572272613, + "grad_norm": 8.714918564578511e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370430 + }, + { + "epoch": 1.7965670554200974, + "grad_norm": 9.587582283643314e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370440 + }, + { + "epoch": 1.7966155536129333, + "grad_norm": 1.3885255611967295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370450 + }, + { + "epoch": 1.7966640518057697, + "grad_norm": 1.4060144160055188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370460 + }, + { + "epoch": 1.7967125499986056, + "grad_norm": 1.010538319690113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370470 + }, + { + "epoch": 1.7967610481914418, + "grad_norm": 7.98411736724347e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370480 + }, + { + "epoch": 1.796809546384278, + "grad_norm": 8.891306357838857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370490 + }, + { + "epoch": 1.7968580445771138, + "grad_norm": 1.9609414536603254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370500 + }, + { + "epoch": 1.79690654276995, + "grad_norm": 1.4971956119325114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370510 + }, + { + "epoch": 1.7969550409627861, + "grad_norm": 1.4302469431015652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370520 + }, + { + "epoch": 1.797003539155622, + "grad_norm": 1.1930042731478352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370530 + }, + { + "epoch": 1.7970520373484584, + "grad_norm": 1.8779330090978874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370540 + }, + { + "epoch": 1.7971005355412943, + "grad_norm": 1.2098424484463521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370550 + }, + { + "epoch": 1.7971490337341305, + "grad_norm": 9.23501897176493e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370560 + }, + { + "epoch": 1.7971975319269666, + "grad_norm": 1.1415676404169517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370570 + }, + { + "epoch": 1.7972460301198026, + "grad_norm": 1.418572015410291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370580 + }, + { + "epoch": 1.7972945283126387, + "grad_norm": 1.293402185353898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370590 + }, + { + "epoch": 1.7973430265054748, + "grad_norm": 1.3338346427360648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370600 + }, + { + "epoch": 1.797391524698311, + "grad_norm": 1.4294998074149134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370610 + }, + { + "epoch": 1.7974400228911471, + "grad_norm": 8.885868929553453e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370620 + }, + { + "epoch": 1.797488521083983, + "grad_norm": 1.1350465456416714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370630 + }, + { + "epoch": 1.7975370192768192, + "grad_norm": 1.2534176363487859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370640 + }, + { + "epoch": 1.7975855174696553, + "grad_norm": 1.1777732566997656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370650 + }, + { + "epoch": 1.7976340156624913, + "grad_norm": 1.3787132324694085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370660 + }, + { + "epoch": 1.7976825138553276, + "grad_norm": 1.0022374041795956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370670 + }, + { + "epoch": 1.7977310120481635, + "grad_norm": 1.3449157343359275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370680 + }, + { + "epoch": 1.7977795102409997, + "grad_norm": 1.3683790101026716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370690 + }, + { + "epoch": 1.7978280084338358, + "grad_norm": 1.0470712297205864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370700 + }, + { + "epoch": 1.7978765066266718, + "grad_norm": 1.2773965885060079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370710 + }, + { + "epoch": 1.797925004819508, + "grad_norm": 1.2303015495263026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370720 + }, + { + "epoch": 1.797973503012344, + "grad_norm": 1.7303452892747373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370730 + }, + { + "epoch": 1.79802200120518, + "grad_norm": 1.3106433272014328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370740 + }, + { + "epoch": 1.7980704993980163, + "grad_norm": 2.6524872964728274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370750 + }, + { + "epoch": 1.7981189975908523, + "grad_norm": 1.0523567794962219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370760 + }, + { + "epoch": 1.7981674957836884, + "grad_norm": 1.3520744523987105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370770 + }, + { + "epoch": 1.7982159939765245, + "grad_norm": 1.1618881856634289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370780 + }, + { + "epoch": 1.7982644921693605, + "grad_norm": 1.64627422805097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370790 + }, + { + "epoch": 1.7983129903621966, + "grad_norm": 1.0651945103745675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370800 + }, + { + "epoch": 1.7983614885550328, + "grad_norm": 1.2171462060450722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370810 + }, + { + "epoch": 1.7984099867478687, + "grad_norm": 1.0624253476976264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370820 + }, + { + "epoch": 1.798458484940705, + "grad_norm": 1.1986716508260997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370830 + }, + { + "epoch": 1.798506983133541, + "grad_norm": 1.2777880087355697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370840 + }, + { + "epoch": 1.798555481326377, + "grad_norm": 1.3151794320265253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370850 + }, + { + "epoch": 1.7986039795192132, + "grad_norm": 8.234993131850388e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370860 + }, + { + "epoch": 1.7986524777120492, + "grad_norm": 9.353971819336948e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370870 + }, + { + "epoch": 1.7987009759048853, + "grad_norm": 1.4129071246316016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370880 + }, + { + "epoch": 1.7987494740977215, + "grad_norm": 1.8501124188219364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370890 + }, + { + "epoch": 1.7987979722905574, + "grad_norm": 1.666429838564909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370900 + }, + { + "epoch": 1.7988464704833937, + "grad_norm": 1.9515525195856753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370910 + }, + { + "epoch": 1.7988949686762297, + "grad_norm": 1.1054107851293793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370920 + }, + { + "epoch": 1.7989434668690658, + "grad_norm": 1.4131745551537733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370930 + }, + { + "epoch": 1.798991965061902, + "grad_norm": 8.670945739197577e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370940 + }, + { + "epoch": 1.7990404632547379, + "grad_norm": 1.1904509378268813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370950 + }, + { + "epoch": 1.799088961447574, + "grad_norm": 1.1728796600607438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370960 + }, + { + "epoch": 1.7991374596404102, + "grad_norm": 1.181418074480689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370970 + }, + { + "epoch": 1.799185957833246, + "grad_norm": 8.432131437530188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370980 + }, + { + "epoch": 1.7992344560260825, + "grad_norm": 1.3982345947738395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 370990 + }, + { + "epoch": 1.7992829542189184, + "grad_norm": 1.079211298105065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371000 + }, + { + "epoch": 1.7993314524117545, + "grad_norm": 1.0912061476631152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371010 + }, + { + "epoch": 1.7993799506045907, + "grad_norm": 1.1905472163675768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371020 + }, + { + "epoch": 1.7994284487974266, + "grad_norm": 1.4860717101328191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371030 + }, + { + "epoch": 1.7994769469902627, + "grad_norm": 1.687367401359552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371040 + }, + { + "epoch": 1.7995254451830989, + "grad_norm": 1.9730286737740244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371050 + }, + { + "epoch": 1.7995739433759348, + "grad_norm": 1.5196013336549186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371060 + }, + { + "epoch": 1.7996224415687712, + "grad_norm": 1.3924434938417107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371070 + }, + { + "epoch": 1.799670939761607, + "grad_norm": 9.49524370241761e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371080 + }, + { + "epoch": 1.7997194379544432, + "grad_norm": 1.4159160066640197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371090 + }, + { + "epoch": 1.7997679361472794, + "grad_norm": 2.1346613365835765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371100 + }, + { + "epoch": 1.7998164343401153, + "grad_norm": 1.2704649776651422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371110 + }, + { + "epoch": 1.7998649325329517, + "grad_norm": 1.0022788821117956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371120 + }, + { + "epoch": 1.7999134307257876, + "grad_norm": 1.0576150621943725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371130 + }, + { + "epoch": 1.7999619289186237, + "grad_norm": 1.2411113026189469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371140 + }, + { + "epoch": 1.8000104271114599, + "grad_norm": 1.759769219233931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371150 + }, + { + "epoch": 1.8000589253042958, + "grad_norm": 1.3570345736013678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371160 + }, + { + "epoch": 1.800107423497132, + "grad_norm": 1.09490603250606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371170 + }, + { + "epoch": 1.800155921689968, + "grad_norm": 1.050303577443401e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371180 + }, + { + "epoch": 1.800204419882804, + "grad_norm": 1.3691660250003679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371190 + }, + { + "epoch": 1.8002529180756404, + "grad_norm": 1.2673398330775854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371200 + }, + { + "epoch": 1.8003014162684763, + "grad_norm": 1.553400252873871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371210 + }, + { + "epoch": 1.8003499144613124, + "grad_norm": 1.1459874826869054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371220 + }, + { + "epoch": 1.8003984126541486, + "grad_norm": 1.4566176709251977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371230 + }, + { + "epoch": 1.8004469108469845, + "grad_norm": 1.2732095377998576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371240 + }, + { + "epoch": 1.8004954090398206, + "grad_norm": 1.4282657723185821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371250 + }, + { + "epoch": 1.8005439072326568, + "grad_norm": 1.460526100061088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371260 + }, + { + "epoch": 1.8005924054254927, + "grad_norm": 1.1790519671706079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371270 + }, + { + "epoch": 1.800640903618329, + "grad_norm": 1.135871219304363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371280 + }, + { + "epoch": 1.800689401811165, + "grad_norm": 1.4158994865454133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371290 + }, + { + "epoch": 1.8007379000040011, + "grad_norm": 1.4419016203248702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371300 + }, + { + "epoch": 1.8007863981968373, + "grad_norm": 8.857203859236051e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371310 + }, + { + "epoch": 1.8008348963896732, + "grad_norm": 1.4115569157979735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371320 + }, + { + "epoch": 1.8008833945825093, + "grad_norm": 2.1469904964988018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371330 + }, + { + "epoch": 1.8009318927753455, + "grad_norm": 1.2120653813951776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371340 + }, + { + "epoch": 1.8009803909681814, + "grad_norm": 1.3742849525044676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371350 + }, + { + "epoch": 1.8010288891610178, + "grad_norm": 1.87299722398393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371360 + }, + { + "epoch": 1.8010773873538537, + "grad_norm": 1.1482803152773613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371370 + }, + { + "epoch": 1.8011258855466898, + "grad_norm": 1.2618440514700069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371380 + }, + { + "epoch": 1.801174383739526, + "grad_norm": 1.1255666620968441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371390 + }, + { + "epoch": 1.801222881932362, + "grad_norm": 1.7165863397394787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371400 + }, + { + "epoch": 1.801271380125198, + "grad_norm": 1.2420427353276864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371410 + }, + { + "epoch": 1.8013198783180342, + "grad_norm": 1.1314363668191163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371420 + }, + { + "epoch": 1.8013683765108701, + "grad_norm": 1.5318377677431272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371430 + }, + { + "epoch": 1.8014168747037065, + "grad_norm": 1.530482229838981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371440 + }, + { + "epoch": 1.8014653728965424, + "grad_norm": 1.1894965901149135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371450 + }, + { + "epoch": 1.8015138710893785, + "grad_norm": 1.0647733361679457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371460 + }, + { + "epoch": 1.8015623692822147, + "grad_norm": 2.2363373375355877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371470 + }, + { + "epoch": 1.8016108674750506, + "grad_norm": 1.3178525826162968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371480 + }, + { + "epoch": 1.8016593656678868, + "grad_norm": 1.1285697709695341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371490 + }, + { + "epoch": 1.801707863860723, + "grad_norm": 9.40876176969141e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371500 + }, + { + "epoch": 1.8017563620535588, + "grad_norm": 1.161283513795297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371510 + }, + { + "epoch": 1.8018048602463952, + "grad_norm": 1.6229108723564423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371520 + }, + { + "epoch": 1.8018533584392311, + "grad_norm": 1.1701609459180418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371530 + }, + { + "epoch": 1.8019018566320673, + "grad_norm": 1.3753717276188127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371540 + }, + { + "epoch": 1.8019503548249034, + "grad_norm": 1.3504186213708635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371550 + }, + { + "epoch": 1.8019988530177393, + "grad_norm": 1.126052850963788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371560 + }, + { + "epoch": 1.8020473512105755, + "grad_norm": 1.381620240437087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371570 + }, + { + "epoch": 1.8020958494034116, + "grad_norm": 1.0559422669587093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371580 + }, + { + "epoch": 1.8021443475962475, + "grad_norm": 1.9495784542300498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371590 + }, + { + "epoch": 1.802192845789084, + "grad_norm": 1.0333405242590743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371600 + }, + { + "epoch": 1.8022413439819198, + "grad_norm": 1.4146496418732113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371610 + }, + { + "epoch": 1.802289842174756, + "grad_norm": 1.3142107846420004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371620 + }, + { + "epoch": 1.802338340367592, + "grad_norm": 1.3550567778963796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371630 + }, + { + "epoch": 1.802386838560428, + "grad_norm": 9.087360197668204e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371640 + }, + { + "epoch": 1.8024353367532644, + "grad_norm": 1.1681879463765199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371650 + }, + { + "epoch": 1.8024838349461003, + "grad_norm": 2.0456710103644582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371660 + }, + { + "epoch": 1.8025323331389365, + "grad_norm": 1.4062126574287959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371670 + }, + { + "epoch": 1.8025808313317726, + "grad_norm": 1.2415099170937083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371680 + }, + { + "epoch": 1.8026293295246085, + "grad_norm": 9.097257169798922e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371690 + }, + { + "epoch": 1.8026778277174447, + "grad_norm": 1.6757574883285997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371700 + }, + { + "epoch": 1.8027263259102808, + "grad_norm": 1.4094521105789681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371710 + }, + { + "epoch": 1.8027748241031167, + "grad_norm": 1.1321021453625235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371720 + }, + { + "epoch": 1.802823322295953, + "grad_norm": 9.724218763551562e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371730 + }, + { + "epoch": 1.802871820488789, + "grad_norm": 1.5916979734242886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371740 + }, + { + "epoch": 1.8029203186816252, + "grad_norm": 8.50015879905186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371750 + }, + { + "epoch": 1.8029688168744613, + "grad_norm": 9.475384032953116e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371760 + }, + { + "epoch": 1.8030173150672972, + "grad_norm": 1.3468262061167025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371770 + }, + { + "epoch": 1.8030658132601334, + "grad_norm": 1.0533063310447233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371780 + }, + { + "epoch": 1.8031143114529695, + "grad_norm": 9.059196948157933e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371790 + }, + { + "epoch": 1.8031628096458054, + "grad_norm": 1.2836411933392355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371800 + }, + { + "epoch": 1.8032113078386418, + "grad_norm": 1.3028876644227694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371810 + }, + { + "epoch": 1.8032598060314777, + "grad_norm": 7.4454375997845545e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371820 + }, + { + "epoch": 1.8033083042243139, + "grad_norm": 1.590453102551237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371830 + }, + { + "epoch": 1.80335680241715, + "grad_norm": 1.6026005411617916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371840 + }, + { + "epoch": 1.803405300609986, + "grad_norm": 1.2582632713531439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371850 + }, + { + "epoch": 1.803453798802822, + "grad_norm": 1.706894536823711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371860 + }, + { + "epoch": 1.8035022969956582, + "grad_norm": 9.134475398298036e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371870 + }, + { + "epoch": 1.8035507951884941, + "grad_norm": 1.134085358955872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371880 + }, + { + "epoch": 1.8035992933813305, + "grad_norm": 1.556939466240692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371890 + }, + { + "epoch": 1.8036477915741664, + "grad_norm": 1.2701092622080523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371900 + }, + { + "epoch": 1.8036962897670026, + "grad_norm": 1.5566401501132532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371910 + }, + { + "epoch": 1.8037447879598387, + "grad_norm": 1.161002138871936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371920 + }, + { + "epoch": 1.8037932861526746, + "grad_norm": 1.81448243097293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371930 + }, + { + "epoch": 1.8038417843455108, + "grad_norm": 1.0673484318601822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371940 + }, + { + "epoch": 1.803890282538347, + "grad_norm": 1.376762970295431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371950 + }, + { + "epoch": 1.8039387807311829, + "grad_norm": 1.230723789547028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371960 + }, + { + "epoch": 1.8039872789240192, + "grad_norm": 1.4016666938232447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371970 + }, + { + "epoch": 1.8040357771168551, + "grad_norm": 9.634354647403143e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371980 + }, + { + "epoch": 1.8040842753096913, + "grad_norm": 9.648990939581381e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 371990 + }, + { + "epoch": 1.8041327735025274, + "grad_norm": 1.548187178457283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372000 + }, + { + "epoch": 1.8041812716953634, + "grad_norm": 1.6996620999520928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372010 + }, + { + "epoch": 1.8042297698881995, + "grad_norm": 1.4495149081028558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372020 + }, + { + "epoch": 1.8042782680810356, + "grad_norm": 9.337916218044029e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372030 + }, + { + "epoch": 1.8043267662738716, + "grad_norm": 1.431861917922106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372040 + }, + { + "epoch": 1.804375264466708, + "grad_norm": 1.2072414179442603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372050 + }, + { + "epoch": 1.8044237626595439, + "grad_norm": 1.1892863582829705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372060 + }, + { + "epoch": 1.80447226085238, + "grad_norm": 1.579763342363094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372070 + }, + { + "epoch": 1.8045207590452161, + "grad_norm": 9.428952729706452e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372080 + }, + { + "epoch": 1.804569257238052, + "grad_norm": 1.0036645292643698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372090 + }, + { + "epoch": 1.8046177554308882, + "grad_norm": 1.51694319328044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372100 + }, + { + "epoch": 1.8046662536237243, + "grad_norm": 9.95326132624541e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372110 + }, + { + "epoch": 1.8047147518165603, + "grad_norm": 1.2188274389757225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372120 + }, + { + "epoch": 1.8047632500093966, + "grad_norm": 9.767915365443969e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372130 + }, + { + "epoch": 1.8048117482022326, + "grad_norm": 1.0074143297345017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372140 + }, + { + "epoch": 1.8048602463950687, + "grad_norm": 1.2323342346576283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372150 + }, + { + "epoch": 1.8049087445879048, + "grad_norm": 1.4780765056343625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372160 + }, + { + "epoch": 1.8049572427807408, + "grad_norm": 1.1788744203045098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372170 + }, + { + "epoch": 1.8050057409735771, + "grad_norm": 1.9837539610989552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372180 + }, + { + "epoch": 1.805054239166413, + "grad_norm": 1.6867728547254046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372190 + }, + { + "epoch": 1.8051027373592492, + "grad_norm": 9.804362655074783e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372200 + }, + { + "epoch": 1.8051512355520853, + "grad_norm": 1.9686240193550475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372210 + }, + { + "epoch": 1.8051997337449213, + "grad_norm": 1.755880418841116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372220 + }, + { + "epoch": 1.8052482319377574, + "grad_norm": 1.196701315819837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372230 + }, + { + "epoch": 1.8052967301305936, + "grad_norm": 9.580317872348587e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372240 + }, + { + "epoch": 1.8053452283234295, + "grad_norm": 1.373327140896663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372250 + }, + { + "epoch": 1.8053937265162658, + "grad_norm": 1.524851356293766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372260 + }, + { + "epoch": 1.8054422247091018, + "grad_norm": 1.5749929360708848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372270 + }, + { + "epoch": 1.805490722901938, + "grad_norm": 1.5305049672065252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372280 + }, + { + "epoch": 1.805539221094774, + "grad_norm": 9.728959859955921e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372290 + }, + { + "epoch": 1.80558771928761, + "grad_norm": 9.747517459857136e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372300 + }, + { + "epoch": 1.8056362174804461, + "grad_norm": 1.0892649449090186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372310 + }, + { + "epoch": 1.8056847156732823, + "grad_norm": 1.0606341582786172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372320 + }, + { + "epoch": 1.8057332138661182, + "grad_norm": 1.2933202953036016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372330 + }, + { + "epoch": 1.8057817120589545, + "grad_norm": 1.1187917259292135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372340 + }, + { + "epoch": 1.8058302102517905, + "grad_norm": 8.667986328703137e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372350 + }, + { + "epoch": 1.8058787084446266, + "grad_norm": 1.2301742735587595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372360 + }, + { + "epoch": 1.8059272066374628, + "grad_norm": 1.0099176606104265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372370 + }, + { + "epoch": 1.8059757048302987, + "grad_norm": 1.1016001444374979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372380 + }, + { + "epoch": 1.8060242030231348, + "grad_norm": 1.4967501016371898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372390 + }, + { + "epoch": 1.806072701215971, + "grad_norm": 1.0274366246676436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372400 + }, + { + "epoch": 1.8061211994088069, + "grad_norm": 1.3218008909632317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372410 + }, + { + "epoch": 1.8061696976016433, + "grad_norm": 1.4586079899459037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372420 + }, + { + "epoch": 1.8062181957944792, + "grad_norm": 8.309502419479031e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372430 + }, + { + "epoch": 1.8062666939873153, + "grad_norm": 1.1714136327611868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372440 + }, + { + "epoch": 1.8063151921801515, + "grad_norm": 1.865432608383344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372450 + }, + { + "epoch": 1.8063636903729874, + "grad_norm": 1.8133054169311436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372460 + }, + { + "epoch": 1.8064121885658235, + "grad_norm": 1.1702772084731805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372470 + }, + { + "epoch": 1.8064606867586597, + "grad_norm": 9.945091861141009e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372480 + }, + { + "epoch": 1.8065091849514956, + "grad_norm": 1.2709903352003948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372490 + }, + { + "epoch": 1.806557683144332, + "grad_norm": 1.3126356002146622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372500 + }, + { + "epoch": 1.8066061813371679, + "grad_norm": 1.2544814076420607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372510 + }, + { + "epoch": 1.806654679530004, + "grad_norm": 1.2057690845779234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372520 + }, + { + "epoch": 1.8067031777228402, + "grad_norm": 9.993065042124272e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372530 + }, + { + "epoch": 1.806751675915676, + "grad_norm": 1.4737023157351814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372540 + }, + { + "epoch": 1.8068001741085122, + "grad_norm": 1.6882536257867287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372550 + }, + { + "epoch": 1.8068486723013484, + "grad_norm": 1.2298940532673441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372560 + }, + { + "epoch": 1.8068971704941843, + "grad_norm": 1.1700216795418328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372570 + }, + { + "epoch": 1.8069456686870207, + "grad_norm": 1.1154222434583971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372580 + }, + { + "epoch": 1.8069941668798566, + "grad_norm": 1.3785573571567511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372590 + }, + { + "epoch": 1.8070426650726927, + "grad_norm": 1.4595031849751194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372600 + }, + { + "epoch": 1.8070911632655289, + "grad_norm": 2.0080344498296654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372610 + }, + { + "epoch": 1.8071396614583648, + "grad_norm": 9.656497823584687e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372620 + }, + { + "epoch": 1.807188159651201, + "grad_norm": 1.1486030793150803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372630 + }, + { + "epoch": 1.807236657844037, + "grad_norm": 1.222646961451801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372640 + }, + { + "epoch": 1.8072851560368732, + "grad_norm": 1.36361526514861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372650 + }, + { + "epoch": 1.8073336542297094, + "grad_norm": 1.7025653775704086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372660 + }, + { + "epoch": 1.8073821524225453, + "grad_norm": 9.327648875512295e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372670 + }, + { + "epoch": 1.8074306506153814, + "grad_norm": 8.77951844557856e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372680 + }, + { + "epoch": 1.8074791488082176, + "grad_norm": 1.378297120879779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372690 + }, + { + "epoch": 1.8075276470010535, + "grad_norm": 1.1967045132621479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372700 + }, + { + "epoch": 1.8075761451938899, + "grad_norm": 1.555687312304599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372710 + }, + { + "epoch": 1.8076246433867258, + "grad_norm": 1.247621117528297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372720 + }, + { + "epoch": 1.807673141579562, + "grad_norm": 1.2659052472940857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372730 + }, + { + "epoch": 1.807721639772398, + "grad_norm": 1.6582003325993355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372740 + }, + { + "epoch": 1.807770137965234, + "grad_norm": 1.2855053022065022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372750 + }, + { + "epoch": 1.8078186361580701, + "grad_norm": 1.5240775752545233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372760 + }, + { + "epoch": 1.8078671343509063, + "grad_norm": 1.4872224340933826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372770 + }, + { + "epoch": 1.8079156325437422, + "grad_norm": 1.7439017341303042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372780 + }, + { + "epoch": 1.8079641307365786, + "grad_norm": 1.6940825631195366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372790 + }, + { + "epoch": 1.8080126289294145, + "grad_norm": 1.2786863123892545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372800 + }, + { + "epoch": 1.8080611271222506, + "grad_norm": 1.76924963568581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372810 + }, + { + "epoch": 1.8081096253150868, + "grad_norm": 1.0380071024940207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372820 + }, + { + "epoch": 1.8081581235079227, + "grad_norm": 1.2104393931622326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372830 + }, + { + "epoch": 1.8082066217007589, + "grad_norm": 9.899975061955502e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372840 + }, + { + "epoch": 1.808255119893595, + "grad_norm": 1.449363296046613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372850 + }, + { + "epoch": 1.808303618086431, + "grad_norm": 1.8004334023657975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372860 + }, + { + "epoch": 1.8083521162792673, + "grad_norm": 1.7132016694176855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372870 + }, + { + "epoch": 1.8084006144721032, + "grad_norm": 1.7822502584863287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372880 + }, + { + "epoch": 1.8084491126649394, + "grad_norm": 1.7560447318487604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372890 + }, + { + "epoch": 1.8084976108577755, + "grad_norm": 8.026778353098507e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372900 + }, + { + "epoch": 1.8085461090506114, + "grad_norm": 8.844422971776567e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372910 + }, + { + "epoch": 1.8085946072434476, + "grad_norm": 1.5010165554940613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372920 + }, + { + "epoch": 1.8086431054362837, + "grad_norm": 1.3180613045449263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372930 + }, + { + "epoch": 1.8086916036291196, + "grad_norm": 1.5396386388033534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372940 + }, + { + "epoch": 1.808740101821956, + "grad_norm": 1.070550847970253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372950 + }, + { + "epoch": 1.808788600014792, + "grad_norm": 1.048189179897463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372960 + }, + { + "epoch": 1.808837098207628, + "grad_norm": 9.567034275903552e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372970 + }, + { + "epoch": 1.8088855964004642, + "grad_norm": 1.3265727183409126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372980 + }, + { + "epoch": 1.8089340945933001, + "grad_norm": 1.1505454367011225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 372990 + }, + { + "epoch": 1.8089825927861363, + "grad_norm": 1.6643010525285717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373000 + }, + { + "epoch": 1.8090310909789724, + "grad_norm": 9.501367692621443e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373010 + }, + { + "epoch": 1.8090795891718083, + "grad_norm": 1.1749342831990361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373020 + }, + { + "epoch": 1.8091280873646447, + "grad_norm": 9.798220013124137e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373030 + }, + { + "epoch": 1.8091765855574806, + "grad_norm": 1.612871436407204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373040 + }, + { + "epoch": 1.8092250837503168, + "grad_norm": 1.941933014393271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373050 + }, + { + "epoch": 1.809273581943153, + "grad_norm": 1.2469717702856542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373060 + }, + { + "epoch": 1.8093220801359888, + "grad_norm": 1.2174491637040319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373070 + }, + { + "epoch": 1.809370578328825, + "grad_norm": 1.0997876387364158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373080 + }, + { + "epoch": 1.8094190765216611, + "grad_norm": 9.100685538498965e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373090 + }, + { + "epoch": 1.809467574714497, + "grad_norm": 1.3126090436799132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373100 + }, + { + "epoch": 1.8095160729073334, + "grad_norm": 1.4416945859352381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373110 + }, + { + "epoch": 1.8095645711001693, + "grad_norm": 1.309718822284367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373120 + }, + { + "epoch": 1.8096130692930055, + "grad_norm": 1.588788123285667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373130 + }, + { + "epoch": 1.8096615674858416, + "grad_norm": 1.0449613618845888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373140 + }, + { + "epoch": 1.8097100656786775, + "grad_norm": 1.5987888346558066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373150 + }, + { + "epoch": 1.809758563871514, + "grad_norm": 9.534621980833435e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373160 + }, + { + "epoch": 1.8098070620643498, + "grad_norm": 1.0314236575936775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373170 + }, + { + "epoch": 1.809855560257186, + "grad_norm": 1.3514491747912416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373180 + }, + { + "epoch": 1.8099040584500221, + "grad_norm": 1.9213540980445032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373190 + }, + { + "epoch": 1.809952556642858, + "grad_norm": 1.1531437138501133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373200 + }, + { + "epoch": 1.8100010548356942, + "grad_norm": 1.1130425470184946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373210 + }, + { + "epoch": 1.8100495530285303, + "grad_norm": 8.288343344986515e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373220 + }, + { + "epoch": 1.8100980512213662, + "grad_norm": 1.8678532498483946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373230 + }, + { + "epoch": 1.8101465494142026, + "grad_norm": 1.2183141606669778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373240 + }, + { + "epoch": 1.8101950476070385, + "grad_norm": 1.6525472545936282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373250 + }, + { + "epoch": 1.8102435457998747, + "grad_norm": 9.349478524711685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373260 + }, + { + "epoch": 1.8102920439927108, + "grad_norm": 1.3278444122022393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373270 + }, + { + "epoch": 1.8103405421855467, + "grad_norm": 1.0737923439307906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373280 + }, + { + "epoch": 1.8103890403783829, + "grad_norm": 1.3181012725738128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373290 + }, + { + "epoch": 1.810437538571219, + "grad_norm": 1.3991614977726385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373300 + }, + { + "epoch": 1.810486036764055, + "grad_norm": 1.4398701786433321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373310 + }, + { + "epoch": 1.8105345349568913, + "grad_norm": 1.1387187193179216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373320 + }, + { + "epoch": 1.8105830331497272, + "grad_norm": 1.2235242152769388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373330 + }, + { + "epoch": 1.8106315313425634, + "grad_norm": 9.491153640794892e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373340 + }, + { + "epoch": 1.8106800295353995, + "grad_norm": 1.1168005187300878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373350 + }, + { + "epoch": 1.8107285277282354, + "grad_norm": 1.000580773791171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373360 + }, + { + "epoch": 1.8107770259210716, + "grad_norm": 1.2270013449722228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373370 + }, + { + "epoch": 1.8108255241139077, + "grad_norm": 1.0318952803345383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373380 + }, + { + "epoch": 1.8108740223067437, + "grad_norm": 1.6294176674591654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373390 + }, + { + "epoch": 1.81092252049958, + "grad_norm": 1.5917098750151126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373400 + }, + { + "epoch": 1.810971018692416, + "grad_norm": 1.0842811093425553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373410 + }, + { + "epoch": 1.811019516885252, + "grad_norm": 8.393102213233306e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373420 + }, + { + "epoch": 1.8110680150780882, + "grad_norm": 1.0713828046959861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373430 + }, + { + "epoch": 1.8111165132709242, + "grad_norm": 1.2168047902605394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373440 + }, + { + "epoch": 1.8111650114637603, + "grad_norm": 1.7258091844496448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373450 + }, + { + "epoch": 1.8112135096565964, + "grad_norm": 2.0204884094709996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373460 + }, + { + "epoch": 1.8112620078494324, + "grad_norm": 1.405978444779521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373470 + }, + { + "epoch": 1.8113105060422687, + "grad_norm": 1.3032171786164781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373480 + }, + { + "epoch": 1.8113590042351047, + "grad_norm": 1.2952603434257526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373490 + }, + { + "epoch": 1.8114075024279408, + "grad_norm": 1.2429890006160349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373500 + }, + { + "epoch": 1.811456000620777, + "grad_norm": 1.2559808304501985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373510 + }, + { + "epoch": 1.8115044988136129, + "grad_norm": 9.084303975726016e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373520 + }, + { + "epoch": 1.811552997006449, + "grad_norm": 1.120508219543126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373530 + }, + { + "epoch": 1.8116014951992852, + "grad_norm": 1.1245205655541213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373540 + }, + { + "epoch": 1.811649993392121, + "grad_norm": 1.2417753048055147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373550 + }, + { + "epoch": 1.8116984915849574, + "grad_norm": 1.0325025279200872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373560 + }, + { + "epoch": 1.8117469897777934, + "grad_norm": 1.215926292985614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373570 + }, + { + "epoch": 1.8117954879706295, + "grad_norm": 1.4883421606270986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373580 + }, + { + "epoch": 1.8118439861634656, + "grad_norm": 1.1183833414918354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373590 + }, + { + "epoch": 1.8118924843563016, + "grad_norm": 1.4710049178745521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373600 + }, + { + "epoch": 1.8119409825491377, + "grad_norm": 2.342855331960436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373610 + }, + { + "epoch": 1.8119894807419739, + "grad_norm": 1.045016784217978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373620 + }, + { + "epoch": 1.8120379789348098, + "grad_norm": 8.686882324582257e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373630 + }, + { + "epoch": 1.8120864771276461, + "grad_norm": 1.444624420088303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373640 + }, + { + "epoch": 1.812134975320482, + "grad_norm": 8.031737941394113e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373650 + }, + { + "epoch": 1.8121834735133182, + "grad_norm": 1.2392594506138721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373660 + }, + { + "epoch": 1.8122319717061544, + "grad_norm": 8.634072123925307e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373670 + }, + { + "epoch": 1.8122804698989903, + "grad_norm": 1.2339333110844564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373680 + }, + { + "epoch": 1.8123289680918266, + "grad_norm": 1.360194268329451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373690 + }, + { + "epoch": 1.8123774662846626, + "grad_norm": 1.0346588474874352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373700 + }, + { + "epoch": 1.8124259644774987, + "grad_norm": 9.453151150751182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373710 + }, + { + "epoch": 1.8124744626703349, + "grad_norm": 9.901289566016658e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373720 + }, + { + "epoch": 1.8125229608631708, + "grad_norm": 1.5220917859437577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373730 + }, + { + "epoch": 1.812571459056007, + "grad_norm": 1.0884894763307784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373740 + }, + { + "epoch": 1.812619957248843, + "grad_norm": 1.0802724048630807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373750 + }, + { + "epoch": 1.812668455441679, + "grad_norm": 1.0739110933855045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373760 + }, + { + "epoch": 1.8127169536345153, + "grad_norm": 1.2815062788718024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373770 + }, + { + "epoch": 1.8127654518273513, + "grad_norm": 8.916585692020362e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373780 + }, + { + "epoch": 1.8128139500201874, + "grad_norm": 1.032695529090688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373790 + }, + { + "epoch": 1.8128624482130236, + "grad_norm": 1.1291491297527045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373800 + }, + { + "epoch": 1.8129109464058595, + "grad_norm": 1.2775657864949608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373810 + }, + { + "epoch": 1.8129594445986956, + "grad_norm": 1.6151060933111694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373820 + }, + { + "epoch": 1.8130079427915318, + "grad_norm": 1.586993114699453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373830 + }, + { + "epoch": 1.8130564409843677, + "grad_norm": 1.73416410120808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373840 + }, + { + "epoch": 1.813104939177204, + "grad_norm": 1.2220442435761925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373850 + }, + { + "epoch": 1.81315343737004, + "grad_norm": 1.4343665810656603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373860 + }, + { + "epoch": 1.8132019355628761, + "grad_norm": 1.6173713035527726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373870 + }, + { + "epoch": 1.8132504337557123, + "grad_norm": 1.1325961501995607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373880 + }, + { + "epoch": 1.8132989319485482, + "grad_norm": 9.711123460931503e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373890 + }, + { + "epoch": 1.8133474301413843, + "grad_norm": 1.3950147703667426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373900 + }, + { + "epoch": 1.8133959283342205, + "grad_norm": 1.739080346396804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373910 + }, + { + "epoch": 1.8134444265270564, + "grad_norm": 1.10667635055961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373920 + }, + { + "epoch": 1.8134929247198928, + "grad_norm": 8.469951850997859e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373930 + }, + { + "epoch": 1.8135414229127287, + "grad_norm": 1.557691753362178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373940 + }, + { + "epoch": 1.8135899211055648, + "grad_norm": 1.3522176267599662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373950 + }, + { + "epoch": 1.813638419298401, + "grad_norm": 9.903124542631758e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373960 + }, + { + "epoch": 1.813686917491237, + "grad_norm": 8.820278729615438e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373970 + }, + { + "epoch": 1.813735415684073, + "grad_norm": 1.1255408161048308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373980 + }, + { + "epoch": 1.8137839138769092, + "grad_norm": 1.0561281627019525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 373990 + }, + { + "epoch": 1.813832412069745, + "grad_norm": 1.3448881119870748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374000 + }, + { + "epoch": 1.8138809102625815, + "grad_norm": 9.419045099434697e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374010 + }, + { + "epoch": 1.8139294084554174, + "grad_norm": 1.9852583577062433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374020 + }, + { + "epoch": 1.8139779066482535, + "grad_norm": 1.2385561909411535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374030 + }, + { + "epoch": 1.8140264048410897, + "grad_norm": 1.1113630904446836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374040 + }, + { + "epoch": 1.8140749030339256, + "grad_norm": 1.429652218831734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374050 + }, + { + "epoch": 1.8141234012267617, + "grad_norm": 1.2342875166382328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374060 + }, + { + "epoch": 1.8141718994195979, + "grad_norm": 1.4536379211449457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374070 + }, + { + "epoch": 1.8142203976124338, + "grad_norm": 1.1871152949538555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374080 + }, + { + "epoch": 1.8142688958052702, + "grad_norm": 1.2692479067766271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374090 + }, + { + "epoch": 1.814317393998106, + "grad_norm": 1.260268067682091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374100 + }, + { + "epoch": 1.8143658921909422, + "grad_norm": 1.0338233380480233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374110 + }, + { + "epoch": 1.8144143903837784, + "grad_norm": 1.036703256573901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374120 + }, + { + "epoch": 1.8144628885766143, + "grad_norm": 1.420600881374412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374130 + }, + { + "epoch": 1.8145113867694505, + "grad_norm": 1.0806037842314709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374140 + }, + { + "epoch": 1.8145598849622866, + "grad_norm": 1.0949985806973928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374150 + }, + { + "epoch": 1.8146083831551225, + "grad_norm": 1.2020023198999752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374160 + }, + { + "epoch": 1.8146568813479589, + "grad_norm": 1.3285037958610246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374170 + }, + { + "epoch": 1.8147053795407948, + "grad_norm": 1.1357108142817651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374180 + }, + { + "epoch": 1.814753877733631, + "grad_norm": 1.1766510432664745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374190 + }, + { + "epoch": 1.814802375926467, + "grad_norm": 1.4473299891903935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374200 + }, + { + "epoch": 1.814850874119303, + "grad_norm": 9.071354334366788e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374210 + }, + { + "epoch": 1.8148993723121394, + "grad_norm": 9.981675042070037e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374220 + }, + { + "epoch": 1.8149478705049753, + "grad_norm": 1.3194924264325891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374230 + }, + { + "epoch": 1.8149963686978114, + "grad_norm": 7.79933095884644e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374240 + }, + { + "epoch": 1.8150448668906476, + "grad_norm": 1.5494043381636402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374250 + }, + { + "epoch": 1.8150933650834835, + "grad_norm": 8.402425422104898e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374260 + }, + { + "epoch": 1.8151418632763197, + "grad_norm": 1.4309060603068247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374270 + }, + { + "epoch": 1.8151903614691558, + "grad_norm": 1.5556084420609295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374280 + }, + { + "epoch": 1.8152388596619917, + "grad_norm": 1.174395425351804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374290 + }, + { + "epoch": 1.815287357854828, + "grad_norm": 1.5514494577928417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374300 + }, + { + "epoch": 1.815335856047664, + "grad_norm": 8.188791866814427e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374310 + }, + { + "epoch": 1.8153843542405002, + "grad_norm": 1.2394172799190528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374320 + }, + { + "epoch": 1.8154328524333363, + "grad_norm": 1.3812791799239221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374330 + }, + { + "epoch": 1.8154813506261722, + "grad_norm": 1.3612295290954535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374340 + }, + { + "epoch": 1.8155298488190084, + "grad_norm": 1.5966241662113134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374350 + }, + { + "epoch": 1.8155783470118445, + "grad_norm": 1.682971628724772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374360 + }, + { + "epoch": 1.8156268452046804, + "grad_norm": 1.117004178041725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374370 + }, + { + "epoch": 1.8156753433975168, + "grad_norm": 1.0474416889394433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374380 + }, + { + "epoch": 1.8157238415903527, + "grad_norm": 1.2094377943583368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374390 + }, + { + "epoch": 1.8157723397831889, + "grad_norm": 2.016281719363633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374400 + }, + { + "epoch": 1.815820837976025, + "grad_norm": 1.4925756630645992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374410 + }, + { + "epoch": 1.815869336168861, + "grad_norm": 8.272480478410671e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374420 + }, + { + "epoch": 1.815917834361697, + "grad_norm": 1.3002888543667268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374430 + }, + { + "epoch": 1.8159663325545332, + "grad_norm": 1.0328481181431926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374440 + }, + { + "epoch": 1.8160148307473691, + "grad_norm": 1.003098315521811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374450 + }, + { + "epoch": 1.8160633289402055, + "grad_norm": 1.7894778991944804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374460 + }, + { + "epoch": 1.8161118271330414, + "grad_norm": 8.75902195218714e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374470 + }, + { + "epoch": 1.8161603253258776, + "grad_norm": 1.7711890620830673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374480 + }, + { + "epoch": 1.8162088235187137, + "grad_norm": 9.712446846776857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374490 + }, + { + "epoch": 1.8162573217115496, + "grad_norm": 1.1989776282916864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374500 + }, + { + "epoch": 1.8163058199043858, + "grad_norm": 1.0121971705245869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374510 + }, + { + "epoch": 1.816354318097222, + "grad_norm": 1.0321663523882307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374520 + }, + { + "epoch": 1.8164028162900578, + "grad_norm": 8.518388661116205e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374530 + }, + { + "epoch": 1.8164513144828942, + "grad_norm": 1.2957570127980489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374540 + }, + { + "epoch": 1.8164998126757301, + "grad_norm": 1.4916535562292665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374550 + }, + { + "epoch": 1.8165483108685663, + "grad_norm": 1.2060056953089315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374560 + }, + { + "epoch": 1.8165968090614024, + "grad_norm": 1.2010273664486704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374570 + }, + { + "epoch": 1.8166453072542383, + "grad_norm": 1.2720692943446466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374580 + }, + { + "epoch": 1.8166938054470745, + "grad_norm": 1.1700335811326568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374590 + }, + { + "epoch": 1.8167423036399106, + "grad_norm": 1.1597456328615863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374600 + }, + { + "epoch": 1.8167908018327465, + "grad_norm": 1.35905402487424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374610 + }, + { + "epoch": 1.816839300025583, + "grad_norm": 9.048030769065463e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374620 + }, + { + "epoch": 1.8168877982184188, + "grad_norm": 1.1340281602656432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374630 + }, + { + "epoch": 1.816936296411255, + "grad_norm": 1.3020333256008598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374640 + }, + { + "epoch": 1.8169847946040911, + "grad_norm": 1.1701581925649407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374650 + }, + { + "epoch": 1.817033292796927, + "grad_norm": 1.1655435727675467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374660 + }, + { + "epoch": 1.8170817909897632, + "grad_norm": 1.2223575929226627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374670 + }, + { + "epoch": 1.8171302891825993, + "grad_norm": 1.2459212328508329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374680 + }, + { + "epoch": 1.8171787873754355, + "grad_norm": 1.2861219644833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374690 + }, + { + "epoch": 1.8172272855682716, + "grad_norm": 1.2226334611398215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374700 + }, + { + "epoch": 1.8172757837611075, + "grad_norm": 1.3966791279074187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374710 + }, + { + "epoch": 1.8173242819539437, + "grad_norm": 1.2886842704062929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374720 + }, + { + "epoch": 1.8173727801467798, + "grad_norm": 1.3820904420924762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374730 + }, + { + "epoch": 1.8174212783396158, + "grad_norm": 1.503049240625387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374740 + }, + { + "epoch": 1.8174697765324521, + "grad_norm": 1.4370495016180485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374750 + }, + { + "epoch": 1.817518274725288, + "grad_norm": 1.8997312167812197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374760 + }, + { + "epoch": 1.8175667729181242, + "grad_norm": 8.890761016289161e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374770 + }, + { + "epoch": 1.8176152711109603, + "grad_norm": 1.1370482333461496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374780 + }, + { + "epoch": 1.8176637693037963, + "grad_norm": 9.9786481300157e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374790 + }, + { + "epoch": 1.8177122674966324, + "grad_norm": 1.2408515992490265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374800 + }, + { + "epoch": 1.8177607656894685, + "grad_norm": 1.73748730958323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374810 + }, + { + "epoch": 1.8178092638823045, + "grad_norm": 1.1665692412066164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374820 + }, + { + "epoch": 1.8178577620751408, + "grad_norm": 1.0848856923928452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374830 + }, + { + "epoch": 1.8179062602679767, + "grad_norm": 1.9503769266293602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374840 + }, + { + "epoch": 1.817954758460813, + "grad_norm": 1.0081307344478319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374850 + }, + { + "epoch": 1.818003256653649, + "grad_norm": 1.5586406831857857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374860 + }, + { + "epoch": 1.818051754846485, + "grad_norm": 1.62112225865485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374870 + }, + { + "epoch": 1.818100253039321, + "grad_norm": 1.700188434483607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374880 + }, + { + "epoch": 1.8181487512321572, + "grad_norm": 9.84269998838272e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374890 + }, + { + "epoch": 1.8181972494249932, + "grad_norm": 1.5067479708363862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374900 + }, + { + "epoch": 1.8182457476178295, + "grad_norm": 1.0832485131118119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374910 + }, + { + "epoch": 1.8182942458106655, + "grad_norm": 9.404371503762832e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374920 + }, + { + "epoch": 1.8183427440035016, + "grad_norm": 1.1925086695896425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374930 + }, + { + "epoch": 1.8183912421963377, + "grad_norm": 1.5270703812575448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374940 + }, + { + "epoch": 1.8184397403891737, + "grad_norm": 8.875005619302101e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374950 + }, + { + "epoch": 1.8184882385820098, + "grad_norm": 1.086141931949669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374960 + }, + { + "epoch": 1.818536736774846, + "grad_norm": 1.549532058220393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374970 + }, + { + "epoch": 1.8185852349676819, + "grad_norm": 1.172358032874854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374980 + }, + { + "epoch": 1.8186337331605182, + "grad_norm": 1.3742739390920633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 374990 + }, + { + "epoch": 1.8186822313533542, + "grad_norm": 1.0970928165932037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375000 + }, + { + "epoch": 1.8187307295461903, + "grad_norm": 1.59662221221879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375010 + }, + { + "epoch": 1.8187792277390264, + "grad_norm": 9.727973981910054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375020 + }, + { + "epoch": 1.8188277259318624, + "grad_norm": 1.0965507613036607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375030 + }, + { + "epoch": 1.8188762241246985, + "grad_norm": 1.3452305935857112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375040 + }, + { + "epoch": 1.8189247223175347, + "grad_norm": 1.1800386445770528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375050 + }, + { + "epoch": 1.8189732205103706, + "grad_norm": 1.9733089828832817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375060 + }, + { + "epoch": 1.819021718703207, + "grad_norm": 1.6651705792014582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375070 + }, + { + "epoch": 1.8190702168960429, + "grad_norm": 1.2202911570113883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375080 + }, + { + "epoch": 1.819118715088879, + "grad_norm": 1.0944132711188104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375090 + }, + { + "epoch": 1.8191672132817152, + "grad_norm": 1.255327486404667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375100 + }, + { + "epoch": 1.819215711474551, + "grad_norm": 7.773743426753299e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375110 + }, + { + "epoch": 1.8192642096673872, + "grad_norm": 9.677236789684684e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375120 + }, + { + "epoch": 1.8193127078602234, + "grad_norm": 9.193657390937915e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375130 + }, + { + "epoch": 1.8193612060530593, + "grad_norm": 2.070989069125062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375140 + }, + { + "epoch": 1.8194097042458957, + "grad_norm": 7.748674590857263e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375150 + }, + { + "epoch": 1.8194582024387316, + "grad_norm": 9.657841637533693e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375160 + }, + { + "epoch": 1.8195067006315677, + "grad_norm": 1.4275014059705882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375170 + }, + { + "epoch": 1.8195551988244039, + "grad_norm": 1.6662234259001707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375180 + }, + { + "epoch": 1.8196036970172398, + "grad_norm": 1.1579141201423226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375190 + }, + { + "epoch": 1.8196521952100762, + "grad_norm": 2.1809103856185175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375200 + }, + { + "epoch": 1.819700693402912, + "grad_norm": 1.4580344931403033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375210 + }, + { + "epoch": 1.8197491915957482, + "grad_norm": 1.4229521561048841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375220 + }, + { + "epoch": 1.8197976897885844, + "grad_norm": 1.0681741713369775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375230 + }, + { + "epoch": 1.8198461879814203, + "grad_norm": 1.4899582900795849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375240 + }, + { + "epoch": 1.8198946861742564, + "grad_norm": 1.3092128270386638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375250 + }, + { + "epoch": 1.8199431843670926, + "grad_norm": 1.1689183843088813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375260 + }, + { + "epoch": 1.8199916825599285, + "grad_norm": 1.4723642749459032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375270 + }, + { + "epoch": 1.8200401807527649, + "grad_norm": 7.079522745812028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375280 + }, + { + "epoch": 1.8200886789456008, + "grad_norm": 1.208545796771432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375290 + }, + { + "epoch": 1.820137177138437, + "grad_norm": 1.2403271298921936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375300 + }, + { + "epoch": 1.820185675331273, + "grad_norm": 1.1484730499944362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375310 + }, + { + "epoch": 1.820234173524109, + "grad_norm": 1.6492446519578152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375320 + }, + { + "epoch": 1.8202826717169451, + "grad_norm": 1.4316461793839608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375330 + }, + { + "epoch": 1.8203311699097813, + "grad_norm": 1.685311801224998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375340 + }, + { + "epoch": 1.8203796681026172, + "grad_norm": 1.1608477734625922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375350 + }, + { + "epoch": 1.8204281662954536, + "grad_norm": 1.1986046821732543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375360 + }, + { + "epoch": 1.8204766644882895, + "grad_norm": 1.3733176373875722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375370 + }, + { + "epoch": 1.8205251626811256, + "grad_norm": 1.1798742427515663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375380 + }, + { + "epoch": 1.8205736608739618, + "grad_norm": 1.4179520668733403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375390 + }, + { + "epoch": 1.8206221590667977, + "grad_norm": 1.0367192437854555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375400 + }, + { + "epoch": 1.8206706572596338, + "grad_norm": 1.3423417044577945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375410 + }, + { + "epoch": 1.82071915545247, + "grad_norm": 1.0010031914475803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375420 + }, + { + "epoch": 1.820767653645306, + "grad_norm": 1.06610533734397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375430 + }, + { + "epoch": 1.8208161518381423, + "grad_norm": 1.4239827983431042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375440 + }, + { + "epoch": 1.8208646500309782, + "grad_norm": 1.0721676879654751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375450 + }, + { + "epoch": 1.8209131482238143, + "grad_norm": 2.279766064816613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375460 + }, + { + "epoch": 1.8209616464166505, + "grad_norm": 1.5025719335426402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375470 + }, + { + "epoch": 1.8210101446094864, + "grad_norm": 1.3365962558964384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375480 + }, + { + "epoch": 1.8210586428023225, + "grad_norm": 1.528538895456677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375490 + }, + { + "epoch": 1.8211071409951587, + "grad_norm": 1.0383840454153415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375500 + }, + { + "epoch": 1.8211556391879946, + "grad_norm": 1.7730339862964684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375510 + }, + { + "epoch": 1.821204137380831, + "grad_norm": 2.3517515046478366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375520 + }, + { + "epoch": 1.821252635573667, + "grad_norm": 1.195012888643987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375530 + }, + { + "epoch": 1.821301133766503, + "grad_norm": 9.238171116976446e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375540 + }, + { + "epoch": 1.8213496319593392, + "grad_norm": 1.4084550414850128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375550 + }, + { + "epoch": 1.821398130152175, + "grad_norm": 1.4131201986344877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375560 + }, + { + "epoch": 1.8214466283450113, + "grad_norm": 1.8013137648154043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375570 + }, + { + "epoch": 1.8214951265378474, + "grad_norm": 9.841005343957931e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375580 + }, + { + "epoch": 1.8215436247306833, + "grad_norm": 1.3107166019210581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375590 + }, + { + "epoch": 1.8215921229235197, + "grad_norm": 8.81461659218985e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375600 + }, + { + "epoch": 1.8216406211163556, + "grad_norm": 1.2106017521773538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375610 + }, + { + "epoch": 1.8216891193091918, + "grad_norm": 1.3938506349120416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375620 + }, + { + "epoch": 1.821737617502028, + "grad_norm": 1.0096864677677786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375630 + }, + { + "epoch": 1.8217861156948638, + "grad_norm": 1.4776922796500003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375640 + }, + { + "epoch": 1.8218346138877, + "grad_norm": 1.6861106288956762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375650 + }, + { + "epoch": 1.821883112080536, + "grad_norm": 1.9807709250585503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375660 + }, + { + "epoch": 1.821931610273372, + "grad_norm": 1.0391202565074309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375670 + }, + { + "epoch": 1.8219801084662084, + "grad_norm": 1.4109130752615329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375680 + }, + { + "epoch": 1.8220286066590443, + "grad_norm": 1.0902862612738318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375690 + }, + { + "epoch": 1.8220771048518805, + "grad_norm": 1.954151329641718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375700 + }, + { + "epoch": 1.8221256030447166, + "grad_norm": 9.782834986538091e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375710 + }, + { + "epoch": 1.8221741012375525, + "grad_norm": 1.4054831964926962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375720 + }, + { + "epoch": 1.822222599430389, + "grad_norm": 1.0016134588397563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375730 + }, + { + "epoch": 1.8222710976232248, + "grad_norm": 9.124359934276072e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375740 + }, + { + "epoch": 1.822319595816061, + "grad_norm": 1.5014922638556527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375750 + }, + { + "epoch": 1.822368094008897, + "grad_norm": 8.870350676204453e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375760 + }, + { + "epoch": 1.822416592201733, + "grad_norm": 1.5480797088684994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375770 + }, + { + "epoch": 1.8224650903945692, + "grad_norm": 1.2185380704465842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375780 + }, + { + "epoch": 1.8225135885874053, + "grad_norm": 1.1641562380759751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375790 + }, + { + "epoch": 1.8225620867802412, + "grad_norm": 1.2109485858502467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375800 + }, + { + "epoch": 1.8226105849730776, + "grad_norm": 7.993836703690249e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375810 + }, + { + "epoch": 1.8226590831659135, + "grad_norm": 1.1399422739089005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375820 + }, + { + "epoch": 1.8227075813587497, + "grad_norm": 9.807225254121477e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375830 + }, + { + "epoch": 1.8227560795515858, + "grad_norm": 9.949178370050049e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375840 + }, + { + "epoch": 1.8228045777444217, + "grad_norm": 1.1015312217921291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375850 + }, + { + "epoch": 1.8228530759372579, + "grad_norm": 9.527209243742618e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375860 + }, + { + "epoch": 1.822901574130094, + "grad_norm": 1.6752698783761844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375870 + }, + { + "epoch": 1.82295007232293, + "grad_norm": 1.1023473689419916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375880 + }, + { + "epoch": 1.8229985705157663, + "grad_norm": 1.0946147988022403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375890 + }, + { + "epoch": 1.8230470687086022, + "grad_norm": 1.493447499001377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375900 + }, + { + "epoch": 1.8230955669014384, + "grad_norm": 7.277166425012638e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375910 + }, + { + "epoch": 1.8231440650942745, + "grad_norm": 1.2414253625081528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375920 + }, + { + "epoch": 1.8231925632871104, + "grad_norm": 1.3037977808494361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375930 + }, + { + "epoch": 1.8232410614799466, + "grad_norm": 1.7414921060776578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375940 + }, + { + "epoch": 1.8232895596727827, + "grad_norm": 2.0888949237019006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375950 + }, + { + "epoch": 1.8233380578656186, + "grad_norm": 1.3789773767314273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375960 + }, + { + "epoch": 1.823386556058455, + "grad_norm": 1.6074622521955462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375970 + }, + { + "epoch": 1.823435054251291, + "grad_norm": 9.472344686400902e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375980 + }, + { + "epoch": 1.823483552444127, + "grad_norm": 1.3674601007096499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 375990 + }, + { + "epoch": 1.8235320506369632, + "grad_norm": 1.7939290941626496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376000 + }, + { + "epoch": 1.8235805488297991, + "grad_norm": 1.209333255758338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376010 + }, + { + "epoch": 1.8236290470226353, + "grad_norm": 1.6113377299120657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376020 + }, + { + "epoch": 1.8236775452154714, + "grad_norm": 9.413372303868073e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376030 + }, + { + "epoch": 1.8237260434083074, + "grad_norm": 8.681863228332531e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376040 + }, + { + "epoch": 1.8237745416011437, + "grad_norm": 1.532475124577104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376050 + }, + { + "epoch": 1.8238230397939796, + "grad_norm": 1.9655534089224602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376060 + }, + { + "epoch": 1.8238715379868158, + "grad_norm": 8.916219762511446e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376070 + }, + { + "epoch": 1.823920036179652, + "grad_norm": 1.5130952490949312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376080 + }, + { + "epoch": 1.8239685343724878, + "grad_norm": 1.3949004618041272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376090 + }, + { + "epoch": 1.824017032565324, + "grad_norm": 1.0629817026597266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376100 + }, + { + "epoch": 1.8240655307581601, + "grad_norm": 1.2380202640827065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376110 + }, + { + "epoch": 1.824114028950996, + "grad_norm": 1.6196320729022773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376120 + }, + { + "epoch": 1.8241625271438324, + "grad_norm": 1.3864347003789135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376130 + }, + { + "epoch": 1.8242110253366683, + "grad_norm": 1.4534050407633003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376140 + }, + { + "epoch": 1.8242595235295045, + "grad_norm": 1.1163242774614446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376150 + }, + { + "epoch": 1.8243080217223406, + "grad_norm": 1.2854903808090512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376160 + }, + { + "epoch": 1.8243565199151766, + "grad_norm": 1.0585806897722705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376170 + }, + { + "epoch": 1.8244050181080127, + "grad_norm": 1.2869290522132815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376180 + }, + { + "epoch": 1.8244535163008488, + "grad_norm": 8.55720472259236e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376190 + }, + { + "epoch": 1.8245020144936848, + "grad_norm": 1.0979584352810434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376200 + }, + { + "epoch": 1.8245505126865211, + "grad_norm": 1.1751221329348027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376210 + }, + { + "epoch": 1.824599010879357, + "grad_norm": 1.4647978829884778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376220 + }, + { + "epoch": 1.8246475090721932, + "grad_norm": 1.669122617897756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376230 + }, + { + "epoch": 1.8246960072650293, + "grad_norm": 1.1159254853509992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376240 + }, + { + "epoch": 1.8247445054578653, + "grad_norm": 9.847118676020727e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376250 + }, + { + "epoch": 1.8247930036507016, + "grad_norm": 1.0511334913587689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376260 + }, + { + "epoch": 1.8248415018435376, + "grad_norm": 1.545436312255788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376270 + }, + { + "epoch": 1.8248900000363737, + "grad_norm": 1.0802530425735313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376280 + }, + { + "epoch": 1.8249384982292098, + "grad_norm": 1.4114792890040917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376290 + }, + { + "epoch": 1.8249869964220458, + "grad_norm": 1.8913654642460642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376300 + }, + { + "epoch": 1.825035494614882, + "grad_norm": 2.1184508369742616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376310 + }, + { + "epoch": 1.825083992807718, + "grad_norm": 1.5607946934892425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376320 + }, + { + "epoch": 1.825132491000554, + "grad_norm": 1.3563672851546471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376330 + }, + { + "epoch": 1.8251809891933903, + "grad_norm": 1.4798361647194724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376340 + }, + { + "epoch": 1.8252294873862263, + "grad_norm": 1.0897156954570164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376350 + }, + { + "epoch": 1.8252779855790624, + "grad_norm": 1.715385700151728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376360 + }, + { + "epoch": 1.8253264837718985, + "grad_norm": 1.0227398483664274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376370 + }, + { + "epoch": 1.8253749819647345, + "grad_norm": 1.2610724908768134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376380 + }, + { + "epoch": 1.8254234801575706, + "grad_norm": 1.3993868286377165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376390 + }, + { + "epoch": 1.8254719783504068, + "grad_norm": 1.3922034192148658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376400 + }, + { + "epoch": 1.8255204765432427, + "grad_norm": 1.0924485316365917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376410 + }, + { + "epoch": 1.825568974736079, + "grad_norm": 1.675294569736252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376420 + }, + { + "epoch": 1.825617472928915, + "grad_norm": 1.3262472009500925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376430 + }, + { + "epoch": 1.825665971121751, + "grad_norm": 1.2437484819827205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376440 + }, + { + "epoch": 1.8257144693145873, + "grad_norm": 1.4495282307791513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376450 + }, + { + "epoch": 1.8257629675074232, + "grad_norm": 1.598920817968974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376460 + }, + { + "epoch": 1.8258114657002593, + "grad_norm": 1.7658360107475346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376470 + }, + { + "epoch": 1.8258599638930955, + "grad_norm": 1.7241923444544227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376480 + }, + { + "epoch": 1.8259084620859314, + "grad_norm": 1.690058937242611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376490 + }, + { + "epoch": 1.8259569602787677, + "grad_norm": 1.3796367603902127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376500 + }, + { + "epoch": 1.8260054584716037, + "grad_norm": 1.384836778584031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376510 + }, + { + "epoch": 1.8260539566644398, + "grad_norm": 1.4036094952984968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376520 + }, + { + "epoch": 1.826102454857276, + "grad_norm": 1.5563399458073945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376530 + }, + { + "epoch": 1.8261509530501119, + "grad_norm": 1.2781825375896005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376540 + }, + { + "epoch": 1.826199451242948, + "grad_norm": 8.754753366702062e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376550 + }, + { + "epoch": 1.8262479494357842, + "grad_norm": 1.1950319844800106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376560 + }, + { + "epoch": 1.82629644762862, + "grad_norm": 1.0362255054019442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376570 + }, + { + "epoch": 1.8263449458214565, + "grad_norm": 1.0509683789905466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376580 + }, + { + "epoch": 1.8263934440142924, + "grad_norm": 1.5122910923537347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376590 + }, + { + "epoch": 1.8264419422071285, + "grad_norm": 1.0993214338839152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376600 + }, + { + "epoch": 1.8264904403999647, + "grad_norm": 1.321003217924499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376610 + }, + { + "epoch": 1.8265389385928006, + "grad_norm": 8.622447644768272e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376620 + }, + { + "epoch": 1.8265874367856367, + "grad_norm": 8.543229235158378e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376630 + }, + { + "epoch": 1.8266359349784729, + "grad_norm": 1.0211481438204828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376640 + }, + { + "epoch": 1.8266844331713088, + "grad_norm": 9.922540122886403e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376650 + }, + { + "epoch": 1.8267329313641452, + "grad_norm": 1.8467654072651385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376660 + }, + { + "epoch": 1.826781429556981, + "grad_norm": 1.4893545063898728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376670 + }, + { + "epoch": 1.8268299277498172, + "grad_norm": 1.631077495289901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376680 + }, + { + "epoch": 1.8268784259426534, + "grad_norm": 1.1780844744180285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376690 + }, + { + "epoch": 1.8269269241354893, + "grad_norm": 1.1090261153867687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376700 + }, + { + "epoch": 1.8269754223283254, + "grad_norm": 1.114741365881855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376710 + }, + { + "epoch": 1.8270239205211616, + "grad_norm": 7.244666644368181e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376720 + }, + { + "epoch": 1.8270724187139975, + "grad_norm": 1.3261698406097366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376730 + }, + { + "epoch": 1.8271209169068339, + "grad_norm": 1.1492030438375878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376740 + }, + { + "epoch": 1.8271694150996698, + "grad_norm": 1.021254369959479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376750 + }, + { + "epoch": 1.827217913292506, + "grad_norm": 1.1085414364231383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376760 + }, + { + "epoch": 1.827266411485342, + "grad_norm": 1.815792316506304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376770 + }, + { + "epoch": 1.827314909678178, + "grad_norm": 1.0544893846997638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376780 + }, + { + "epoch": 1.8273634078710144, + "grad_norm": 1.1133389321571485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376790 + }, + { + "epoch": 1.8274119060638503, + "grad_norm": 9.909263631868725e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376800 + }, + { + "epoch": 1.8274604042566864, + "grad_norm": 1.3569237289345892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376810 + }, + { + "epoch": 1.8275089024495226, + "grad_norm": 1.6467526009478206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376820 + }, + { + "epoch": 1.8275574006423585, + "grad_norm": 1.6482703202314042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376830 + }, + { + "epoch": 1.8276058988351946, + "grad_norm": 1.181773701119937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376840 + }, + { + "epoch": 1.8276543970280308, + "grad_norm": 1.5963813382313674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376850 + }, + { + "epoch": 1.8277028952208667, + "grad_norm": 1.1184856596457848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376860 + }, + { + "epoch": 1.827751393413703, + "grad_norm": 9.79447989379878e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376870 + }, + { + "epoch": 1.827799891606539, + "grad_norm": 1.2391064174721578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376880 + }, + { + "epoch": 1.8278483897993751, + "grad_norm": 1.2293545736952183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376890 + }, + { + "epoch": 1.8278968879922113, + "grad_norm": 1.1563930257807442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376900 + }, + { + "epoch": 1.8279453861850472, + "grad_norm": 2.0846783854722162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376910 + }, + { + "epoch": 1.8279938843778833, + "grad_norm": 8.70008243225584e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376920 + }, + { + "epoch": 1.8280423825707195, + "grad_norm": 9.14468412105407e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376930 + }, + { + "epoch": 1.8280908807635554, + "grad_norm": 1.3087523953458913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376940 + }, + { + "epoch": 1.8281393789563918, + "grad_norm": 1.914056468876879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376950 + }, + { + "epoch": 1.8281878771492277, + "grad_norm": 8.44948111478061e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376960 + }, + { + "epoch": 1.8282363753420638, + "grad_norm": 2.5144174742308678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376970 + }, + { + "epoch": 1.8282848735349, + "grad_norm": 1.4216809063327673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376980 + }, + { + "epoch": 1.828333371727736, + "grad_norm": 1.0074230338830148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 376990 + }, + { + "epoch": 1.828381869920572, + "grad_norm": 1.281373851469425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377000 + }, + { + "epoch": 1.8284303681134082, + "grad_norm": 1.0517080539784729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377010 + }, + { + "epoch": 1.8284788663062441, + "grad_norm": 1.682930594881782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377020 + }, + { + "epoch": 1.8285273644990805, + "grad_norm": 1.5905587957831813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377030 + }, + { + "epoch": 1.8285758626919164, + "grad_norm": 2.0541792267181336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377040 + }, + { + "epoch": 1.8286243608847526, + "grad_norm": 1.613866373872952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377050 + }, + { + "epoch": 1.8286728590775887, + "grad_norm": 1.2399040016930485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377060 + }, + { + "epoch": 1.8287213572704246, + "grad_norm": 1.2784071579119427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377070 + }, + { + "epoch": 1.8287698554632608, + "grad_norm": 1.3641678897613474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377080 + }, + { + "epoch": 1.828818353656097, + "grad_norm": 1.0116981030705574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377090 + }, + { + "epoch": 1.8288668518489328, + "grad_norm": 1.0509488390653132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377100 + }, + { + "epoch": 1.8289153500417692, + "grad_norm": 1.2518882819279042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377110 + }, + { + "epoch": 1.8289638482346051, + "grad_norm": 1.1592432791474039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377120 + }, + { + "epoch": 1.8290123464274413, + "grad_norm": 1.4997342034916983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377130 + }, + { + "epoch": 1.8290608446202774, + "grad_norm": 1.4190987052131732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377140 + }, + { + "epoch": 1.8291093428131133, + "grad_norm": 1.248334591252842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377150 + }, + { + "epoch": 1.8291578410059495, + "grad_norm": 1.3476546101287568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377160 + }, + { + "epoch": 1.8292063391987856, + "grad_norm": 1.095750157276143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377170 + }, + { + "epoch": 1.8292548373916215, + "grad_norm": 1.255668546917832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377180 + }, + { + "epoch": 1.829303335584458, + "grad_norm": 1.035642949176463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377190 + }, + { + "epoch": 1.8293518337772938, + "grad_norm": 1.874276023272614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377200 + }, + { + "epoch": 1.82940033197013, + "grad_norm": 1.4482514743008323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377210 + }, + { + "epoch": 1.829448830162966, + "grad_norm": 1.3341495019858485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377220 + }, + { + "epoch": 1.829497328355802, + "grad_norm": 8.787119476494354e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377230 + }, + { + "epoch": 1.8295458265486382, + "grad_norm": 8.498159509429115e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377240 + }, + { + "epoch": 1.8295943247414743, + "grad_norm": 1.1890142204151743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377250 + }, + { + "epoch": 1.8296428229343105, + "grad_norm": 8.034564125125598e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377260 + }, + { + "epoch": 1.8296913211271466, + "grad_norm": 1.3124589415269838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377270 + }, + { + "epoch": 1.8297398193199825, + "grad_norm": 1.1020923729176957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377280 + }, + { + "epoch": 1.8297883175128187, + "grad_norm": 1.045837283442097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377290 + }, + { + "epoch": 1.8298368157056548, + "grad_norm": 1.2537952009950004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377300 + }, + { + "epoch": 1.8298853138984907, + "grad_norm": 1.2426291107203724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377310 + }, + { + "epoch": 1.829933812091327, + "grad_norm": 1.263579463284259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377320 + }, + { + "epoch": 1.829982310284163, + "grad_norm": 7.836318260956432e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377330 + }, + { + "epoch": 1.8300308084769992, + "grad_norm": 1.660231063738138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377340 + }, + { + "epoch": 1.8300793066698353, + "grad_norm": 9.210473272958097e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377350 + }, + { + "epoch": 1.8301278048626712, + "grad_norm": 1.4660816560763124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377360 + }, + { + "epoch": 1.8301763030555074, + "grad_norm": 1.1268104671557921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377370 + }, + { + "epoch": 1.8302248012483435, + "grad_norm": 1.1526905652203823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377380 + }, + { + "epoch": 1.8302732994411794, + "grad_norm": 8.649089444645597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377390 + }, + { + "epoch": 1.8303217976340158, + "grad_norm": 1.3068953919059823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377400 + }, + { + "epoch": 1.8303702958268517, + "grad_norm": 1.0386392190753213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377410 + }, + { + "epoch": 1.8304187940196879, + "grad_norm": 1.4238064061089517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377420 + }, + { + "epoch": 1.830467292212524, + "grad_norm": 9.556957891732054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377430 + }, + { + "epoch": 1.83051579040536, + "grad_norm": 1.0888420831633994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377440 + }, + { + "epoch": 1.830564288598196, + "grad_norm": 8.388663985670064e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377450 + }, + { + "epoch": 1.8306127867910322, + "grad_norm": 1.8089359343775868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377460 + }, + { + "epoch": 1.8306612849838682, + "grad_norm": 1.3196866710529775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377470 + }, + { + "epoch": 1.8307097831767045, + "grad_norm": 1.0186400167810916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377480 + }, + { + "epoch": 1.8307582813695404, + "grad_norm": 8.050180966279186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377490 + }, + { + "epoch": 1.8308067795623766, + "grad_norm": 1.6641902078617932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377500 + }, + { + "epoch": 1.8308552777552127, + "grad_norm": 1.14651879101757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377510 + }, + { + "epoch": 1.8309037759480487, + "grad_norm": 1.2421547346264106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377520 + }, + { + "epoch": 1.8309522741408848, + "grad_norm": 1.4440658446801535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377530 + }, + { + "epoch": 1.831000772333721, + "grad_norm": 1.1116545906020292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377540 + }, + { + "epoch": 1.8310492705265569, + "grad_norm": 8.838506815322944e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377550 + }, + { + "epoch": 1.8310977687193932, + "grad_norm": 1.1683608747148355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377560 + }, + { + "epoch": 1.8311462669122291, + "grad_norm": 9.765741992850963e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377570 + }, + { + "epoch": 1.8311947651050653, + "grad_norm": 9.054711647138447e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377580 + }, + { + "epoch": 1.8312432632979014, + "grad_norm": 1.2465151577600864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377590 + }, + { + "epoch": 1.8312917614907374, + "grad_norm": 1.8834658277455674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377600 + }, + { + "epoch": 1.8313402596835735, + "grad_norm": 1.3168945045549663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377610 + }, + { + "epoch": 1.8313887578764096, + "grad_norm": 1.6117612133825787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377620 + }, + { + "epoch": 1.8314372560692456, + "grad_norm": 1.379481684438133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377630 + }, + { + "epoch": 1.831485754262082, + "grad_norm": 9.042567583605887e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377640 + }, + { + "epoch": 1.8315342524549179, + "grad_norm": 1.0322875887425198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377650 + }, + { + "epoch": 1.831582750647754, + "grad_norm": 1.121059156616866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377660 + }, + { + "epoch": 1.8316312488405901, + "grad_norm": 1.0948932427368163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377670 + }, + { + "epoch": 1.831679747033426, + "grad_norm": 1.194998056064378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377680 + }, + { + "epoch": 1.8317282452262622, + "grad_norm": 1.8878019147905434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377690 + }, + { + "epoch": 1.8317767434190984, + "grad_norm": 9.653777333085145e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377700 + }, + { + "epoch": 1.8318252416119343, + "grad_norm": 1.3931821918333753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377710 + }, + { + "epoch": 1.8318737398047706, + "grad_norm": 1.750299816194456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377720 + }, + { + "epoch": 1.8319222379976066, + "grad_norm": 1.638431967876386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377730 + }, + { + "epoch": 1.8319707361904427, + "grad_norm": 1.9908931392365048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377740 + }, + { + "epoch": 1.8320192343832788, + "grad_norm": 1.4495533662284288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377750 + }, + { + "epoch": 1.8320677325761148, + "grad_norm": 1.904697199961447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377760 + }, + { + "epoch": 1.8321162307689511, + "grad_norm": 2.326432380073129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377770 + }, + { + "epoch": 1.832164728961787, + "grad_norm": 1.1650189257750299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377780 + }, + { + "epoch": 1.8322132271546232, + "grad_norm": 1.5250639862074422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377790 + }, + { + "epoch": 1.8322617253474593, + "grad_norm": 1.2696934170719487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377800 + }, + { + "epoch": 1.8323102235402953, + "grad_norm": 1.4281574145513787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377810 + }, + { + "epoch": 1.8323587217331314, + "grad_norm": 1.513433822708521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377820 + }, + { + "epoch": 1.8324072199259676, + "grad_norm": 1.2635718249498495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377830 + }, + { + "epoch": 1.8324557181188035, + "grad_norm": 1.49644758806744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377840 + }, + { + "epoch": 1.8325042163116398, + "grad_norm": 1.6579075889922024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377850 + }, + { + "epoch": 1.8325527145044758, + "grad_norm": 1.2671020677146316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377860 + }, + { + "epoch": 1.832601212697312, + "grad_norm": 1.4718970930971409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377870 + }, + { + "epoch": 1.832649710890148, + "grad_norm": 9.880384510552176e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377880 + }, + { + "epoch": 1.832698209082984, + "grad_norm": 1.7029629262310664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377890 + }, + { + "epoch": 1.8327467072758201, + "grad_norm": 8.540508744658837e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377900 + }, + { + "epoch": 1.8327952054686563, + "grad_norm": 1.0826580520983953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377910 + }, + { + "epoch": 1.8328437036614922, + "grad_norm": 8.313552513072864e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377920 + }, + { + "epoch": 1.8328922018543286, + "grad_norm": 8.31353208496921e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377930 + }, + { + "epoch": 1.8329407000471645, + "grad_norm": 1.2697396023497731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377940 + }, + { + "epoch": 1.8329891982400006, + "grad_norm": 1.6855441486995915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377950 + }, + { + "epoch": 1.8330376964328368, + "grad_norm": 1.7373709582102492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377960 + }, + { + "epoch": 1.8330861946256727, + "grad_norm": 1.7617729497487744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377970 + }, + { + "epoch": 1.8331346928185088, + "grad_norm": 1.0116792736880598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377980 + }, + { + "epoch": 1.833183191011345, + "grad_norm": 9.944863599287146e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 377990 + }, + { + "epoch": 1.833231689204181, + "grad_norm": 9.703733816479598e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378000 + }, + { + "epoch": 1.8332801873970173, + "grad_norm": 9.350022978082961e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378010 + }, + { + "epoch": 1.8333286855898532, + "grad_norm": 1.1037148972548039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378020 + }, + { + "epoch": 1.8333771837826893, + "grad_norm": 1.1673471966844318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378030 + }, + { + "epoch": 1.8334256819755255, + "grad_norm": 1.7288286358052574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378040 + }, + { + "epoch": 1.8334741801683614, + "grad_norm": 1.0271531181160753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378050 + }, + { + "epoch": 1.8335226783611975, + "grad_norm": 9.439975023894931e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378060 + }, + { + "epoch": 1.8335711765540337, + "grad_norm": 1.3012634525466638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378070 + }, + { + "epoch": 1.8336196747468696, + "grad_norm": 1.6690465898250295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378080 + }, + { + "epoch": 1.833668172939706, + "grad_norm": 1.7096287052709158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378090 + }, + { + "epoch": 1.8337166711325419, + "grad_norm": 1.0137573447366321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378100 + }, + { + "epoch": 1.833765169325378, + "grad_norm": 1.1790619147689085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378110 + }, + { + "epoch": 1.8338136675182142, + "grad_norm": 8.300608200784154e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378120 + }, + { + "epoch": 1.83386216571105, + "grad_norm": 9.89195214629035e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378130 + }, + { + "epoch": 1.8339106639038862, + "grad_norm": 9.942372258819887e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378140 + }, + { + "epoch": 1.8339591620967224, + "grad_norm": 1.31095969635453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378150 + }, + { + "epoch": 1.8340076602895583, + "grad_norm": 1.0883741019540594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378160 + }, + { + "epoch": 1.8340561584823947, + "grad_norm": 1.500469970494578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378170 + }, + { + "epoch": 1.8341046566752306, + "grad_norm": 1.0218075274792682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378180 + }, + { + "epoch": 1.8341531548680667, + "grad_norm": 1.5791572494094908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378190 + }, + { + "epoch": 1.8342016530609029, + "grad_norm": 1.5488623716919392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378200 + }, + { + "epoch": 1.8342501512537388, + "grad_norm": 1.5186383706122797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378210 + }, + { + "epoch": 1.834298649446575, + "grad_norm": 1.216053568953157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378220 + }, + { + "epoch": 1.834347147639411, + "grad_norm": 7.918913524918025e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378230 + }, + { + "epoch": 1.834395645832247, + "grad_norm": 1.0901913150007658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378240 + }, + { + "epoch": 1.8344441440250834, + "grad_norm": 7.251889311277182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378250 + }, + { + "epoch": 1.8344926422179193, + "grad_norm": 1.0440792230781426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378260 + }, + { + "epoch": 1.8345411404107554, + "grad_norm": 1.0370282410576692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378270 + }, + { + "epoch": 1.8345896386035916, + "grad_norm": 1.142789951558143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378280 + }, + { + "epoch": 1.8346381367964275, + "grad_norm": 8.647410787432364e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378290 + }, + { + "epoch": 1.8346866349892639, + "grad_norm": 1.5637498407272687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378300 + }, + { + "epoch": 1.8347351331820998, + "grad_norm": 1.2307322272420151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378310 + }, + { + "epoch": 1.834783631374936, + "grad_norm": 1.1328833871004917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378320 + }, + { + "epoch": 1.834832129567772, + "grad_norm": 1.7221761794417034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378330 + }, + { + "epoch": 1.834880627760608, + "grad_norm": 1.3511797902765466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378340 + }, + { + "epoch": 1.8349291259534442, + "grad_norm": 1.4217773625091468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378350 + }, + { + "epoch": 1.8349776241462803, + "grad_norm": 1.490887768795801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378360 + }, + { + "epoch": 1.8350261223391162, + "grad_norm": 1.0043028630946083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378370 + }, + { + "epoch": 1.8350746205319526, + "grad_norm": 1.5913018458491024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378380 + }, + { + "epoch": 1.8351231187247885, + "grad_norm": 1.500262136744368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378390 + }, + { + "epoch": 1.8351716169176246, + "grad_norm": 1.2229238066652215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378400 + }, + { + "epoch": 1.8352201151104608, + "grad_norm": 1.5067948666569464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378410 + }, + { + "epoch": 1.8352686133032967, + "grad_norm": 9.762709751726106e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378420 + }, + { + "epoch": 1.8353171114961329, + "grad_norm": 1.1577513170379916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378430 + }, + { + "epoch": 1.835365609688969, + "grad_norm": 1.850614594900435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378440 + }, + { + "epoch": 1.835414107881805, + "grad_norm": 1.259491977378957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378450 + }, + { + "epoch": 1.8354626060746413, + "grad_norm": 1.075046718312933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378460 + }, + { + "epoch": 1.8355111042674772, + "grad_norm": 1.1596166693550458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378470 + }, + { + "epoch": 1.8355596024603134, + "grad_norm": 1.611840083626248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378480 + }, + { + "epoch": 1.8356081006531495, + "grad_norm": 1.4738613884901497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378490 + }, + { + "epoch": 1.8356565988459854, + "grad_norm": 9.326832639544591e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378500 + }, + { + "epoch": 1.8357050970388216, + "grad_norm": 1.527273774115656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378510 + }, + { + "epoch": 1.8357535952316577, + "grad_norm": 1.312701236599878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378520 + }, + { + "epoch": 1.8358020934244936, + "grad_norm": 1.2530341209071594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378530 + }, + { + "epoch": 1.83585059161733, + "grad_norm": 1.3955607336413323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378540 + }, + { + "epoch": 1.835899089810166, + "grad_norm": 1.1828467094687767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378550 + }, + { + "epoch": 1.835947588003002, + "grad_norm": 1.398384164019717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378560 + }, + { + "epoch": 1.8359960861958382, + "grad_norm": 1.0896246571689971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378570 + }, + { + "epoch": 1.8360445843886741, + "grad_norm": 1.135462301959933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378580 + }, + { + "epoch": 1.8360930825815103, + "grad_norm": 1.1042730285737434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378590 + }, + { + "epoch": 1.8361415807743464, + "grad_norm": 1.4106389834012134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378600 + }, + { + "epoch": 1.8361900789671823, + "grad_norm": 9.521631483266901e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378610 + }, + { + "epoch": 1.8362385771600187, + "grad_norm": 1.2403263305316159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378620 + }, + { + "epoch": 1.8362870753528546, + "grad_norm": 1.3504254603446952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378630 + }, + { + "epoch": 1.8363355735456908, + "grad_norm": 1.456255205312118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378640 + }, + { + "epoch": 1.836384071738527, + "grad_norm": 1.4445221019343535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378650 + }, + { + "epoch": 1.8364325699313628, + "grad_norm": 8.361341841123249e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378660 + }, + { + "epoch": 1.836481068124199, + "grad_norm": 9.787486376922061e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378670 + }, + { + "epoch": 1.8365295663170351, + "grad_norm": 1.055051690457276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378680 + }, + { + "epoch": 1.836578064509871, + "grad_norm": 1.477041422504044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378690 + }, + { + "epoch": 1.8366265627027074, + "grad_norm": 1.0030871244737227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378700 + }, + { + "epoch": 1.8366750608955433, + "grad_norm": 1.5448161860831533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378710 + }, + { + "epoch": 1.8367235590883795, + "grad_norm": 9.019828439704725e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378720 + }, + { + "epoch": 1.8367720572812156, + "grad_norm": 9.980784199115078e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378730 + }, + { + "epoch": 1.8368205554740515, + "grad_norm": 1.4182755414537951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378740 + }, + { + "epoch": 1.8368690536668877, + "grad_norm": 1.0353445212274437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378750 + }, + { + "epoch": 1.8369175518597238, + "grad_norm": 1.0805200290064931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378760 + }, + { + "epoch": 1.8369660500525598, + "grad_norm": 1.1760509899261251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378770 + }, + { + "epoch": 1.8370145482453961, + "grad_norm": 1.2025083151456784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378780 + }, + { + "epoch": 1.837063046438232, + "grad_norm": 1.1506762653823444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378790 + }, + { + "epoch": 1.8371115446310682, + "grad_norm": 1.0880478740205035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378800 + }, + { + "epoch": 1.8371600428239043, + "grad_norm": 1.3596096692936044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378810 + }, + { + "epoch": 1.8372085410167402, + "grad_norm": 1.8239319388158037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378820 + }, + { + "epoch": 1.8372570392095766, + "grad_norm": 9.661324185117337e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378830 + }, + { + "epoch": 1.8373055374024125, + "grad_norm": 1.0450619036816988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378840 + }, + { + "epoch": 1.8373540355952487, + "grad_norm": 1.1514980080562509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378850 + }, + { + "epoch": 1.8374025337880848, + "grad_norm": 1.2158353435154368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378860 + }, + { + "epoch": 1.8374510319809207, + "grad_norm": 1.2144832695071273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378870 + }, + { + "epoch": 1.837499530173757, + "grad_norm": 1.0466205679904306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378880 + }, + { + "epoch": 1.837548028366593, + "grad_norm": 1.3229027651107117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378890 + }, + { + "epoch": 1.837596526559429, + "grad_norm": 1.692589179924653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378900 + }, + { + "epoch": 1.8376450247522653, + "grad_norm": 8.245231164494271e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378910 + }, + { + "epoch": 1.8376935229451012, + "grad_norm": 9.799932421117319e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378920 + }, + { + "epoch": 1.8377420211379374, + "grad_norm": 1.1014539502696152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378930 + }, + { + "epoch": 1.8377905193307735, + "grad_norm": 2.0839726388999225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378940 + }, + { + "epoch": 1.8378390175236095, + "grad_norm": 1.2060242582379033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378950 + }, + { + "epoch": 1.8378875157164456, + "grad_norm": 1.2789016068381898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378960 + }, + { + "epoch": 1.8379360139092817, + "grad_norm": 1.3342217108913701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378970 + }, + { + "epoch": 1.8379845121021177, + "grad_norm": 1.0869391608991918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378980 + }, + { + "epoch": 1.838033010294954, + "grad_norm": 2.1024451513085296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 378990 + }, + { + "epoch": 1.83808150848779, + "grad_norm": 9.985832605252654e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379000 + }, + { + "epoch": 1.838130006680626, + "grad_norm": 1.5075000803221883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379010 + }, + { + "epoch": 1.8381785048734622, + "grad_norm": 1.3522674535693113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379020 + }, + { + "epoch": 1.8382270030662982, + "grad_norm": 1.517168790599044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379030 + }, + { + "epoch": 1.8382755012591343, + "grad_norm": 1.3869415838030363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379040 + }, + { + "epoch": 1.8383239994519704, + "grad_norm": 1.0734107824816874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379050 + }, + { + "epoch": 1.8383724976448064, + "grad_norm": 1.0958132179439417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379060 + }, + { + "epoch": 1.8384209958376427, + "grad_norm": 2.2668055876806648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379070 + }, + { + "epoch": 1.8384694940304787, + "grad_norm": 8.808340723476249e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379080 + }, + { + "epoch": 1.8385179922233148, + "grad_norm": 1.0800014216272302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379090 + }, + { + "epoch": 1.838566490416151, + "grad_norm": 1.567922325307336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379100 + }, + { + "epoch": 1.8386149886089869, + "grad_norm": 1.0207102718595706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379110 + }, + { + "epoch": 1.838663486801823, + "grad_norm": 1.3304109813816467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379120 + }, + { + "epoch": 1.8387119849946592, + "grad_norm": 1.698521145954146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379130 + }, + { + "epoch": 1.838760483187495, + "grad_norm": 1.1793770404722181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379140 + }, + { + "epoch": 1.8388089813803314, + "grad_norm": 8.285097941040931e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379150 + }, + { + "epoch": 1.8388574795731674, + "grad_norm": 1.4934812497813255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379160 + }, + { + "epoch": 1.8389059777660035, + "grad_norm": 1.9396722450437665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379170 + }, + { + "epoch": 1.8389544759588397, + "grad_norm": 1.5179367096607166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379180 + }, + { + "epoch": 1.8390029741516756, + "grad_norm": 1.5714135770394932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379190 + }, + { + "epoch": 1.8390514723445117, + "grad_norm": 1.1228759255743626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379200 + }, + { + "epoch": 1.8390999705373479, + "grad_norm": 1.096958790469671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379210 + }, + { + "epoch": 1.8391484687301838, + "grad_norm": 9.746818463440832e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379220 + }, + { + "epoch": 1.8391969669230201, + "grad_norm": 1.3563456136012064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379230 + }, + { + "epoch": 1.839245465115856, + "grad_norm": 1.6319742002224302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379240 + }, + { + "epoch": 1.8392939633086922, + "grad_norm": 8.788606287168932e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379250 + }, + { + "epoch": 1.8393424615015284, + "grad_norm": 1.1055024451422923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379260 + }, + { + "epoch": 1.8393909596943643, + "grad_norm": 1.2838123453207118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379270 + }, + { + "epoch": 1.8394394578872004, + "grad_norm": 1.4506361445398852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379280 + }, + { + "epoch": 1.8394879560800366, + "grad_norm": 1.1336550365115272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379290 + }, + { + "epoch": 1.8395364542728727, + "grad_norm": 1.0386359328151684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379300 + }, + { + "epoch": 1.8395849524657089, + "grad_norm": 1.2075879851636273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379310 + }, + { + "epoch": 1.8396334506585448, + "grad_norm": 1.2419731909574239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379320 + }, + { + "epoch": 1.839681948851381, + "grad_norm": 1.0373809367081321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379330 + }, + { + "epoch": 1.839730447044217, + "grad_norm": 1.0327896760031763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379340 + }, + { + "epoch": 1.839778945237053, + "grad_norm": 1.912928304648176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379350 + }, + { + "epoch": 1.8398274434298894, + "grad_norm": 1.2090219492222332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379360 + }, + { + "epoch": 1.8398759416227253, + "grad_norm": 1.0432343877653238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379370 + }, + { + "epoch": 1.8399244398155614, + "grad_norm": 1.624170309355577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379380 + }, + { + "epoch": 1.8399729380083976, + "grad_norm": 1.1083161055580604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379390 + }, + { + "epoch": 1.8400214362012335, + "grad_norm": 1.434397045585456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379400 + }, + { + "epoch": 1.8400699343940696, + "grad_norm": 1.1197508698046477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379410 + }, + { + "epoch": 1.8401184325869058, + "grad_norm": 9.801579103907443e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379420 + }, + { + "epoch": 1.8401669307797417, + "grad_norm": 1.8620422537196646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379430 + }, + { + "epoch": 1.840215428972578, + "grad_norm": 9.303294135065698e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379440 + }, + { + "epoch": 1.840263927165414, + "grad_norm": 1.0102143122026064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379450 + }, + { + "epoch": 1.8403124253582501, + "grad_norm": 1.1193996840574982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379460 + }, + { + "epoch": 1.8403609235510863, + "grad_norm": 1.331562415884946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379470 + }, + { + "epoch": 1.8404094217439222, + "grad_norm": 1.61124820152736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379480 + }, + { + "epoch": 1.8404579199367583, + "grad_norm": 1.2527192616573757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379490 + }, + { + "epoch": 1.8405064181295945, + "grad_norm": 1.205112631907923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379500 + }, + { + "epoch": 1.8405549163224304, + "grad_norm": 1.1819270895330192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379510 + }, + { + "epoch": 1.8406034145152668, + "grad_norm": 9.43588940316431e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379520 + }, + { + "epoch": 1.8406519127081027, + "grad_norm": 1.2741786292735924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379530 + }, + { + "epoch": 1.8407004109009388, + "grad_norm": 1.168531316153576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379540 + }, + { + "epoch": 1.840748909093775, + "grad_norm": 1.519792469650838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379550 + }, + { + "epoch": 1.840797407286611, + "grad_norm": 1.494225543297034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379560 + }, + { + "epoch": 1.840845905479447, + "grad_norm": 1.150588335718794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379570 + }, + { + "epoch": 1.8408944036722832, + "grad_norm": 1.1471179561794997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379580 + }, + { + "epoch": 1.840942901865119, + "grad_norm": 1.771624980051456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379590 + }, + { + "epoch": 1.8409914000579555, + "grad_norm": 9.640607423477832e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379600 + }, + { + "epoch": 1.8410398982507914, + "grad_norm": 1.28824861889143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379610 + }, + { + "epoch": 1.8410883964436275, + "grad_norm": 1.3502845952473308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379620 + }, + { + "epoch": 1.8411368946364637, + "grad_norm": 1.2053368081410554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379630 + }, + { + "epoch": 1.8411853928292996, + "grad_norm": 1.2417523898022864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379640 + }, + { + "epoch": 1.8412338910221357, + "grad_norm": 1.7634780746789147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379650 + }, + { + "epoch": 1.841282389214972, + "grad_norm": 9.678925216860534e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379660 + }, + { + "epoch": 1.8413308874078078, + "grad_norm": 8.25957080508033e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379670 + }, + { + "epoch": 1.8413793856006442, + "grad_norm": 1.4765845435249503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379680 + }, + { + "epoch": 1.84142788379348, + "grad_norm": 1.0485303292284698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379690 + }, + { + "epoch": 1.8414763819863162, + "grad_norm": 1.4754051314014305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379700 + }, + { + "epoch": 1.8415248801791524, + "grad_norm": 1.6255668811027135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379710 + }, + { + "epoch": 1.8415733783719883, + "grad_norm": 1.8275203572670762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379720 + }, + { + "epoch": 1.8416218765648245, + "grad_norm": 1.3241232998950636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379730 + }, + { + "epoch": 1.8416703747576606, + "grad_norm": 1.062560084363895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379740 + }, + { + "epoch": 1.8417188729504965, + "grad_norm": 8.363122638854747e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379750 + }, + { + "epoch": 1.8417673711433329, + "grad_norm": 1.1029483992786027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379760 + }, + { + "epoch": 1.8418158693361688, + "grad_norm": 1.2714981068029374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379770 + }, + { + "epoch": 1.841864367529005, + "grad_norm": 1.114711967176163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379780 + }, + { + "epoch": 1.841912865721841, + "grad_norm": 1.077623856815535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379790 + }, + { + "epoch": 1.841961363914677, + "grad_norm": 1.215289646694373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379800 + }, + { + "epoch": 1.8420098621075134, + "grad_norm": 1.738485622126973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379810 + }, + { + "epoch": 1.8420583603003493, + "grad_norm": 1.276122674198632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379820 + }, + { + "epoch": 1.8421068584931855, + "grad_norm": 1.3933108888863899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379830 + }, + { + "epoch": 1.8421553566860216, + "grad_norm": 9.076522644591023e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379840 + }, + { + "epoch": 1.8422038548788575, + "grad_norm": 9.822444191343038e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379850 + }, + { + "epoch": 1.8422523530716937, + "grad_norm": 1.2288063899745794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379860 + }, + { + "epoch": 1.8423008512645298, + "grad_norm": 1.1745417971553707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379870 + }, + { + "epoch": 1.8423493494573657, + "grad_norm": 1.314801245655417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379880 + }, + { + "epoch": 1.842397847650202, + "grad_norm": 1.0655548443594398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379890 + }, + { + "epoch": 1.842446345843038, + "grad_norm": 9.31777410784207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379900 + }, + { + "epoch": 1.8424948440358742, + "grad_norm": 1.2652209946395487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379910 + }, + { + "epoch": 1.8425433422287103, + "grad_norm": 1.1798524823802836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379920 + }, + { + "epoch": 1.8425918404215462, + "grad_norm": 9.35059052409315e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379930 + }, + { + "epoch": 1.8426403386143824, + "grad_norm": 1.2450224851079383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379940 + }, + { + "epoch": 1.8426888368072185, + "grad_norm": 1.0985057308232626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379950 + }, + { + "epoch": 1.8427373350000544, + "grad_norm": 1.1425627555183837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379960 + }, + { + "epoch": 1.8427858331928908, + "grad_norm": 8.659454486803497e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379970 + }, + { + "epoch": 1.8428343313857267, + "grad_norm": 1.2272076688191191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379980 + }, + { + "epoch": 1.8428828295785629, + "grad_norm": 1.0086619539606545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 379990 + }, + { + "epoch": 1.842931327771399, + "grad_norm": 1.5507836792494345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380000 + }, + { + "epoch": 1.842979825964235, + "grad_norm": 1.374890867822387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380010 + }, + { + "epoch": 1.843028324157071, + "grad_norm": 9.22068288389255e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380020 + }, + { + "epoch": 1.8430768223499072, + "grad_norm": 8.93694274139989e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380030 + }, + { + "epoch": 1.8431253205427431, + "grad_norm": 1.2793506698471901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380040 + }, + { + "epoch": 1.8431738187355795, + "grad_norm": 1.3681282773347903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380050 + }, + { + "epoch": 1.8432223169284154, + "grad_norm": 8.27781221346413e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380060 + }, + { + "epoch": 1.8432708151212516, + "grad_norm": 8.715614008281136e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380070 + }, + { + "epoch": 1.8433193133140877, + "grad_norm": 7.181727657012971e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380080 + }, + { + "epoch": 1.8433678115069236, + "grad_norm": 1.085312817394879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380090 + }, + { + "epoch": 1.8434163096997598, + "grad_norm": 8.271266338510941e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380100 + }, + { + "epoch": 1.843464807892596, + "grad_norm": 1.1717875558758806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380110 + }, + { + "epoch": 1.8435133060854318, + "grad_norm": 9.528991817830956e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380120 + }, + { + "epoch": 1.8435618042782682, + "grad_norm": 9.472182149750097e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380130 + }, + { + "epoch": 1.8436103024711041, + "grad_norm": 1.048905939882161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380140 + }, + { + "epoch": 1.8436588006639403, + "grad_norm": 1.2320957587519388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380150 + }, + { + "epoch": 1.8437072988567764, + "grad_norm": 8.247885929790755e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380160 + }, + { + "epoch": 1.8437557970496123, + "grad_norm": 1.3913035168400256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380170 + }, + { + "epoch": 1.8438042952424485, + "grad_norm": 9.237959730512557e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380180 + }, + { + "epoch": 1.8438527934352846, + "grad_norm": 1.5885111892544046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380190 + }, + { + "epoch": 1.8439012916281206, + "grad_norm": 1.1399630572839214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380200 + }, + { + "epoch": 1.843949789820957, + "grad_norm": 9.832700875733735e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380210 + }, + { + "epoch": 1.8439982880137928, + "grad_norm": 9.886785612422955e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380220 + }, + { + "epoch": 1.844046786206629, + "grad_norm": 9.2818472907652e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380230 + }, + { + "epoch": 1.8440952843994651, + "grad_norm": 1.2732864540510036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380240 + }, + { + "epoch": 1.844143782592301, + "grad_norm": 1.0373256031925848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380250 + }, + { + "epoch": 1.8441922807851372, + "grad_norm": 1.1922506537587196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380260 + }, + { + "epoch": 1.8442407789779733, + "grad_norm": 1.2161281759404119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380270 + }, + { + "epoch": 1.8442892771708093, + "grad_norm": 1.1614780248692114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380280 + }, + { + "epoch": 1.8443377753636456, + "grad_norm": 1.2211876843082337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380290 + }, + { + "epoch": 1.8443862735564815, + "grad_norm": 1.2497734402927563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380300 + }, + { + "epoch": 1.8444347717493177, + "grad_norm": 1.777273261893697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380310 + }, + { + "epoch": 1.8444832699421538, + "grad_norm": 1.025104445773195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380320 + }, + { + "epoch": 1.8445317681349898, + "grad_norm": 1.0114700188523784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380330 + }, + { + "epoch": 1.8445802663278261, + "grad_norm": 1.0029212127449227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380340 + }, + { + "epoch": 1.844628764520662, + "grad_norm": 1.0299896935350716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380350 + }, + { + "epoch": 1.8446772627134982, + "grad_norm": 1.5330842373373343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380360 + }, + { + "epoch": 1.8447257609063343, + "grad_norm": 1.0927363902624165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380370 + }, + { + "epoch": 1.8447742590991703, + "grad_norm": 1.1490147500126113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380380 + }, + { + "epoch": 1.8448227572920064, + "grad_norm": 1.0258961680165157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380390 + }, + { + "epoch": 1.8448712554848425, + "grad_norm": 9.14290776421467e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380400 + }, + { + "epoch": 1.8449197536776785, + "grad_norm": 1.0833358210504684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380410 + }, + { + "epoch": 1.8449682518705148, + "grad_norm": 1.5084800963904854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380420 + }, + { + "epoch": 1.8450167500633508, + "grad_norm": 6.19677376079153e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380430 + }, + { + "epoch": 1.845065248256187, + "grad_norm": 1.4652026258943351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380440 + }, + { + "epoch": 1.845113746449023, + "grad_norm": 1.9474702739330496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380450 + }, + { + "epoch": 1.845162244641859, + "grad_norm": 1.3626773487374066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380460 + }, + { + "epoch": 1.845210742834695, + "grad_norm": 8.58036752759972e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380470 + }, + { + "epoch": 1.8452592410275312, + "grad_norm": 1.0840473407824902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380480 + }, + { + "epoch": 1.8453077392203672, + "grad_norm": 1.4632978384554463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380490 + }, + { + "epoch": 1.8453562374132035, + "grad_norm": 9.16396292183208e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380500 + }, + { + "epoch": 1.8454047356060395, + "grad_norm": 1.274662597694487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380510 + }, + { + "epoch": 1.8454532337988756, + "grad_norm": 1.6900699506550154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380520 + }, + { + "epoch": 1.8455017319917117, + "grad_norm": 9.949153501054298e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380530 + }, + { + "epoch": 1.8455502301845477, + "grad_norm": 1.3341581173165196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380540 + }, + { + "epoch": 1.8455987283773838, + "grad_norm": 1.3777259333380698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380550 + }, + { + "epoch": 1.84564722657022, + "grad_norm": 1.5239265849231742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380560 + }, + { + "epoch": 1.8456957247630559, + "grad_norm": 1.4517481439213498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380570 + }, + { + "epoch": 1.8457442229558922, + "grad_norm": 1.5952146270592493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380580 + }, + { + "epoch": 1.8457927211487282, + "grad_norm": 1.7349956138446032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380590 + }, + { + "epoch": 1.8458412193415643, + "grad_norm": 1.4502851364284197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380600 + }, + { + "epoch": 1.8458897175344005, + "grad_norm": 7.835087245666728e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380610 + }, + { + "epoch": 1.8459382157272364, + "grad_norm": 1.1030416580126712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380620 + }, + { + "epoch": 1.8459867139200725, + "grad_norm": 8.105085491649788e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380630 + }, + { + "epoch": 1.8460352121129087, + "grad_norm": 1.7107964822571375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380640 + }, + { + "epoch": 1.8460837103057446, + "grad_norm": 1.2497663348653987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380650 + }, + { + "epoch": 1.846132208498581, + "grad_norm": 1.9689888830498603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380660 + }, + { + "epoch": 1.8461807066914169, + "grad_norm": 1.041856645400685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380670 + }, + { + "epoch": 1.846229204884253, + "grad_norm": 1.5168353684202884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380680 + }, + { + "epoch": 1.8462777030770892, + "grad_norm": 1.3768346462939007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380690 + }, + { + "epoch": 1.846326201269925, + "grad_norm": 1.1626085871796477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380700 + }, + { + "epoch": 1.8463746994627612, + "grad_norm": 1.2197426180193816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380710 + }, + { + "epoch": 1.8464231976555974, + "grad_norm": 9.509681042629836e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380720 + }, + { + "epoch": 1.8464716958484333, + "grad_norm": 1.665644866477578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380730 + }, + { + "epoch": 1.8465201940412697, + "grad_norm": 1.9518578753263682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380740 + }, + { + "epoch": 1.8465686922341056, + "grad_norm": 1.3925883557419638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380750 + }, + { + "epoch": 1.8466171904269417, + "grad_norm": 1.1147033518454919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380760 + }, + { + "epoch": 1.8466656886197779, + "grad_norm": 1.1064173577324254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380770 + }, + { + "epoch": 1.8467141868126138, + "grad_norm": 1.2667568327628942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380780 + }, + { + "epoch": 1.84676268500545, + "grad_norm": 1.2128084314610987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380790 + }, + { + "epoch": 1.846811183198286, + "grad_norm": 7.549301628273497e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380800 + }, + { + "epoch": 1.846859681391122, + "grad_norm": 1.470084232124691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380810 + }, + { + "epoch": 1.8469081795839584, + "grad_norm": 1.4888351884678741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380820 + }, + { + "epoch": 1.8469566777767943, + "grad_norm": 1.2864795451150712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380830 + }, + { + "epoch": 1.8470051759696304, + "grad_norm": 1.2610604116503055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380840 + }, + { + "epoch": 1.8470536741624666, + "grad_norm": 1.4357597777348019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380850 + }, + { + "epoch": 1.8471021723553025, + "grad_norm": 1.11871312213907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380860 + }, + { + "epoch": 1.8471506705481389, + "grad_norm": 1.0812901685142151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380870 + }, + { + "epoch": 1.8471991687409748, + "grad_norm": 1.1444230452184456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380880 + }, + { + "epoch": 1.847247666933811, + "grad_norm": 1.1210357975244278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380890 + }, + { + "epoch": 1.847296165126647, + "grad_norm": 8.860580713587751e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380900 + }, + { + "epoch": 1.847344663319483, + "grad_norm": 1.3183343305911421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380910 + }, + { + "epoch": 1.8473931615123191, + "grad_norm": 1.0715516474135711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380920 + }, + { + "epoch": 1.8474416597051553, + "grad_norm": 1.5509948880776392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380930 + }, + { + "epoch": 1.8474901578979912, + "grad_norm": 1.4339375020711032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380940 + }, + { + "epoch": 1.8475386560908276, + "grad_norm": 1.0674801487198238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380950 + }, + { + "epoch": 1.8475871542836635, + "grad_norm": 8.396619399775318e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380960 + }, + { + "epoch": 1.8476356524764996, + "grad_norm": 1.3791777497829116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380970 + }, + { + "epoch": 1.8476841506693358, + "grad_norm": 1.2586518494117627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380980 + }, + { + "epoch": 1.8477326488621717, + "grad_norm": 1.2403014615358643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 380990 + }, + { + "epoch": 1.8477811470550078, + "grad_norm": 1.5573380807154535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381000 + }, + { + "epoch": 1.847829645247844, + "grad_norm": 1.0339453737628901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381010 + }, + { + "epoch": 1.84787814344068, + "grad_norm": 7.946987956586327e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381020 + }, + { + "epoch": 1.8479266416335163, + "grad_norm": 1.4525071811988255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381030 + }, + { + "epoch": 1.8479751398263522, + "grad_norm": 1.4341175358367764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381040 + }, + { + "epoch": 1.8480236380191883, + "grad_norm": 1.2023893880552805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381050 + }, + { + "epoch": 1.8480721362120245, + "grad_norm": 1.3652893038340608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381060 + }, + { + "epoch": 1.8481206344048604, + "grad_norm": 1.0501551628294692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381070 + }, + { + "epoch": 1.8481691325976966, + "grad_norm": 1.1336473981771178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381080 + }, + { + "epoch": 1.8482176307905327, + "grad_norm": 1.7233126925475517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381090 + }, + { + "epoch": 1.8482661289833686, + "grad_norm": 1.2580185781985165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381100 + }, + { + "epoch": 1.848314627176205, + "grad_norm": 9.660704236580386e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381110 + }, + { + "epoch": 1.848363125369041, + "grad_norm": 8.713482380073856e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381120 + }, + { + "epoch": 1.848411623561877, + "grad_norm": 9.173163562081754e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381130 + }, + { + "epoch": 1.8484601217547132, + "grad_norm": 9.376177168007871e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381140 + }, + { + "epoch": 1.8485086199475491, + "grad_norm": 6.819889097897658e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381150 + }, + { + "epoch": 1.8485571181403853, + "grad_norm": 1.3041380420020232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381160 + }, + { + "epoch": 1.8486056163332214, + "grad_norm": 7.63015073346196e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381170 + }, + { + "epoch": 1.8486541145260573, + "grad_norm": 1.608768762650925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381180 + }, + { + "epoch": 1.8487026127188937, + "grad_norm": 1.151429618317934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381190 + }, + { + "epoch": 1.8487511109117296, + "grad_norm": 1.240376334976645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381200 + }, + { + "epoch": 1.8487996091045658, + "grad_norm": 9.83191750236756e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381210 + }, + { + "epoch": 1.848848107297402, + "grad_norm": 1.035314056707648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381220 + }, + { + "epoch": 1.8488966054902378, + "grad_norm": 1.514623804155235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381230 + }, + { + "epoch": 1.848945103683074, + "grad_norm": 1.4512651524967168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381240 + }, + { + "epoch": 1.84899360187591, + "grad_norm": 9.963676106394814e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381250 + }, + { + "epoch": 1.849042100068746, + "grad_norm": 1.2519553393985916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381260 + }, + { + "epoch": 1.8490905982615824, + "grad_norm": 1.0350141188553152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381270 + }, + { + "epoch": 1.8491390964544183, + "grad_norm": 8.14216338795859e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381280 + }, + { + "epoch": 1.8491875946472545, + "grad_norm": 1.2820689399006824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381290 + }, + { + "epoch": 1.8492360928400906, + "grad_norm": 1.1208306283094771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381300 + }, + { + "epoch": 1.8492845910329265, + "grad_norm": 1.3917446750610907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381310 + }, + { + "epoch": 1.8493330892257627, + "grad_norm": 1.3605998105958861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381320 + }, + { + "epoch": 1.8493815874185988, + "grad_norm": 1.3191331582618204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381330 + }, + { + "epoch": 1.8494300856114347, + "grad_norm": 1.515465619661427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381340 + }, + { + "epoch": 1.849478583804271, + "grad_norm": 9.248561028130098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381350 + }, + { + "epoch": 1.849527081997107, + "grad_norm": 1.0480992074235473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381360 + }, + { + "epoch": 1.8495755801899432, + "grad_norm": 1.5832149813377328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381370 + }, + { + "epoch": 1.8496240783827793, + "grad_norm": 7.035809712618857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381380 + }, + { + "epoch": 1.8496725765756152, + "grad_norm": 1.278364880619165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381390 + }, + { + "epoch": 1.8497210747684516, + "grad_norm": 1.4739446108080756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381400 + }, + { + "epoch": 1.8497695729612875, + "grad_norm": 1.0891795909628854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381410 + }, + { + "epoch": 1.8498180711541237, + "grad_norm": 1.1172359037914248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381420 + }, + { + "epoch": 1.8498665693469598, + "grad_norm": 1.0642372316738147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381430 + }, + { + "epoch": 1.8499150675397957, + "grad_norm": 1.161467277910333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381440 + }, + { + "epoch": 1.8499635657326319, + "grad_norm": 1.3563711931396938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381450 + }, + { + "epoch": 1.850012063925468, + "grad_norm": 1.0773709036016044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381460 + }, + { + "epoch": 1.850060562118304, + "grad_norm": 1.0884317447334979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381470 + }, + { + "epoch": 1.8501090603111403, + "grad_norm": 8.89446649665615e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381480 + }, + { + "epoch": 1.8501575585039762, + "grad_norm": 1.102127011876064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381490 + }, + { + "epoch": 1.8502060566968124, + "grad_norm": 1.665298121622527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381500 + }, + { + "epoch": 1.8502545548896485, + "grad_norm": 1.3521236574831619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381510 + }, + { + "epoch": 1.8503030530824844, + "grad_norm": 1.2177530983592533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381520 + }, + { + "epoch": 1.8503515512753206, + "grad_norm": 1.03995834166426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381530 + }, + { + "epoch": 1.8504000494681567, + "grad_norm": 1.5089300475779055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381540 + }, + { + "epoch": 1.8504485476609926, + "grad_norm": 1.6430027116598467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381550 + }, + { + "epoch": 1.850497045853829, + "grad_norm": 1.0132340300117448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381560 + }, + { + "epoch": 1.850545544046665, + "grad_norm": 1.0370620806554598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381570 + }, + { + "epoch": 1.850594042239501, + "grad_norm": 1.614278311024009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381580 + }, + { + "epoch": 1.8506425404323372, + "grad_norm": 1.2924436632033576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381590 + }, + { + "epoch": 1.8506910386251731, + "grad_norm": 1.6384475998165726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381600 + }, + { + "epoch": 1.8507395368180093, + "grad_norm": 7.842759330856097e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381610 + }, + { + "epoch": 1.8507880350108454, + "grad_norm": 1.0231116398529139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381620 + }, + { + "epoch": 1.8508365332036814, + "grad_norm": 1.3215490035634048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381630 + }, + { + "epoch": 1.8508850313965177, + "grad_norm": 8.65796767612892e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381640 + }, + { + "epoch": 1.8509335295893536, + "grad_norm": 1.2087791212422871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381650 + }, + { + "epoch": 1.8509820277821898, + "grad_norm": 8.453886479742323e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381660 + }, + { + "epoch": 1.851030525975026, + "grad_norm": 9.140868506563038e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381670 + }, + { + "epoch": 1.8510790241678619, + "grad_norm": 1.0707204900484157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381680 + }, + { + "epoch": 1.851127522360698, + "grad_norm": 1.1065062643922374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381690 + }, + { + "epoch": 1.8511760205535341, + "grad_norm": 1.0917148074440775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381700 + }, + { + "epoch": 1.85122451874637, + "grad_norm": 9.420425328698911e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381710 + }, + { + "epoch": 1.8512730169392064, + "grad_norm": 1.0088378132877551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381720 + }, + { + "epoch": 1.8513215151320424, + "grad_norm": 1.660260195990304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381730 + }, + { + "epoch": 1.8513700133248785, + "grad_norm": 1.3626695327673133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381740 + }, + { + "epoch": 1.8514185115177146, + "grad_norm": 1.1895312290732818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381750 + }, + { + "epoch": 1.8514670097105506, + "grad_norm": 2.433745116547925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381760 + }, + { + "epoch": 1.8515155079033867, + "grad_norm": 1.4026538153188994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381770 + }, + { + "epoch": 1.8515640060962228, + "grad_norm": 1.510471570043137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381780 + }, + { + "epoch": 1.8516125042890588, + "grad_norm": 1.4886021304505448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381790 + }, + { + "epoch": 1.8516610024818951, + "grad_norm": 1.1005061750779532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381800 + }, + { + "epoch": 1.851709500674731, + "grad_norm": 1.2743045729735059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381810 + }, + { + "epoch": 1.8517579988675672, + "grad_norm": 1.3079970884177783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381820 + }, + { + "epoch": 1.8518064970604033, + "grad_norm": 1.8266607781924904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381830 + }, + { + "epoch": 1.8518549952532393, + "grad_norm": 1.02656496636655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381840 + }, + { + "epoch": 1.8519034934460754, + "grad_norm": 8.550484764668909e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381850 + }, + { + "epoch": 1.8519519916389116, + "grad_norm": 7.738075069596562e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381860 + }, + { + "epoch": 1.8520004898317477, + "grad_norm": 2.793649755972183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381870 + }, + { + "epoch": 1.8520489880245838, + "grad_norm": 6.777229888399461e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381880 + }, + { + "epoch": 1.8520974862174198, + "grad_norm": 9.977419779261254e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381890 + }, + { + "epoch": 1.852145984410256, + "grad_norm": 9.278280366231684e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381900 + }, + { + "epoch": 1.852194482603092, + "grad_norm": 1.273196215123562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381910 + }, + { + "epoch": 1.852242980795928, + "grad_norm": 1.1564661228646855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381920 + }, + { + "epoch": 1.8522914789887643, + "grad_norm": 1.6308801420450436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381930 + }, + { + "epoch": 1.8523399771816003, + "grad_norm": 1.4319096131032438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381940 + }, + { + "epoch": 1.8523884753744364, + "grad_norm": 1.370148794421766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381950 + }, + { + "epoch": 1.8524369735672725, + "grad_norm": 1.8490959874384316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381960 + }, + { + "epoch": 1.8524854717601085, + "grad_norm": 9.531679445728969e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381970 + }, + { + "epoch": 1.8525339699529446, + "grad_norm": 7.2618484558972796e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381980 + }, + { + "epoch": 1.8525824681457808, + "grad_norm": 9.01427377186792e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 381990 + }, + { + "epoch": 1.8526309663386167, + "grad_norm": 1.2371648594466933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382000 + }, + { + "epoch": 1.852679464531453, + "grad_norm": 1.2435022789247796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382010 + }, + { + "epoch": 1.852727962724289, + "grad_norm": 8.484573932321382e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382020 + }, + { + "epoch": 1.8527764609171251, + "grad_norm": 1.907792324118418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382030 + }, + { + "epoch": 1.8528249591099613, + "grad_norm": 8.429694275946531e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382040 + }, + { + "epoch": 1.8528734573027972, + "grad_norm": 1.6228449695177005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382050 + }, + { + "epoch": 1.8529219554956333, + "grad_norm": 1.2936031801302761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382060 + }, + { + "epoch": 1.8529704536884695, + "grad_norm": 1.0755334400869287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382070 + }, + { + "epoch": 1.8530189518813054, + "grad_norm": 1.0309062936642022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382080 + }, + { + "epoch": 1.8530674500741418, + "grad_norm": 1.3102837925771382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382090 + }, + { + "epoch": 1.8531159482669777, + "grad_norm": 1.1388237020071301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382100 + }, + { + "epoch": 1.8531644464598138, + "grad_norm": 1.4570231243737908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382110 + }, + { + "epoch": 1.85321294465265, + "grad_norm": 1.0240447601006508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382120 + }, + { + "epoch": 1.8532614428454859, + "grad_norm": 1.3216950200956035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382130 + }, + { + "epoch": 1.853309941038322, + "grad_norm": 1.1662200094519903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382140 + }, + { + "epoch": 1.8533584392311582, + "grad_norm": 2.047709735109038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382150 + }, + { + "epoch": 1.853406937423994, + "grad_norm": 1.600063903595128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382160 + }, + { + "epoch": 1.8534554356168305, + "grad_norm": 1.4524001556992516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382170 + }, + { + "epoch": 1.8535039338096664, + "grad_norm": 1.036631935846799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382180 + }, + { + "epoch": 1.8535524320025025, + "grad_norm": 1.990679443508725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382190 + }, + { + "epoch": 1.8536009301953387, + "grad_norm": 8.48052383872755e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382200 + }, + { + "epoch": 1.8536494283881746, + "grad_norm": 1.2056071696520121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382210 + }, + { + "epoch": 1.8536979265810107, + "grad_norm": 1.2779220348591025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382220 + }, + { + "epoch": 1.8537464247738469, + "grad_norm": 1.616410116866973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382230 + }, + { + "epoch": 1.8537949229666828, + "grad_norm": 1.1717866676974609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382240 + }, + { + "epoch": 1.8538434211595192, + "grad_norm": 1.1606952732279296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382250 + }, + { + "epoch": 1.853891919352355, + "grad_norm": 1.0530211369541576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382260 + }, + { + "epoch": 1.8539404175451912, + "grad_norm": 1.644477087836549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382270 + }, + { + "epoch": 1.8539889157380274, + "grad_norm": 1.244202962880081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382280 + }, + { + "epoch": 1.8540374139308633, + "grad_norm": 1.0297816821491779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382290 + }, + { + "epoch": 1.8540859121236994, + "grad_norm": 1.3071856486135403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382300 + }, + { + "epoch": 1.8541344103165356, + "grad_norm": 1.3293788292401132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382310 + }, + { + "epoch": 1.8541829085093715, + "grad_norm": 8.833993092594028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382320 + }, + { + "epoch": 1.8542314067022079, + "grad_norm": 1.386128278824117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382330 + }, + { + "epoch": 1.8542799048950438, + "grad_norm": 1.068957367067469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382340 + }, + { + "epoch": 1.85432840308788, + "grad_norm": 1.6221083143364012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382350 + }, + { + "epoch": 1.854376901280716, + "grad_norm": 9.497662212254454e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382360 + }, + { + "epoch": 1.854425399473552, + "grad_norm": 1.0768900438051787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382370 + }, + { + "epoch": 1.8544738976663884, + "grad_norm": 1.4032907280636664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382380 + }, + { + "epoch": 1.8545223958592243, + "grad_norm": 8.365476311666953e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382390 + }, + { + "epoch": 1.8545708940520604, + "grad_norm": 1.368682767122209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382400 + }, + { + "epoch": 1.8546193922448966, + "grad_norm": 1.5127540109460824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382410 + }, + { + "epoch": 1.8546678904377325, + "grad_norm": 1.0034779229783908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382420 + }, + { + "epoch": 1.8547163886305686, + "grad_norm": 1.173210684157766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382430 + }, + { + "epoch": 1.8547648868234048, + "grad_norm": 1.0170232656037115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382440 + }, + { + "epoch": 1.8548133850162407, + "grad_norm": 1.3739568593962304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382450 + }, + { + "epoch": 1.854861883209077, + "grad_norm": 1.1556114287714081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382460 + }, + { + "epoch": 1.854910381401913, + "grad_norm": 7.867456019994279e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382470 + }, + { + "epoch": 1.8549588795947491, + "grad_norm": 9.490895180874759e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382480 + }, + { + "epoch": 1.8550073777875853, + "grad_norm": 9.043917614803831e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382490 + }, + { + "epoch": 1.8550558759804212, + "grad_norm": 9.243631637900762e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382500 + }, + { + "epoch": 1.8551043741732574, + "grad_norm": 1.5723660595767797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382510 + }, + { + "epoch": 1.8551528723660935, + "grad_norm": 1.3062906312200084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382520 + }, + { + "epoch": 1.8552013705589294, + "grad_norm": 1.7040518329736187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382530 + }, + { + "epoch": 1.8552498687517658, + "grad_norm": 1.6220067067251875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382540 + }, + { + "epoch": 1.8552983669446017, + "grad_norm": 7.929686240970568e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382550 + }, + { + "epoch": 1.8553468651374379, + "grad_norm": 1.3912500484991597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382560 + }, + { + "epoch": 1.855395363330274, + "grad_norm": 9.563478009511073e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382570 + }, + { + "epoch": 1.85544386152311, + "grad_norm": 1.7763930770797742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382580 + }, + { + "epoch": 1.855492359715946, + "grad_norm": 1.011788341997999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382590 + }, + { + "epoch": 1.8555408579087822, + "grad_norm": 1.2924202152930775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382600 + }, + { + "epoch": 1.8555893561016181, + "grad_norm": 1.6938662028564977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382610 + }, + { + "epoch": 1.8556378542944545, + "grad_norm": 1.0317791954150834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382620 + }, + { + "epoch": 1.8556863524872904, + "grad_norm": 1.3028498280220902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382630 + }, + { + "epoch": 1.8557348506801266, + "grad_norm": 1.226192303249718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382640 + }, + { + "epoch": 1.8557833488729627, + "grad_norm": 1.1955129330942782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382650 + }, + { + "epoch": 1.8558318470657986, + "grad_norm": 2.0823282653736896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382660 + }, + { + "epoch": 1.8558803452586348, + "grad_norm": 1.4202806042362681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382670 + }, + { + "epoch": 1.855928843451471, + "grad_norm": 1.0405464045959434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382680 + }, + { + "epoch": 1.8559773416443068, + "grad_norm": 1.7736411450641754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382690 + }, + { + "epoch": 1.8560258398371432, + "grad_norm": 2.396728682185767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382700 + }, + { + "epoch": 1.8560743380299791, + "grad_norm": 1.1197994531642053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382710 + }, + { + "epoch": 1.8561228362228153, + "grad_norm": 9.056623007097642e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382720 + }, + { + "epoch": 1.8561713344156514, + "grad_norm": 1.0177520159970754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382730 + }, + { + "epoch": 1.8562198326084873, + "grad_norm": 1.0382785298190811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382740 + }, + { + "epoch": 1.8562683308013235, + "grad_norm": 1.8516766786547123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382750 + }, + { + "epoch": 1.8563168289941596, + "grad_norm": 1.1394747367887703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382760 + }, + { + "epoch": 1.8563653271869955, + "grad_norm": 1.3704366530475909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382770 + }, + { + "epoch": 1.856413825379832, + "grad_norm": 1.4731255326694281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382780 + }, + { + "epoch": 1.8564623235726678, + "grad_norm": 1.4121958713531058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382790 + }, + { + "epoch": 1.856510821765504, + "grad_norm": 1.4422848693129708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382800 + }, + { + "epoch": 1.8565593199583401, + "grad_norm": 1.0084090007467239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382810 + }, + { + "epoch": 1.856607818151176, + "grad_norm": 1.091544188369653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382820 + }, + { + "epoch": 1.8566563163440122, + "grad_norm": 1.1562362622896671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382830 + }, + { + "epoch": 1.8567048145368483, + "grad_norm": 1.5935532005073583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382840 + }, + { + "epoch": 1.8567533127296842, + "grad_norm": 9.621001773041371e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382850 + }, + { + "epoch": 1.8568018109225206, + "grad_norm": 9.351071028618207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382860 + }, + { + "epoch": 1.8568503091153565, + "grad_norm": 1.1038060243606651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382870 + }, + { + "epoch": 1.8568988073081927, + "grad_norm": 1.5580617684918252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382880 + }, + { + "epoch": 1.8569473055010288, + "grad_norm": 1.0666484584476166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382890 + }, + { + "epoch": 1.8569958036938647, + "grad_norm": 6.357404824797186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382900 + }, + { + "epoch": 1.857044301886701, + "grad_norm": 1.288458584269847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382910 + }, + { + "epoch": 1.857092800079537, + "grad_norm": 1.388148085368357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382920 + }, + { + "epoch": 1.8571412982723732, + "grad_norm": 9.636774045418406e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382930 + }, + { + "epoch": 1.8571897964652093, + "grad_norm": 9.902803910222246e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382940 + }, + { + "epoch": 1.8572382946580452, + "grad_norm": 9.67212709923615e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382950 + }, + { + "epoch": 1.8572867928508814, + "grad_norm": 1.528625226399072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382960 + }, + { + "epoch": 1.8573352910437175, + "grad_norm": 1.0936484606816066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382970 + }, + { + "epoch": 1.8573837892365535, + "grad_norm": 1.1825037837809305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382980 + }, + { + "epoch": 1.8574322874293898, + "grad_norm": 1.1391534826543648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 382990 + }, + { + "epoch": 1.8574807856222257, + "grad_norm": 1.4090834277169506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383000 + }, + { + "epoch": 1.8575292838150619, + "grad_norm": 2.044199476358699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383010 + }, + { + "epoch": 1.857577782007898, + "grad_norm": 1.1723829018706056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383020 + }, + { + "epoch": 1.857626280200734, + "grad_norm": 1.3194718206932521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383030 + }, + { + "epoch": 1.85767477839357, + "grad_norm": 6.977010080788659e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383040 + }, + { + "epoch": 1.8577232765864062, + "grad_norm": 9.239140119632339e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383050 + }, + { + "epoch": 1.8577717747792422, + "grad_norm": 2.3655633896169093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383060 + }, + { + "epoch": 1.8578202729720785, + "grad_norm": 1.4123481939520843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383070 + }, + { + "epoch": 1.8578687711649144, + "grad_norm": 1.0703905317654971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383080 + }, + { + "epoch": 1.8579172693577506, + "grad_norm": 1.4360010958114344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383090 + }, + { + "epoch": 1.8579657675505867, + "grad_norm": 1.1765857621526266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383100 + }, + { + "epoch": 1.8580142657434227, + "grad_norm": 9.97562388249662e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383110 + }, + { + "epoch": 1.8580627639362588, + "grad_norm": 8.486152225373189e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383120 + }, + { + "epoch": 1.858111262129095, + "grad_norm": 1.2501160107092346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383130 + }, + { + "epoch": 1.8581597603219309, + "grad_norm": 9.368825715228013e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383140 + }, + { + "epoch": 1.8582082585147672, + "grad_norm": 1.4139051707218186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383150 + }, + { + "epoch": 1.8582567567076032, + "grad_norm": 1.1682517175870544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383160 + }, + { + "epoch": 1.8583052549004393, + "grad_norm": 1.919462455646226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383170 + }, + { + "epoch": 1.8583537530932754, + "grad_norm": 9.112492982410458e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383180 + }, + { + "epoch": 1.8584022512861114, + "grad_norm": 1.1700874047448906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383190 + }, + { + "epoch": 1.8584507494789475, + "grad_norm": 1.698530205374027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383200 + }, + { + "epoch": 1.8584992476717836, + "grad_norm": 9.776950804507578e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383210 + }, + { + "epoch": 1.8585477458646196, + "grad_norm": 9.181186477746905e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383220 + }, + { + "epoch": 1.858596244057456, + "grad_norm": 1.426023921169417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383230 + }, + { + "epoch": 1.8586447422502919, + "grad_norm": 1.053570564124584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383240 + }, + { + "epoch": 1.858693240443128, + "grad_norm": 1.1535670196849424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383250 + }, + { + "epoch": 1.8587417386359641, + "grad_norm": 8.55646486996875e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383260 + }, + { + "epoch": 1.8587902368288, + "grad_norm": 1.2213214439782405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383270 + }, + { + "epoch": 1.8588387350216362, + "grad_norm": 1.4028284311962125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383280 + }, + { + "epoch": 1.8588872332144724, + "grad_norm": 9.265833433858006e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383290 + }, + { + "epoch": 1.8589357314073083, + "grad_norm": 1.0459220156633364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383300 + }, + { + "epoch": 1.8589842296001446, + "grad_norm": 1.0672616568285775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383310 + }, + { + "epoch": 1.8590327277929806, + "grad_norm": 1.0041357967338627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383320 + }, + { + "epoch": 1.8590812259858167, + "grad_norm": 1.2984209263322555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383330 + }, + { + "epoch": 1.8591297241786529, + "grad_norm": 1.264278370882721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383340 + }, + { + "epoch": 1.8591782223714888, + "grad_norm": 8.946425822387027e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383350 + }, + { + "epoch": 1.859226720564325, + "grad_norm": 1.751431355501154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383360 + }, + { + "epoch": 1.859275218757161, + "grad_norm": 1.1735393101730551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383370 + }, + { + "epoch": 1.859323716949997, + "grad_norm": 1.4089513555859412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383380 + }, + { + "epoch": 1.8593722151428334, + "grad_norm": 8.843739074393397e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383390 + }, + { + "epoch": 1.8594207133356693, + "grad_norm": 1.1454384107878468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383400 + }, + { + "epoch": 1.8594692115285054, + "grad_norm": 1.4827318928212208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383410 + }, + { + "epoch": 1.8595177097213416, + "grad_norm": 1.2599521426182037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383420 + }, + { + "epoch": 1.8595662079141775, + "grad_norm": 8.985787225412878e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383430 + }, + { + "epoch": 1.8596147061070138, + "grad_norm": 8.118165695236712e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383440 + }, + { + "epoch": 1.8596632042998498, + "grad_norm": 1.1960479717743056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383450 + }, + { + "epoch": 1.859711702492686, + "grad_norm": 1.2772132684801818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383460 + }, + { + "epoch": 1.859760200685522, + "grad_norm": 1.2578960095765979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383470 + }, + { + "epoch": 1.859808698878358, + "grad_norm": 1.0686685314453825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383480 + }, + { + "epoch": 1.8598571970711941, + "grad_norm": 1.9704392784092306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383490 + }, + { + "epoch": 1.8599056952640303, + "grad_norm": 8.879486479429488e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383500 + }, + { + "epoch": 1.8599541934568662, + "grad_norm": 2.0806966816167005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383510 + }, + { + "epoch": 1.8600026916497026, + "grad_norm": 1.1932678845028022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383520 + }, + { + "epoch": 1.8600511898425385, + "grad_norm": 8.979378129936322e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383530 + }, + { + "epoch": 1.8600996880353746, + "grad_norm": 8.722875755040604e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383540 + }, + { + "epoch": 1.8601481862282108, + "grad_norm": 1.1184146941900508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383550 + }, + { + "epoch": 1.8601966844210467, + "grad_norm": 1.156811268998581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383560 + }, + { + "epoch": 1.8602451826138828, + "grad_norm": 9.926763411272077e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383570 + }, + { + "epoch": 1.860293680806719, + "grad_norm": 1.2276362149066244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383580 + }, + { + "epoch": 1.860342178999555, + "grad_norm": 1.1719643033814009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383590 + }, + { + "epoch": 1.8603906771923913, + "grad_norm": 1.814168903990776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383600 + }, + { + "epoch": 1.8604391753852272, + "grad_norm": 1.754117739949379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383610 + }, + { + "epoch": 1.8604876735780633, + "grad_norm": 1.5394602925766776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383620 + }, + { + "epoch": 1.8605361717708995, + "grad_norm": 8.966600795190516e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383630 + }, + { + "epoch": 1.8605846699637354, + "grad_norm": 1.0885305101737686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383640 + }, + { + "epoch": 1.8606331681565715, + "grad_norm": 1.5225989358214065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383650 + }, + { + "epoch": 1.8606816663494077, + "grad_norm": 1.2528829529401264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383660 + }, + { + "epoch": 1.8607301645422436, + "grad_norm": 1.4948604132314358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383670 + }, + { + "epoch": 1.86077866273508, + "grad_norm": 8.381791261058424e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383680 + }, + { + "epoch": 1.860827160927916, + "grad_norm": 2.5634802724994188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383690 + }, + { + "epoch": 1.860875659120752, + "grad_norm": 1.0399821448459079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383700 + }, + { + "epoch": 1.8609241573135882, + "grad_norm": 1.1099689167792803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383710 + }, + { + "epoch": 1.860972655506424, + "grad_norm": 1.2143898331373748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383720 + }, + { + "epoch": 1.8610211536992602, + "grad_norm": 1.1873983574162139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383730 + }, + { + "epoch": 1.8610696518920964, + "grad_norm": 1.3915167684785956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383740 + }, + { + "epoch": 1.8611181500849323, + "grad_norm": 1.5377196405097493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383750 + }, + { + "epoch": 1.8611666482777687, + "grad_norm": 1.3213478311513427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383760 + }, + { + "epoch": 1.8612151464706046, + "grad_norm": 1.0088717417033877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383770 + }, + { + "epoch": 1.8612636446634407, + "grad_norm": 1.1087305296086924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383780 + }, + { + "epoch": 1.8613121428562769, + "grad_norm": 1.5814210385656224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383790 + }, + { + "epoch": 1.8613606410491128, + "grad_norm": 1.0757211121870114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383800 + }, + { + "epoch": 1.861409139241949, + "grad_norm": 1.511792646624599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383810 + }, + { + "epoch": 1.861457637434785, + "grad_norm": 1.4892053812332051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383820 + }, + { + "epoch": 1.861506135627621, + "grad_norm": 8.755523417391942e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383830 + }, + { + "epoch": 1.8615546338204574, + "grad_norm": 1.0851228360309051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383840 + }, + { + "epoch": 1.8616031320132933, + "grad_norm": 1.589786080558042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383850 + }, + { + "epoch": 1.8616516302061294, + "grad_norm": 9.828078795237616e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383860 + }, + { + "epoch": 1.8617001283989656, + "grad_norm": 1.0003043726669603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383870 + }, + { + "epoch": 1.8617486265918015, + "grad_norm": 1.6184667828156307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383880 + }, + { + "epoch": 1.8617971247846377, + "grad_norm": 1.1059127835721938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383890 + }, + { + "epoch": 1.8618456229774738, + "grad_norm": 9.816002233264953e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383900 + }, + { + "epoch": 1.86189412117031, + "grad_norm": 1.8674358059911356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383910 + }, + { + "epoch": 1.861942619363146, + "grad_norm": 1.401135474310422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383920 + }, + { + "epoch": 1.861991117555982, + "grad_norm": 1.6523030055282106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383930 + }, + { + "epoch": 1.8620396157488182, + "grad_norm": 9.914410625810888e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383940 + }, + { + "epoch": 1.8620881139416543, + "grad_norm": 1.0184915133493178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383950 + }, + { + "epoch": 1.8621366121344902, + "grad_norm": 1.2213990707721223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383960 + }, + { + "epoch": 1.8621851103273266, + "grad_norm": 1.3162870793337333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383970 + }, + { + "epoch": 1.8622336085201625, + "grad_norm": 1.2077438604762847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383980 + }, + { + "epoch": 1.8622821067129987, + "grad_norm": 1.0718330223369321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 383990 + }, + { + "epoch": 1.8623306049058348, + "grad_norm": 1.1307837333163206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384000 + }, + { + "epoch": 1.8623791030986707, + "grad_norm": 1.1256330090247957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384010 + }, + { + "epoch": 1.8624276012915069, + "grad_norm": 2.39028032922306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384020 + }, + { + "epoch": 1.862476099484343, + "grad_norm": 9.75490799248746e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384030 + }, + { + "epoch": 1.862524597677179, + "grad_norm": 1.1222807572153215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384040 + }, + { + "epoch": 1.8625730958700153, + "grad_norm": 1.3308828705760334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384050 + }, + { + "epoch": 1.8626215940628512, + "grad_norm": 1.1658050524943064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384060 + }, + { + "epoch": 1.8626700922556874, + "grad_norm": 2.2550043610181092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384070 + }, + { + "epoch": 1.8627185904485235, + "grad_norm": 1.2852845010513647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384080 + }, + { + "epoch": 1.8627670886413594, + "grad_norm": 1.3161381318127496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384090 + }, + { + "epoch": 1.8628155868341956, + "grad_norm": 8.867083955976796e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384100 + }, + { + "epoch": 1.8628640850270317, + "grad_norm": 1.8953592473280878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384110 + }, + { + "epoch": 1.8629125832198676, + "grad_norm": 8.046963095864612e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384120 + }, + { + "epoch": 1.862961081412704, + "grad_norm": 1.535777904848601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384130 + }, + { + "epoch": 1.86300957960554, + "grad_norm": 8.878933144274015e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384140 + }, + { + "epoch": 1.863058077798376, + "grad_norm": 9.142572920950442e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384150 + }, + { + "epoch": 1.8631065759912122, + "grad_norm": 1.0578070863687117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384160 + }, + { + "epoch": 1.8631550741840481, + "grad_norm": 1.1512828024251576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384170 + }, + { + "epoch": 1.8632035723768843, + "grad_norm": 8.165627285450228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384180 + }, + { + "epoch": 1.8632520705697204, + "grad_norm": 1.6150384141155882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384190 + }, + { + "epoch": 1.8633005687625563, + "grad_norm": 1.1674496924740652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384200 + }, + { + "epoch": 1.8633490669553927, + "grad_norm": 1.5591210100751596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384210 + }, + { + "epoch": 1.8633975651482286, + "grad_norm": 1.1752920414664914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384220 + }, + { + "epoch": 1.8634460633410648, + "grad_norm": 1.3854536184965127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384230 + }, + { + "epoch": 1.863494561533901, + "grad_norm": 1.3396086018246933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384240 + }, + { + "epoch": 1.8635430597267368, + "grad_norm": 1.0404288097731751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384250 + }, + { + "epoch": 1.863591557919573, + "grad_norm": 1.8124087119986143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384260 + }, + { + "epoch": 1.8636400561124091, + "grad_norm": 9.508904774691018e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384270 + }, + { + "epoch": 1.863688554305245, + "grad_norm": 2.4730439918130287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384280 + }, + { + "epoch": 1.8637370524980814, + "grad_norm": 9.554918634080423e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384290 + }, + { + "epoch": 1.8637855506909173, + "grad_norm": 8.306666465784929e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384300 + }, + { + "epoch": 1.8638340488837535, + "grad_norm": 8.837922393922781e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384310 + }, + { + "epoch": 1.8638825470765896, + "grad_norm": 1.3754987371328298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384320 + }, + { + "epoch": 1.8639310452694255, + "grad_norm": 9.67638946747229e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384330 + }, + { + "epoch": 1.8639795434622617, + "grad_norm": 1.755208245413087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384340 + }, + { + "epoch": 1.8640280416550978, + "grad_norm": 1.979353037029341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384350 + }, + { + "epoch": 1.8640765398479338, + "grad_norm": 1.9672460993547247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384360 + }, + { + "epoch": 1.8641250380407701, + "grad_norm": 1.1310065772818234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384370 + }, + { + "epoch": 1.864173536233606, + "grad_norm": 1.4995318764476906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384380 + }, + { + "epoch": 1.8642220344264422, + "grad_norm": 1.4212806043190085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384390 + }, + { + "epoch": 1.8642705326192783, + "grad_norm": 1.0130079886039312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384400 + }, + { + "epoch": 1.8643190308121143, + "grad_norm": 5.9094055870900775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384410 + }, + { + "epoch": 1.8643675290049506, + "grad_norm": 1.0955700346926278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384420 + }, + { + "epoch": 1.8644160271977865, + "grad_norm": 8.570248510864076e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384430 + }, + { + "epoch": 1.8644645253906227, + "grad_norm": 1.5154963506347485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384440 + }, + { + "epoch": 1.8645130235834588, + "grad_norm": 1.4396104752734118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384450 + }, + { + "epoch": 1.8645615217762948, + "grad_norm": 1.1269336575026045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384460 + }, + { + "epoch": 1.864610019969131, + "grad_norm": 1.0863748123313144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384470 + }, + { + "epoch": 1.864658518161967, + "grad_norm": 9.656684341052824e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384480 + }, + { + "epoch": 1.864707016354803, + "grad_norm": 8.330260925504263e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384490 + }, + { + "epoch": 1.8647555145476393, + "grad_norm": 1.6053606444188517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384500 + }, + { + "epoch": 1.8648040127404752, + "grad_norm": 1.4639640610880633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384510 + }, + { + "epoch": 1.8648525109333114, + "grad_norm": 9.0438350142108e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384520 + }, + { + "epoch": 1.8649010091261475, + "grad_norm": 9.287377977784672e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384530 + }, + { + "epoch": 1.8649495073189835, + "grad_norm": 1.51925920732765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384540 + }, + { + "epoch": 1.8649980055118196, + "grad_norm": 2.0687684454401278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384550 + }, + { + "epoch": 1.8650465037046557, + "grad_norm": 1.4145804527743167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384560 + }, + { + "epoch": 1.8650950018974917, + "grad_norm": 1.0267625860649332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384570 + }, + { + "epoch": 1.865143500090328, + "grad_norm": 8.425211639462304e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384580 + }, + { + "epoch": 1.865191998283164, + "grad_norm": 1.6995889140503095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384590 + }, + { + "epoch": 1.865240496476, + "grad_norm": 1.0898375535361993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384600 + }, + { + "epoch": 1.8652889946688362, + "grad_norm": 1.2116517567051233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384610 + }, + { + "epoch": 1.8653374928616722, + "grad_norm": 1.542394478803999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384620 + }, + { + "epoch": 1.8653859910545083, + "grad_norm": 1.1629663454471029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384630 + }, + { + "epoch": 1.8654344892473445, + "grad_norm": 1.08323039427205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384640 + }, + { + "epoch": 1.8654829874401804, + "grad_norm": 1.067061994319829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384650 + }, + { + "epoch": 1.8655314856330167, + "grad_norm": 1.84215824816647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384660 + }, + { + "epoch": 1.8655799838258527, + "grad_norm": 1.1484069695200105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384670 + }, + { + "epoch": 1.8656284820186888, + "grad_norm": 1.4790489721860922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384680 + }, + { + "epoch": 1.865676980211525, + "grad_norm": 9.116503996153824e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384690 + }, + { + "epoch": 1.8657254784043609, + "grad_norm": 9.270861411891929e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384700 + }, + { + "epoch": 1.865773976597197, + "grad_norm": 1.2101669000230686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384710 + }, + { + "epoch": 1.8658224747900332, + "grad_norm": 1.300622987088218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384720 + }, + { + "epoch": 1.865870972982869, + "grad_norm": 1.8397964041128034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384730 + }, + { + "epoch": 1.8659194711757054, + "grad_norm": 9.919522092616262e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384740 + }, + { + "epoch": 1.8659679693685414, + "grad_norm": 1.1987906667343395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384750 + }, + { + "epoch": 1.8660164675613775, + "grad_norm": 1.2271983429457123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384760 + }, + { + "epoch": 1.8660649657542137, + "grad_norm": 1.212837297259739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384770 + }, + { + "epoch": 1.8661134639470496, + "grad_norm": 9.47628908676279e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384780 + }, + { + "epoch": 1.8661619621398857, + "grad_norm": 9.892790586718547e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384790 + }, + { + "epoch": 1.8662104603327219, + "grad_norm": 1.041699704273924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384800 + }, + { + "epoch": 1.8662589585255578, + "grad_norm": 1.11043192418947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384810 + }, + { + "epoch": 1.8663074567183942, + "grad_norm": 1.8361054898718976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384820 + }, + { + "epoch": 1.86635595491123, + "grad_norm": 1.5970449851465673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384830 + }, + { + "epoch": 1.8664044531040662, + "grad_norm": 1.3661964004541005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384840 + }, + { + "epoch": 1.8664529512969024, + "grad_norm": 9.675343193293884e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384850 + }, + { + "epoch": 1.8665014494897383, + "grad_norm": 1.0748411050087725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384860 + }, + { + "epoch": 1.8665499476825744, + "grad_norm": 1.0318186305369181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384870 + }, + { + "epoch": 1.8665984458754106, + "grad_norm": 1.4943401183131755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384880 + }, + { + "epoch": 1.8666469440682465, + "grad_norm": 1.1349841955166085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384890 + }, + { + "epoch": 1.8666954422610829, + "grad_norm": 1.355662604396457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384900 + }, + { + "epoch": 1.8667439404539188, + "grad_norm": 1.1273933786526413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384910 + }, + { + "epoch": 1.866792438646755, + "grad_norm": 8.530833817133043e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384920 + }, + { + "epoch": 1.866840936839591, + "grad_norm": 1.0721644905231642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384930 + }, + { + "epoch": 1.866889435032427, + "grad_norm": 8.494111192192122e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384940 + }, + { + "epoch": 1.8669379332252634, + "grad_norm": 1.1927711263126639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384950 + }, + { + "epoch": 1.8669864314180993, + "grad_norm": 7.370509980120232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384960 + }, + { + "epoch": 1.8670349296109354, + "grad_norm": 7.776476351750716e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384970 + }, + { + "epoch": 1.8670834278037716, + "grad_norm": 1.3309174207165597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384980 + }, + { + "epoch": 1.8671319259966075, + "grad_norm": 1.5685760246242353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 384990 + }, + { + "epoch": 1.8671804241894436, + "grad_norm": 2.4005709420293897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385000 + }, + { + "epoch": 1.8672289223822798, + "grad_norm": 1.1936928778766287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385010 + }, + { + "epoch": 1.8672774205751157, + "grad_norm": 1.674006355756319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385020 + }, + { + "epoch": 1.867325918767952, + "grad_norm": 1.1638814356729199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385030 + }, + { + "epoch": 1.867374416960788, + "grad_norm": 1.665265436656682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385040 + }, + { + "epoch": 1.8674229151536241, + "grad_norm": 1.0032420227901184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385050 + }, + { + "epoch": 1.8674714133464603, + "grad_norm": 7.628810472226633e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385060 + }, + { + "epoch": 1.8675199115392962, + "grad_norm": 1.5132167519027462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385070 + }, + { + "epoch": 1.8675684097321323, + "grad_norm": 1.1669015975712682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385080 + }, + { + "epoch": 1.8676169079249685, + "grad_norm": 1.1196625848697295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385090 + }, + { + "epoch": 1.8676654061178044, + "grad_norm": 1.1666063670645599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385100 + }, + { + "epoch": 1.8677139043106408, + "grad_norm": 9.964300495823863e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385110 + }, + { + "epoch": 1.8677624025034767, + "grad_norm": 9.036174475340886e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385120 + }, + { + "epoch": 1.8678109006963128, + "grad_norm": 1.337002952794819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385130 + }, + { + "epoch": 1.867859398889149, + "grad_norm": 1.3984176483461397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385140 + }, + { + "epoch": 1.867907897081985, + "grad_norm": 1.320016984607264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385150 + }, + { + "epoch": 1.867956395274821, + "grad_norm": 1.2214186106973557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385160 + }, + { + "epoch": 1.8680048934676572, + "grad_norm": 1.7631650806038124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385170 + }, + { + "epoch": 1.8680533916604931, + "grad_norm": 7.793916623199948e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385180 + }, + { + "epoch": 1.8681018898533295, + "grad_norm": 1.491131129682799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385190 + }, + { + "epoch": 1.8681503880461654, + "grad_norm": 7.618041308887769e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385200 + }, + { + "epoch": 1.8681988862390015, + "grad_norm": 1.2316915487531332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385210 + }, + { + "epoch": 1.8682473844318377, + "grad_norm": 1.0083183177300725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385220 + }, + { + "epoch": 1.8682958826246736, + "grad_norm": 1.0429491936747581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385230 + }, + { + "epoch": 1.8683443808175098, + "grad_norm": 9.578854154312921e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385240 + }, + { + "epoch": 1.868392879010346, + "grad_norm": 9.279487400704056e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385250 + }, + { + "epoch": 1.8684413772031818, + "grad_norm": 1.0679362283383398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385260 + }, + { + "epoch": 1.8684898753960182, + "grad_norm": 1.6404198888153587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385270 + }, + { + "epoch": 1.868538373588854, + "grad_norm": 6.789639517279511e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385280 + }, + { + "epoch": 1.8685868717816903, + "grad_norm": 1.1379837516756197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385290 + }, + { + "epoch": 1.8686353699745264, + "grad_norm": 1.1512450548423203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385300 + }, + { + "epoch": 1.8686838681673623, + "grad_norm": 1.3297965395508982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385310 + }, + { + "epoch": 1.8687323663601985, + "grad_norm": 1.229239643407709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385320 + }, + { + "epoch": 1.8687808645530346, + "grad_norm": 1.510616698396916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385330 + }, + { + "epoch": 1.8688293627458705, + "grad_norm": 1.0604379596657054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385340 + }, + { + "epoch": 1.868877860938707, + "grad_norm": 1.2066901255991525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385350 + }, + { + "epoch": 1.8689263591315428, + "grad_norm": 1.2113241076860959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385360 + }, + { + "epoch": 1.868974857324379, + "grad_norm": 1.5881136405937468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385370 + }, + { + "epoch": 1.869023355517215, + "grad_norm": 1.0582846599049844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385380 + }, + { + "epoch": 1.869071853710051, + "grad_norm": 1.086796697080672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385390 + }, + { + "epoch": 1.8691203519028872, + "grad_norm": 1.4384897717434342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385400 + }, + { + "epoch": 1.8691688500957233, + "grad_norm": 9.670176659426488e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385410 + }, + { + "epoch": 1.8692173482885592, + "grad_norm": 1.7184254019753098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385420 + }, + { + "epoch": 1.8692658464813956, + "grad_norm": 1.2293772222449206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385430 + }, + { + "epoch": 1.8693143446742315, + "grad_norm": 9.194675243406891e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385440 + }, + { + "epoch": 1.8693628428670677, + "grad_norm": 1.823454454097373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385450 + }, + { + "epoch": 1.8694113410599038, + "grad_norm": 1.480586675484119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385460 + }, + { + "epoch": 1.8694598392527397, + "grad_norm": 1.514568204186162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385470 + }, + { + "epoch": 1.869508337445576, + "grad_norm": 1.3611135329938406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385480 + }, + { + "epoch": 1.869556835638412, + "grad_norm": 1.1046497938593802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385490 + }, + { + "epoch": 1.8696053338312482, + "grad_norm": 9.00814445259357e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385500 + }, + { + "epoch": 1.8696538320240843, + "grad_norm": 1.466455845644532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385510 + }, + { + "epoch": 1.8697023302169202, + "grad_norm": 1.1509309949531144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385520 + }, + { + "epoch": 1.8697508284097564, + "grad_norm": 8.875475465686122e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385530 + }, + { + "epoch": 1.8697993266025925, + "grad_norm": 9.83271597476687e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385540 + }, + { + "epoch": 1.8698478247954284, + "grad_norm": 2.0427590285976294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385550 + }, + { + "epoch": 1.8698963229882648, + "grad_norm": 1.1248629583349157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385560 + }, + { + "epoch": 1.8699448211811007, + "grad_norm": 1.2497175738701571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385570 + }, + { + "epoch": 1.8699933193739369, + "grad_norm": 1.2663512904964591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385580 + }, + { + "epoch": 1.870041817566773, + "grad_norm": 9.75106040357332e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385590 + }, + { + "epoch": 1.870090315759609, + "grad_norm": 1.6502506028359676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385600 + }, + { + "epoch": 1.870138813952445, + "grad_norm": 1.2740016153145461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385610 + }, + { + "epoch": 1.8701873121452812, + "grad_norm": 1.1824937473647879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385620 + }, + { + "epoch": 1.8702358103381171, + "grad_norm": 1.152842621365835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385630 + }, + { + "epoch": 1.8702843085309535, + "grad_norm": 7.058896134282122e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385640 + }, + { + "epoch": 1.8703328067237894, + "grad_norm": 1.8148806901763237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385650 + }, + { + "epoch": 1.8703813049166256, + "grad_norm": 5.604185115970495e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385660 + }, + { + "epoch": 1.8704298031094617, + "grad_norm": 1.127723159299876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385670 + }, + { + "epoch": 1.8704783013022976, + "grad_norm": 1.966471963044114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385680 + }, + { + "epoch": 1.8705267994951338, + "grad_norm": 1.0689763740856506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385690 + }, + { + "epoch": 1.87057529768797, + "grad_norm": 1.0746374456971353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385700 + }, + { + "epoch": 1.8706237958808059, + "grad_norm": 1.2630678725145117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385710 + }, + { + "epoch": 1.8706722940736422, + "grad_norm": 1.0568753872064462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385720 + }, + { + "epoch": 1.8707207922664781, + "grad_norm": 1.1503103358734279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385730 + }, + { + "epoch": 1.8707692904593143, + "grad_norm": 6.767840954324811e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385740 + }, + { + "epoch": 1.8708177886521504, + "grad_norm": 1.5938811159799116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385750 + }, + { + "epoch": 1.8708662868449863, + "grad_norm": 1.0817038820221114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385760 + }, + { + "epoch": 1.8709147850378225, + "grad_norm": 9.99333771289912e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385770 + }, + { + "epoch": 1.8709632832306586, + "grad_norm": 1.4079141408274154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385780 + }, + { + "epoch": 1.8710117814234946, + "grad_norm": 9.135961320794195e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385790 + }, + { + "epoch": 1.871060279616331, + "grad_norm": 1.1301856339684946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385800 + }, + { + "epoch": 1.8711087778091668, + "grad_norm": 1.1573240144002739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385810 + }, + { + "epoch": 1.871157276002003, + "grad_norm": 8.117302385812764e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385820 + }, + { + "epoch": 1.8712057741948391, + "grad_norm": 1.2698979645620057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385830 + }, + { + "epoch": 1.871254272387675, + "grad_norm": 7.162545223593497e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385840 + }, + { + "epoch": 1.8713027705805112, + "grad_norm": 1.5579091794393207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385850 + }, + { + "epoch": 1.8713512687733473, + "grad_norm": 1.5757018800854894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385860 + }, + { + "epoch": 1.8713997669661833, + "grad_norm": 1.281454142798566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385870 + }, + { + "epoch": 1.8714482651590196, + "grad_norm": 9.43911970807676e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385880 + }, + { + "epoch": 1.8714967633518556, + "grad_norm": 1.3452211788944624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385890 + }, + { + "epoch": 1.8715452615446917, + "grad_norm": 1.1793438225993214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385900 + }, + { + "epoch": 1.8715937597375278, + "grad_norm": 1.390656656496958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385910 + }, + { + "epoch": 1.8716422579303638, + "grad_norm": 9.195674444129054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385920 + }, + { + "epoch": 1.8716907561232, + "grad_norm": 1.2881688604693409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385930 + }, + { + "epoch": 1.871739254316036, + "grad_norm": 7.71595320969709e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385940 + }, + { + "epoch": 1.8717877525088722, + "grad_norm": 2.3826441264418463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385950 + }, + { + "epoch": 1.8718362507017083, + "grad_norm": 1.3921709118847048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385960 + }, + { + "epoch": 1.8718847488945443, + "grad_norm": 1.1842281821827783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385970 + }, + { + "epoch": 1.8719332470873804, + "grad_norm": 1.342206967791526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385980 + }, + { + "epoch": 1.8719817452802165, + "grad_norm": 1.8238546672932898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 385990 + }, + { + "epoch": 1.8720302434730525, + "grad_norm": 1.1053869819477313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386000 + }, + { + "epoch": 1.8720787416658888, + "grad_norm": 1.0665162086809232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386010 + }, + { + "epoch": 1.8721272398587248, + "grad_norm": 9.217682617190803e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386020 + }, + { + "epoch": 1.872175738051561, + "grad_norm": 1.5484024729062185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386030 + }, + { + "epoch": 1.872224236244397, + "grad_norm": 9.779232534867788e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386040 + }, + { + "epoch": 1.872272734437233, + "grad_norm": 1.4087499167203532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386050 + }, + { + "epoch": 1.872321232630069, + "grad_norm": 1.1315917980425638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386060 + }, + { + "epoch": 1.8723697308229053, + "grad_norm": 1.1268673993924949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386070 + }, + { + "epoch": 1.8724182290157412, + "grad_norm": 1.2700542839638729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386080 + }, + { + "epoch": 1.8724667272085775, + "grad_norm": 1.4010513638140765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386090 + }, + { + "epoch": 1.8725152254014135, + "grad_norm": 9.382055132789446e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386100 + }, + { + "epoch": 1.8725637235942496, + "grad_norm": 1.1854952575163225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386110 + }, + { + "epoch": 1.8726122217870858, + "grad_norm": 1.3192070547063395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386120 + }, + { + "epoch": 1.8726607199799217, + "grad_norm": 1.6394647417428132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386130 + }, + { + "epoch": 1.8727092181727578, + "grad_norm": 1.4703587680742203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386140 + }, + { + "epoch": 1.872757716365594, + "grad_norm": 1.3253166564197727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386150 + }, + { + "epoch": 1.8728062145584299, + "grad_norm": 1.4617250521098413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386160 + }, + { + "epoch": 1.8728547127512662, + "grad_norm": 7.158254433647926e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386170 + }, + { + "epoch": 1.8729032109441022, + "grad_norm": 8.636634518666142e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386180 + }, + { + "epoch": 1.8729517091369383, + "grad_norm": 8.59250892659702e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386190 + }, + { + "epoch": 1.8730002073297745, + "grad_norm": 9.794416833130981e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386200 + }, + { + "epoch": 1.8730487055226104, + "grad_norm": 1.1763539475850848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386210 + }, + { + "epoch": 1.8730972037154465, + "grad_norm": 1.336641641813685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386220 + }, + { + "epoch": 1.8731457019082827, + "grad_norm": 1.0876430422968042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386230 + }, + { + "epoch": 1.8731942001011186, + "grad_norm": 1.3823759914544098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386240 + }, + { + "epoch": 1.873242698293955, + "grad_norm": 1.2626209411337186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386250 + }, + { + "epoch": 1.8732911964867909, + "grad_norm": 1.7360353155027042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386260 + }, + { + "epoch": 1.873339694679627, + "grad_norm": 8.111088689588541e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386270 + }, + { + "epoch": 1.8733881928724632, + "grad_norm": 1.2058372966805564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386280 + }, + { + "epoch": 1.873436691065299, + "grad_norm": 1.3225634809543863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386290 + }, + { + "epoch": 1.8734851892581352, + "grad_norm": 1.3608261184572257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386300 + }, + { + "epoch": 1.8735336874509714, + "grad_norm": 1.1639834873733435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386310 + }, + { + "epoch": 1.8735821856438073, + "grad_norm": 1.3087159800306836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386320 + }, + { + "epoch": 1.8736306838366437, + "grad_norm": 1.2724166609245913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386330 + }, + { + "epoch": 1.8736791820294796, + "grad_norm": 1.0569416453165559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386340 + }, + { + "epoch": 1.8737276802223157, + "grad_norm": 1.268341787152849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386350 + }, + { + "epoch": 1.8737761784151519, + "grad_norm": 1.2937678484092885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386360 + }, + { + "epoch": 1.8738246766079878, + "grad_norm": 1.412956418533895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386370 + }, + { + "epoch": 1.873873174800824, + "grad_norm": 1.203155175488746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386380 + }, + { + "epoch": 1.87392167299366, + "grad_norm": 1.2554636441564071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386390 + }, + { + "epoch": 1.873970171186496, + "grad_norm": 1.0209979528497115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386400 + }, + { + "epoch": 1.8740186693793324, + "grad_norm": 1.314721487233328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386410 + }, + { + "epoch": 1.8740671675721683, + "grad_norm": 1.2922391157133006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386420 + }, + { + "epoch": 1.8741156657650044, + "grad_norm": 1.521703119067297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386430 + }, + { + "epoch": 1.8741641639578406, + "grad_norm": 1.793359416524254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386440 + }, + { + "epoch": 1.8742126621506765, + "grad_norm": 1.158665163814021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386450 + }, + { + "epoch": 1.8742611603435126, + "grad_norm": 1.5933895980424495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386460 + }, + { + "epoch": 1.8743096585363488, + "grad_norm": 1.7718567946189978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386470 + }, + { + "epoch": 1.874358156729185, + "grad_norm": 1.3148816258023999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386480 + }, + { + "epoch": 1.874406654922021, + "grad_norm": 1.199837917909008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386490 + }, + { + "epoch": 1.874455153114857, + "grad_norm": 8.519189798050775e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386500 + }, + { + "epoch": 1.8745036513076931, + "grad_norm": 1.499080859446167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386510 + }, + { + "epoch": 1.8745521495005293, + "grad_norm": 8.957307784385193e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386520 + }, + { + "epoch": 1.8746006476933652, + "grad_norm": 9.660564792568493e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386530 + }, + { + "epoch": 1.8746491458862016, + "grad_norm": 1.0020060337012637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386540 + }, + { + "epoch": 1.8746976440790375, + "grad_norm": 1.4152264249389646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386550 + }, + { + "epoch": 1.8747461422718736, + "grad_norm": 1.1256541476711845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386560 + }, + { + "epoch": 1.8747946404647098, + "grad_norm": 7.988077754816914e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386570 + }, + { + "epoch": 1.8748431386575457, + "grad_norm": 1.2425569906326928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386580 + }, + { + "epoch": 1.8748916368503818, + "grad_norm": 9.207226980834093e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386590 + }, + { + "epoch": 1.874940135043218, + "grad_norm": 1.3039280766236061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386600 + }, + { + "epoch": 1.874988633236054, + "grad_norm": 7.667120272003558e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386610 + }, + { + "epoch": 1.8750371314288903, + "grad_norm": 1.0706591169196145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386620 + }, + { + "epoch": 1.8750856296217262, + "grad_norm": 1.705277341557121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386630 + }, + { + "epoch": 1.8751341278145623, + "grad_norm": 1.1992250747994149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386640 + }, + { + "epoch": 1.8751826260073985, + "grad_norm": 1.2702252583096652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386650 + }, + { + "epoch": 1.8752311242002344, + "grad_norm": 1.4434466066859386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386660 + }, + { + "epoch": 1.8752796223930706, + "grad_norm": 1.4990868990594208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386670 + }, + { + "epoch": 1.8753281205859067, + "grad_norm": 1.2560400719507925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386680 + }, + { + "epoch": 1.8753766187787426, + "grad_norm": 8.298020937047568e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386690 + }, + { + "epoch": 1.875425116971579, + "grad_norm": 1.1481268380464371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386700 + }, + { + "epoch": 1.875473615164415, + "grad_norm": 1.1707182778764036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386710 + }, + { + "epoch": 1.875522113357251, + "grad_norm": 1.0570226471884325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386720 + }, + { + "epoch": 1.8755706115500872, + "grad_norm": 9.945082979356812e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386730 + }, + { + "epoch": 1.8756191097429231, + "grad_norm": 8.79521522278992e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386740 + }, + { + "epoch": 1.8756676079357593, + "grad_norm": 9.2541831975268e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386750 + }, + { + "epoch": 1.8757161061285954, + "grad_norm": 8.885852054163479e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386760 + }, + { + "epoch": 1.8757646043214313, + "grad_norm": 1.371899660540521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386770 + }, + { + "epoch": 1.8758131025142677, + "grad_norm": 1.0362811053710175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386780 + }, + { + "epoch": 1.8758616007071036, + "grad_norm": 1.2610525068623701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386790 + }, + { + "epoch": 1.8759100988999398, + "grad_norm": 1.1444318381848007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386800 + }, + { + "epoch": 1.875958597092776, + "grad_norm": 1.4744829357482558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386810 + }, + { + "epoch": 1.8760070952856118, + "grad_norm": 1.4852688856592522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386820 + }, + { + "epoch": 1.876055593478448, + "grad_norm": 1.0274638917451284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386830 + }, + { + "epoch": 1.8761040916712841, + "grad_norm": 8.310315102733057e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386840 + }, + { + "epoch": 1.87615258986412, + "grad_norm": 2.0160289437853862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386850 + }, + { + "epoch": 1.8762010880569564, + "grad_norm": 1.2424175466207998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386860 + }, + { + "epoch": 1.8762495862497923, + "grad_norm": 9.390589639224345e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386870 + }, + { + "epoch": 1.8762980844426285, + "grad_norm": 1.1576325675832777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386880 + }, + { + "epoch": 1.8763465826354646, + "grad_norm": 1.3383963270996446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386890 + }, + { + "epoch": 1.8763950808283005, + "grad_norm": 1.1796047694190293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386900 + }, + { + "epoch": 1.8764435790211367, + "grad_norm": 1.6450524498168306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386910 + }, + { + "epoch": 1.8764920772139728, + "grad_norm": 1.4817018723078945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386920 + }, + { + "epoch": 1.8765405754068087, + "grad_norm": 7.324086670479346e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386930 + }, + { + "epoch": 1.876589073599645, + "grad_norm": 1.8066302232000453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386940 + }, + { + "epoch": 1.876637571792481, + "grad_norm": 1.2542084704136869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386950 + }, + { + "epoch": 1.8766860699853172, + "grad_norm": 1.4857717722804864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386960 + }, + { + "epoch": 1.8767345681781533, + "grad_norm": 8.004094276259366e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386970 + }, + { + "epoch": 1.8767830663709892, + "grad_norm": 1.1522486076387395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386980 + }, + { + "epoch": 1.8768315645638256, + "grad_norm": 1.1258568299865601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 386990 + }, + { + "epoch": 1.8768800627566615, + "grad_norm": 9.94212445704079e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387000 + }, + { + "epoch": 1.8769285609494977, + "grad_norm": 1.3076791205435256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387010 + }, + { + "epoch": 1.8769770591423338, + "grad_norm": 1.2929130654981691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387020 + }, + { + "epoch": 1.8770255573351697, + "grad_norm": 1.0559879193294819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387030 + }, + { + "epoch": 1.8770740555280059, + "grad_norm": 8.978007670634724e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387040 + }, + { + "epoch": 1.877122553720842, + "grad_norm": 1.0939428030098952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387050 + }, + { + "epoch": 1.877171051913678, + "grad_norm": 9.748731599756866e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387060 + }, + { + "epoch": 1.8772195501065143, + "grad_norm": 9.158703129230616e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387070 + }, + { + "epoch": 1.8772680482993502, + "grad_norm": 9.2851024646734e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387080 + }, + { + "epoch": 1.8773165464921864, + "grad_norm": 1.2123864578938992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387090 + }, + { + "epoch": 1.8773650446850225, + "grad_norm": 1.0806828321108242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387100 + }, + { + "epoch": 1.8774135428778584, + "grad_norm": 8.215562452562608e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387110 + }, + { + "epoch": 1.8774620410706946, + "grad_norm": 1.311211317300831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387120 + }, + { + "epoch": 1.8775105392635307, + "grad_norm": 7.598650597628875e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387130 + }, + { + "epoch": 1.8775590374563667, + "grad_norm": 8.599103651363293e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387140 + }, + { + "epoch": 1.877607535649203, + "grad_norm": 1.0651445059295384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387150 + }, + { + "epoch": 1.877656033842039, + "grad_norm": 2.1500921931760786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387160 + }, + { + "epoch": 1.877704532034875, + "grad_norm": 8.350896862907575e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387170 + }, + { + "epoch": 1.8777530302277112, + "grad_norm": 1.868213850286793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387180 + }, + { + "epoch": 1.8778015284205472, + "grad_norm": 1.0074506562318675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387190 + }, + { + "epoch": 1.8778500266133833, + "grad_norm": 1.187685239045777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387200 + }, + { + "epoch": 1.8778985248062194, + "grad_norm": 1.0345281964418973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387210 + }, + { + "epoch": 1.8779470229990554, + "grad_norm": 1.0015868134871653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387220 + }, + { + "epoch": 1.8779955211918917, + "grad_norm": 1.0788941295913901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387230 + }, + { + "epoch": 1.8780440193847276, + "grad_norm": 1.7891311543394295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387240 + }, + { + "epoch": 1.8780925175775638, + "grad_norm": 9.277584922529059e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387250 + }, + { + "epoch": 1.8781410157704, + "grad_norm": 8.83860806766279e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387260 + }, + { + "epoch": 1.8781895139632359, + "grad_norm": 8.133755891037708e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387270 + }, + { + "epoch": 1.878238012156072, + "grad_norm": 1.0121178561917077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387280 + }, + { + "epoch": 1.8782865103489081, + "grad_norm": 1.0860297550152609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387290 + }, + { + "epoch": 1.878335008541744, + "grad_norm": 8.894375014278921e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387300 + }, + { + "epoch": 1.8783835067345804, + "grad_norm": 1.2001863503030563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387310 + }, + { + "epoch": 1.8784320049274164, + "grad_norm": 1.331846721797092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387320 + }, + { + "epoch": 1.8784805031202525, + "grad_norm": 1.4999487873978978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387330 + }, + { + "epoch": 1.8785290013130886, + "grad_norm": 1.2087190803811154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387340 + }, + { + "epoch": 1.8785774995059246, + "grad_norm": 7.471346208376417e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387350 + }, + { + "epoch": 1.8786259976987607, + "grad_norm": 8.222532876800415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387360 + }, + { + "epoch": 1.8786744958915969, + "grad_norm": 9.350417329301308e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387370 + }, + { + "epoch": 1.8787229940844328, + "grad_norm": 9.074375029172188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387380 + }, + { + "epoch": 1.8787714922772691, + "grad_norm": 1.1013315592833806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387390 + }, + { + "epoch": 1.878819990470105, + "grad_norm": 1.3516284091963371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387400 + }, + { + "epoch": 1.8788684886629412, + "grad_norm": 8.021934227997463e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387410 + }, + { + "epoch": 1.8789169868557773, + "grad_norm": 1.3673831844585038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387420 + }, + { + "epoch": 1.8789654850486133, + "grad_norm": 8.439317689123982e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387430 + }, + { + "epoch": 1.8790139832414494, + "grad_norm": 1.0430997399168973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387440 + }, + { + "epoch": 1.8790624814342856, + "grad_norm": 9.729121508428307e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387450 + }, + { + "epoch": 1.8791109796271215, + "grad_norm": 1.0798705041281664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387460 + }, + { + "epoch": 1.8791594778199578, + "grad_norm": 6.769645288784432e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387470 + }, + { + "epoch": 1.8792079760127938, + "grad_norm": 1.749775790926833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387480 + }, + { + "epoch": 1.87925647420563, + "grad_norm": 1.0266989036722407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387490 + }, + { + "epoch": 1.879304972398466, + "grad_norm": 8.735670853354804e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387500 + }, + { + "epoch": 1.879353470591302, + "grad_norm": 1.4630028744022638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387510 + }, + { + "epoch": 1.8794019687841383, + "grad_norm": 1.2588398767832132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387520 + }, + { + "epoch": 1.8794504669769743, + "grad_norm": 1.1284980949710643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387530 + }, + { + "epoch": 1.8794989651698104, + "grad_norm": 1.1934506716215765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387540 + }, + { + "epoch": 1.8795474633626466, + "grad_norm": 1.3002419585461666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387550 + }, + { + "epoch": 1.8795959615554825, + "grad_norm": 1.5417345622381617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387560 + }, + { + "epoch": 1.8796444597483186, + "grad_norm": 1.790690262737371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387570 + }, + { + "epoch": 1.8796929579411548, + "grad_norm": 1.5477269244001945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387580 + }, + { + "epoch": 1.8797414561339907, + "grad_norm": 1.3034592960536884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387590 + }, + { + "epoch": 1.879789954326827, + "grad_norm": 1.4838516193549367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387600 + }, + { + "epoch": 1.879838452519663, + "grad_norm": 1.06093995810852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387610 + }, + { + "epoch": 1.8798869507124991, + "grad_norm": 1.4398122694103677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387620 + }, + { + "epoch": 1.8799354489053353, + "grad_norm": 8.277796226252576e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387630 + }, + { + "epoch": 1.8799839470981712, + "grad_norm": 1.6093480326162535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387640 + }, + { + "epoch": 1.8800324452910073, + "grad_norm": 8.055418554420157e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387650 + }, + { + "epoch": 1.8800809434838435, + "grad_norm": 8.339632096010519e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387660 + }, + { + "epoch": 1.8801294416766794, + "grad_norm": 1.3325002434783073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387670 + }, + { + "epoch": 1.8801779398695158, + "grad_norm": 1.078210232208221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387680 + }, + { + "epoch": 1.8802264380623517, + "grad_norm": 6.846847533381606e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387690 + }, + { + "epoch": 1.8802749362551878, + "grad_norm": 7.339756358248906e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387700 + }, + { + "epoch": 1.880323434448024, + "grad_norm": 1.4848394513933272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387710 + }, + { + "epoch": 1.8803719326408599, + "grad_norm": 1.1728267246269297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387720 + }, + { + "epoch": 1.880420430833696, + "grad_norm": 9.000686418403347e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387730 + }, + { + "epoch": 1.8804689290265322, + "grad_norm": 1.2558318829292148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387740 + }, + { + "epoch": 1.880517427219368, + "grad_norm": 1.1780242559211729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387750 + }, + { + "epoch": 1.8805659254122045, + "grad_norm": 1.493802770369257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387760 + }, + { + "epoch": 1.8806144236050404, + "grad_norm": 9.917327403741183e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387770 + }, + { + "epoch": 1.8806629217978765, + "grad_norm": 1.3704597456865031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387780 + }, + { + "epoch": 1.8807114199907127, + "grad_norm": 1.3704406498504795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387790 + }, + { + "epoch": 1.8807599181835486, + "grad_norm": 1.4057956576607467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387800 + }, + { + "epoch": 1.8808084163763847, + "grad_norm": 1.0008657902460527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387810 + }, + { + "epoch": 1.8808569145692209, + "grad_norm": 1.6715308248649308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387820 + }, + { + "epoch": 1.8809054127620568, + "grad_norm": 7.867133611227928e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387830 + }, + { + "epoch": 1.8809539109548932, + "grad_norm": 1.242437974724453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387840 + }, + { + "epoch": 1.881002409147729, + "grad_norm": 1.1160898871764857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387850 + }, + { + "epoch": 1.8810509073405652, + "grad_norm": 1.6264399604892787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387860 + }, + { + "epoch": 1.8810994055334014, + "grad_norm": 1.374520763874898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387870 + }, + { + "epoch": 1.8811479037262373, + "grad_norm": 6.818659414875583e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387880 + }, + { + "epoch": 1.8811964019190734, + "grad_norm": 1.5307566769706682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387890 + }, + { + "epoch": 1.8812449001119096, + "grad_norm": 1.1505759900387602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387900 + }, + { + "epoch": 1.8812933983047455, + "grad_norm": 1.8799614309727986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387910 + }, + { + "epoch": 1.8813418964975819, + "grad_norm": 1.3391559861020141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387920 + }, + { + "epoch": 1.8813903946904178, + "grad_norm": 1.4290708172381983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387930 + }, + { + "epoch": 1.881438892883254, + "grad_norm": 1.615462252857469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387940 + }, + { + "epoch": 1.88148739107609, + "grad_norm": 1.4152822913615637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387950 + }, + { + "epoch": 1.881535889268926, + "grad_norm": 1.2438588825602892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387960 + }, + { + "epoch": 1.8815843874617622, + "grad_norm": 1.0413634399242255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387970 + }, + { + "epoch": 1.8816328856545983, + "grad_norm": 9.216518215282576e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387980 + }, + { + "epoch": 1.8816813838474342, + "grad_norm": 1.1527499843566602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 387990 + }, + { + "epoch": 1.8817298820402706, + "grad_norm": 1.1738428895569086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388000 + }, + { + "epoch": 1.8817783802331065, + "grad_norm": 1.2102407076497457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388010 + }, + { + "epoch": 1.8818268784259427, + "grad_norm": 1.1418404000096416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388020 + }, + { + "epoch": 1.8818753766187788, + "grad_norm": 8.975525211951663e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388030 + }, + { + "epoch": 1.8819238748116147, + "grad_norm": 1.2108894331674946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388040 + }, + { + "epoch": 1.881972373004451, + "grad_norm": 1.2497271661970899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388050 + }, + { + "epoch": 1.882020871197287, + "grad_norm": 1.0338518485752957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388060 + }, + { + "epoch": 1.8820693693901231, + "grad_norm": 1.501746105248003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388070 + }, + { + "epoch": 1.8821178675829593, + "grad_norm": 2.537021259740868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388080 + }, + { + "epoch": 1.8821663657757952, + "grad_norm": 1.2014716332942044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388090 + }, + { + "epoch": 1.8822148639686314, + "grad_norm": 1.1988082526670496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388100 + }, + { + "epoch": 1.8822633621614675, + "grad_norm": 1.3904579709844711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388110 + }, + { + "epoch": 1.8823118603543034, + "grad_norm": 1.776169078482326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388120 + }, + { + "epoch": 1.8823603585471398, + "grad_norm": 1.3144390464958633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388130 + }, + { + "epoch": 1.8824088567399757, + "grad_norm": 1.6585627093945732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388140 + }, + { + "epoch": 1.8824573549328119, + "grad_norm": 9.402000955560652e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388150 + }, + { + "epoch": 1.882505853125648, + "grad_norm": 9.962382918615731e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388160 + }, + { + "epoch": 1.882554351318484, + "grad_norm": 1.4861606167926311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388170 + }, + { + "epoch": 1.88260284951132, + "grad_norm": 1.3359374051447048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388180 + }, + { + "epoch": 1.8826513477041562, + "grad_norm": 1.404446781094748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388190 + }, + { + "epoch": 1.8826998458969921, + "grad_norm": 1.6329943619552978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388200 + }, + { + "epoch": 1.8827483440898285, + "grad_norm": 5.4568875640370607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388210 + }, + { + "epoch": 1.8827968422826644, + "grad_norm": 6.418456433010533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388220 + }, + { + "epoch": 1.8828453404755006, + "grad_norm": 1.2524774994915333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388230 + }, + { + "epoch": 1.8828938386683367, + "grad_norm": 1.6681013903507846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388240 + }, + { + "epoch": 1.8829423368611726, + "grad_norm": 1.7427272069880928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388250 + }, + { + "epoch": 1.8829908350540088, + "grad_norm": 9.74895630889705e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388260 + }, + { + "epoch": 1.883039333246845, + "grad_norm": 8.784364347036444e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388270 + }, + { + "epoch": 1.8830878314396808, + "grad_norm": 1.0604266797997752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388280 + }, + { + "epoch": 1.8831363296325172, + "grad_norm": 1.636150592787544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388290 + }, + { + "epoch": 1.8831848278253531, + "grad_norm": 1.2512429314881501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388300 + }, + { + "epoch": 1.8832333260181893, + "grad_norm": 1.2882182431894762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388310 + }, + { + "epoch": 1.8832818242110254, + "grad_norm": 9.201380102297207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388320 + }, + { + "epoch": 1.8833303224038613, + "grad_norm": 1.1836359448125222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388330 + }, + { + "epoch": 1.8833788205966975, + "grad_norm": 1.4154364791352236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388340 + }, + { + "epoch": 1.8834273187895336, + "grad_norm": 8.466372491966467e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388350 + }, + { + "epoch": 1.8834758169823695, + "grad_norm": 1.1591760440410326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388360 + }, + { + "epoch": 1.883524315175206, + "grad_norm": 1.265753546420001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388370 + }, + { + "epoch": 1.8835728133680418, + "grad_norm": 1.1801008170664318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388380 + }, + { + "epoch": 1.883621311560878, + "grad_norm": 8.18383494305408e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388390 + }, + { + "epoch": 1.8836698097537141, + "grad_norm": 1.2557592299344833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388400 + }, + { + "epoch": 1.88371830794655, + "grad_norm": 7.287717540549465e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388410 + }, + { + "epoch": 1.8837668061393862, + "grad_norm": 1.0811313622127727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388420 + }, + { + "epoch": 1.8838153043322223, + "grad_norm": 1.6222813314925588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388430 + }, + { + "epoch": 1.8838638025250583, + "grad_norm": 9.624214314385426e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388440 + }, + { + "epoch": 1.8839123007178946, + "grad_norm": 1.4054460706347527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388450 + }, + { + "epoch": 1.8839607989107305, + "grad_norm": 1.820139949870736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388460 + }, + { + "epoch": 1.8840092971035667, + "grad_norm": 1.2638631474715112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388470 + }, + { + "epoch": 1.8840577952964028, + "grad_norm": 1.3716294766652481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388480 + }, + { + "epoch": 1.8841062934892387, + "grad_norm": 9.600941375254024e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388490 + }, + { + "epoch": 1.884154791682075, + "grad_norm": 8.634643222649174e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388500 + }, + { + "epoch": 1.884203289874911, + "grad_norm": 1.0712339459928444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388510 + }, + { + "epoch": 1.8842517880677472, + "grad_norm": 9.900078090652187e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388520 + }, + { + "epoch": 1.8843002862605833, + "grad_norm": 1.0444075826399057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388530 + }, + { + "epoch": 1.8843487844534192, + "grad_norm": 9.383771093496307e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388540 + }, + { + "epoch": 1.8843972826462554, + "grad_norm": 8.916436478045853e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388550 + }, + { + "epoch": 1.8844457808390915, + "grad_norm": 1.2598230014759793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388560 + }, + { + "epoch": 1.8844942790319275, + "grad_norm": 8.389111627593593e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388570 + }, + { + "epoch": 1.8845427772247638, + "grad_norm": 1.4664540692876926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388580 + }, + { + "epoch": 1.8845912754175997, + "grad_norm": 8.541764628944293e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388590 + }, + { + "epoch": 1.8846397736104359, + "grad_norm": 1.0370663439118744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388600 + }, + { + "epoch": 1.884688271803272, + "grad_norm": 1.4515977753148945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388610 + }, + { + "epoch": 1.884736769996108, + "grad_norm": 1.0453710785895964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388620 + }, + { + "epoch": 1.884785268188944, + "grad_norm": 1.7576828881260553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388630 + }, + { + "epoch": 1.8848337663817802, + "grad_norm": 8.522083483342158e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388640 + }, + { + "epoch": 1.8848822645746162, + "grad_norm": 7.996964868084433e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388650 + }, + { + "epoch": 1.8849307627674525, + "grad_norm": 1.4190622010801235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388660 + }, + { + "epoch": 1.8849792609602884, + "grad_norm": 8.017775776636427e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388670 + }, + { + "epoch": 1.8850277591531246, + "grad_norm": 7.995735629151568e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388680 + }, + { + "epoch": 1.8850762573459607, + "grad_norm": 7.92618770617537e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388690 + }, + { + "epoch": 1.8851247555387967, + "grad_norm": 1.3967362377798054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388700 + }, + { + "epoch": 1.8851732537316328, + "grad_norm": 9.084976326789729e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388710 + }, + { + "epoch": 1.885221751924469, + "grad_norm": 9.72256763986934e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388720 + }, + { + "epoch": 1.8852702501173049, + "grad_norm": 1.9008194129810363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388730 + }, + { + "epoch": 1.8853187483101412, + "grad_norm": 1.5370165584727147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388740 + }, + { + "epoch": 1.8853672465029772, + "grad_norm": 1.0895962354595667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388750 + }, + { + "epoch": 1.8854157446958133, + "grad_norm": 1.3259088937900287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388760 + }, + { + "epoch": 1.8854642428886494, + "grad_norm": 1.2380161784619759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388770 + }, + { + "epoch": 1.8855127410814854, + "grad_norm": 9.447473914292459e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388780 + }, + { + "epoch": 1.8855612392743215, + "grad_norm": 8.847746535423084e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388790 + }, + { + "epoch": 1.8856097374671577, + "grad_norm": 9.732483263746872e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388800 + }, + { + "epoch": 1.8856582356599936, + "grad_norm": 1.0188117904874616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388810 + }, + { + "epoch": 1.88570673385283, + "grad_norm": 2.014613720291436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388820 + }, + { + "epoch": 1.8857552320456659, + "grad_norm": 1.687968165242637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388830 + }, + { + "epoch": 1.885803730238502, + "grad_norm": 1.7227657522767004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388840 + }, + { + "epoch": 1.8858522284313382, + "grad_norm": 1.4530544767410447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388850 + }, + { + "epoch": 1.885900726624174, + "grad_norm": 1.6348197462434655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388860 + }, + { + "epoch": 1.8859492248170102, + "grad_norm": 1.369165758546842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388870 + }, + { + "epoch": 1.8859977230098464, + "grad_norm": 1.1696874580024996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388880 + }, + { + "epoch": 1.8860462212026823, + "grad_norm": 1.3109914931419553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388890 + }, + { + "epoch": 1.8860947193955186, + "grad_norm": 1.1788476861340769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388900 + }, + { + "epoch": 1.8861432175883546, + "grad_norm": 9.533241751569221e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388910 + }, + { + "epoch": 1.8861917157811907, + "grad_norm": 9.764050012961434e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388920 + }, + { + "epoch": 1.8862402139740269, + "grad_norm": 1.1874680794221604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388930 + }, + { + "epoch": 1.8862887121668628, + "grad_norm": 1.0528208527205152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388940 + }, + { + "epoch": 1.886337210359699, + "grad_norm": 7.297967563602015e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388950 + }, + { + "epoch": 1.886385708552535, + "grad_norm": 1.2758611944718723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388960 + }, + { + "epoch": 1.886434206745371, + "grad_norm": 1.0197179989290817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388970 + }, + { + "epoch": 1.8864827049382074, + "grad_norm": 2.216616934447302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388980 + }, + { + "epoch": 1.8865312031310433, + "grad_norm": 7.478395680493577e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 388990 + }, + { + "epoch": 1.8865797013238794, + "grad_norm": 1.143734884578862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389000 + }, + { + "epoch": 1.8866281995167156, + "grad_norm": 1.330723886638907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389010 + }, + { + "epoch": 1.8866766977095515, + "grad_norm": 8.179825705667554e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389020 + }, + { + "epoch": 1.8867251959023879, + "grad_norm": 1.0896513913394301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389030 + }, + { + "epoch": 1.8867736940952238, + "grad_norm": 1.0073643252894726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389040 + }, + { + "epoch": 1.88682219228806, + "grad_norm": 8.377604387987958e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389050 + }, + { + "epoch": 1.886870690480896, + "grad_norm": 1.879282152117412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389060 + }, + { + "epoch": 1.886919188673732, + "grad_norm": 1.2526304438154057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389070 + }, + { + "epoch": 1.8869676868665681, + "grad_norm": 2.009317867646132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389080 + }, + { + "epoch": 1.8870161850594043, + "grad_norm": 8.550434138498986e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389090 + }, + { + "epoch": 1.8870646832522402, + "grad_norm": 1.306087327179739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389100 + }, + { + "epoch": 1.8871131814450766, + "grad_norm": 1.2715323016720959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389110 + }, + { + "epoch": 1.8871616796379125, + "grad_norm": 9.544867118904676e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389120 + }, + { + "epoch": 1.8872101778307486, + "grad_norm": 1.2615165800866635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389130 + }, + { + "epoch": 1.8872586760235848, + "grad_norm": 1.9875480816722302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389140 + }, + { + "epoch": 1.8873071742164207, + "grad_norm": 8.032713161298943e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389150 + }, + { + "epoch": 1.8873556724092568, + "grad_norm": 1.6552556658666617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389160 + }, + { + "epoch": 1.887404170602093, + "grad_norm": 1.0957345253359563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389170 + }, + { + "epoch": 1.887452668794929, + "grad_norm": 1.7638512872508727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389180 + }, + { + "epoch": 1.8875011669877653, + "grad_norm": 2.5127455671736243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389190 + }, + { + "epoch": 1.8875496651806012, + "grad_norm": 1.0806405548180464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389200 + }, + { + "epoch": 1.8875981633734373, + "grad_norm": 8.40292191384151e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389210 + }, + { + "epoch": 1.8876466615662735, + "grad_norm": 1.2948300209814079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389220 + }, + { + "epoch": 1.8876951597591094, + "grad_norm": 1.747359057446829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389230 + }, + { + "epoch": 1.8877436579519455, + "grad_norm": 1.5446135037677777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389240 + }, + { + "epoch": 1.8877921561447817, + "grad_norm": 1.2050438868982383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389250 + }, + { + "epoch": 1.8878406543376176, + "grad_norm": 1.0551833184990755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389260 + }, + { + "epoch": 1.887889152530454, + "grad_norm": 1.0605596401092043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389270 + }, + { + "epoch": 1.88793765072329, + "grad_norm": 1.1626329232683474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389280 + }, + { + "epoch": 1.887986148916126, + "grad_norm": 8.771829484999216e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389290 + }, + { + "epoch": 1.8880346471089622, + "grad_norm": 1.0758207658057017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389300 + }, + { + "epoch": 1.888083145301798, + "grad_norm": 1.0140681183656852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389310 + }, + { + "epoch": 1.8881316434946342, + "grad_norm": 1.3696746847813301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389320 + }, + { + "epoch": 1.8881801416874704, + "grad_norm": 8.882623525607869e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389330 + }, + { + "epoch": 1.8882286398803063, + "grad_norm": 8.71320349205007e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389340 + }, + { + "epoch": 1.8882771380731427, + "grad_norm": 1.1790126208666152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389350 + }, + { + "epoch": 1.8883256362659786, + "grad_norm": 8.279049446002773e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389360 + }, + { + "epoch": 1.8883741344588147, + "grad_norm": 1.1612530492755013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389370 + }, + { + "epoch": 1.888422632651651, + "grad_norm": 1.4088361588449061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389380 + }, + { + "epoch": 1.8884711308444868, + "grad_norm": 1.0552723139767295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389390 + }, + { + "epoch": 1.888519629037323, + "grad_norm": 1.0944149586578078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389400 + }, + { + "epoch": 1.888568127230159, + "grad_norm": 1.6864328600263434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389410 + }, + { + "epoch": 1.888616625422995, + "grad_norm": 1.6433315153108197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389420 + }, + { + "epoch": 1.8886651236158314, + "grad_norm": 1.073228617087807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389430 + }, + { + "epoch": 1.8887136218086673, + "grad_norm": 1.1502901742233007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389440 + }, + { + "epoch": 1.8887621200015035, + "grad_norm": 1.1873046545929355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389450 + }, + { + "epoch": 1.8888106181943396, + "grad_norm": 1.2954441963586305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389460 + }, + { + "epoch": 1.8888591163871755, + "grad_norm": 1.1384131859415447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389470 + }, + { + "epoch": 1.8889076145800117, + "grad_norm": 1.0273424777551554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389480 + }, + { + "epoch": 1.8889561127728478, + "grad_norm": 1.2045889619116679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389490 + }, + { + "epoch": 1.8890046109656837, + "grad_norm": 1.2618708744582818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389500 + }, + { + "epoch": 1.88905310915852, + "grad_norm": 1.3180807556523177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389510 + }, + { + "epoch": 1.889101607351356, + "grad_norm": 9.402437939343145e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389520 + }, + { + "epoch": 1.8891501055441922, + "grad_norm": 1.1911948760712221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389530 + }, + { + "epoch": 1.8891986037370283, + "grad_norm": 1.256115655934309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389540 + }, + { + "epoch": 1.8892471019298642, + "grad_norm": 1.1262115684473883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389550 + }, + { + "epoch": 1.8892956001227006, + "grad_norm": 1.2356389689216485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389560 + }, + { + "epoch": 1.8893440983155365, + "grad_norm": 7.489274089778064e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389570 + }, + { + "epoch": 1.8893925965083727, + "grad_norm": 1.034730345850221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389580 + }, + { + "epoch": 1.8894410947012088, + "grad_norm": 1.5135896092033363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389590 + }, + { + "epoch": 1.8894895928940447, + "grad_norm": 9.850173121606076e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389600 + }, + { + "epoch": 1.8895380910868809, + "grad_norm": 1.0366341562928483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389610 + }, + { + "epoch": 1.889586589279717, + "grad_norm": 2.025119272275333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389620 + }, + { + "epoch": 1.889635087472553, + "grad_norm": 9.801063960424017e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389630 + }, + { + "epoch": 1.8896835856653893, + "grad_norm": 1.0213581091988999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389640 + }, + { + "epoch": 1.8897320838582252, + "grad_norm": 1.4061757980243783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389650 + }, + { + "epoch": 1.8897805820510614, + "grad_norm": 7.793897083274715e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389660 + }, + { + "epoch": 1.8898290802438975, + "grad_norm": 1.638958480043584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389670 + }, + { + "epoch": 1.8898775784367334, + "grad_norm": 1.1663398247208079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389680 + }, + { + "epoch": 1.8899260766295696, + "grad_norm": 1.0127203964316323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389690 + }, + { + "epoch": 1.8899745748224057, + "grad_norm": 1.1001452193681871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389700 + }, + { + "epoch": 1.8900230730152416, + "grad_norm": 1.044972286479151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389710 + }, + { + "epoch": 1.890071571208078, + "grad_norm": 1.2794801662607824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389720 + }, + { + "epoch": 1.890120069400914, + "grad_norm": 9.048621407714563e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389730 + }, + { + "epoch": 1.89016856759375, + "grad_norm": 1.0176560927277478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389740 + }, + { + "epoch": 1.8902170657865862, + "grad_norm": 1.242212555041533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389750 + }, + { + "epoch": 1.8902655639794221, + "grad_norm": 9.746786489017722e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389760 + }, + { + "epoch": 1.8903140621722583, + "grad_norm": 1.2905267077201188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389770 + }, + { + "epoch": 1.8903625603650944, + "grad_norm": 1.0727294608159355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389780 + }, + { + "epoch": 1.8904110585579303, + "grad_norm": 1.0916217263456929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389790 + }, + { + "epoch": 1.8904595567507667, + "grad_norm": 1.3930687714491796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389800 + }, + { + "epoch": 1.8905080549436026, + "grad_norm": 1.0208528244959325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389810 + }, + { + "epoch": 1.8905565531364388, + "grad_norm": 1.4610924026214889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389820 + }, + { + "epoch": 1.890605051329275, + "grad_norm": 7.485899011783204e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389830 + }, + { + "epoch": 1.8906535495221108, + "grad_norm": 1.120206061244744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389840 + }, + { + "epoch": 1.890702047714947, + "grad_norm": 1.5083582383113026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389850 + }, + { + "epoch": 1.8907505459077831, + "grad_norm": 1.31152821936098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389860 + }, + { + "epoch": 1.890799044100619, + "grad_norm": 1.2355422462917431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389870 + }, + { + "epoch": 1.8908475422934554, + "grad_norm": 8.610552271193228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389880 + }, + { + "epoch": 1.8908960404862913, + "grad_norm": 9.482406859717685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389890 + }, + { + "epoch": 1.8909445386791275, + "grad_norm": 1.4632420608506891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389900 + }, + { + "epoch": 1.8909930368719636, + "grad_norm": 1.3299972678737504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389910 + }, + { + "epoch": 1.8910415350647996, + "grad_norm": 1.2553231343304105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389920 + }, + { + "epoch": 1.8910900332576357, + "grad_norm": 1.1001136890342877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389930 + }, + { + "epoch": 1.8911385314504718, + "grad_norm": 9.989415516997724e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389940 + }, + { + "epoch": 1.8911870296433078, + "grad_norm": 2.1096926872132826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389950 + }, + { + "epoch": 1.8912355278361441, + "grad_norm": 1.1356966922448919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389960 + }, + { + "epoch": 1.89128402602898, + "grad_norm": 1.2525112502714819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389970 + }, + { + "epoch": 1.8913325242218162, + "grad_norm": 8.52198578371599e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389980 + }, + { + "epoch": 1.8913810224146523, + "grad_norm": 1.3988997515923529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 389990 + }, + { + "epoch": 1.8914295206074883, + "grad_norm": 9.658895905317877e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390000 + }, + { + "epoch": 1.8914780188003244, + "grad_norm": 1.4544152548978673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390010 + }, + { + "epoch": 1.8915265169931605, + "grad_norm": 1.1216553907900106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390020 + }, + { + "epoch": 1.8915750151859965, + "grad_norm": 8.091448400193713e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390030 + }, + { + "epoch": 1.8916235133788328, + "grad_norm": 1.2613557309748558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390040 + }, + { + "epoch": 1.8916720115716688, + "grad_norm": 1.7586774703204355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390050 + }, + { + "epoch": 1.891720509764505, + "grad_norm": 9.456570637667028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390060 + }, + { + "epoch": 1.891769007957341, + "grad_norm": 1.0350843737683135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390070 + }, + { + "epoch": 1.891817506150177, + "grad_norm": 9.270142875550391e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390080 + }, + { + "epoch": 1.8918660043430133, + "grad_norm": 1.718984954379721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390090 + }, + { + "epoch": 1.8919145025358493, + "grad_norm": 1.4121337876815687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390100 + }, + { + "epoch": 1.8919630007286854, + "grad_norm": 1.85308728362088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390110 + }, + { + "epoch": 1.8920114989215215, + "grad_norm": 1.5016818011304167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390120 + }, + { + "epoch": 1.8920599971143575, + "grad_norm": 7.317941363993441e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390130 + }, + { + "epoch": 1.8921084953071936, + "grad_norm": 1.4910199297446525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390140 + }, + { + "epoch": 1.8921569935000297, + "grad_norm": 1.2374695934624924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390150 + }, + { + "epoch": 1.8922054916928657, + "grad_norm": 8.02582267311891e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390160 + }, + { + "epoch": 1.892253989885702, + "grad_norm": 2.0632731079217592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390170 + }, + { + "epoch": 1.892302488078538, + "grad_norm": 1.0986191512074583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390180 + }, + { + "epoch": 1.892350986271374, + "grad_norm": 7.645391875144014e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390190 + }, + { + "epoch": 1.8923994844642102, + "grad_norm": 2.0863591743136567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390200 + }, + { + "epoch": 1.8924479826570462, + "grad_norm": 1.3279580102221189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390210 + }, + { + "epoch": 1.8924964808498823, + "grad_norm": 9.093663599912816e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390220 + }, + { + "epoch": 1.8925449790427185, + "grad_norm": 1.4526373881551535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390230 + }, + { + "epoch": 1.8925934772355544, + "grad_norm": 1.2039343744163489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390240 + }, + { + "epoch": 1.8926419754283907, + "grad_norm": 1.1071717764821187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390250 + }, + { + "epoch": 1.8926904736212267, + "grad_norm": 8.568265208452885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390260 + }, + { + "epoch": 1.8927389718140628, + "grad_norm": 1.0921419324461112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390270 + }, + { + "epoch": 1.892787470006899, + "grad_norm": 1.6729083895938857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390280 + }, + { + "epoch": 1.8928359681997349, + "grad_norm": 9.93931514869928e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390290 + }, + { + "epoch": 1.892884466392571, + "grad_norm": 8.85241124848335e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390300 + }, + { + "epoch": 1.8929329645854072, + "grad_norm": 1.1126307875031216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390310 + }, + { + "epoch": 1.892981462778243, + "grad_norm": 1.132805671488768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390320 + }, + { + "epoch": 1.8930299609710795, + "grad_norm": 9.128418021475682e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390330 + }, + { + "epoch": 1.8930784591639154, + "grad_norm": 1.0567505981384784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390340 + }, + { + "epoch": 1.8931269573567515, + "grad_norm": 1.3073310434208452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390350 + }, + { + "epoch": 1.8931754555495877, + "grad_norm": 1.5138827080818373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390360 + }, + { + "epoch": 1.8932239537424236, + "grad_norm": 1.967295837346228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390370 + }, + { + "epoch": 1.8932724519352597, + "grad_norm": 8.374689386414502e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390380 + }, + { + "epoch": 1.8933209501280959, + "grad_norm": 1.4091315669872984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390390 + }, + { + "epoch": 1.8933694483209318, + "grad_norm": 6.96667035171572e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390400 + }, + { + "epoch": 1.8934179465137682, + "grad_norm": 9.771905062905262e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390410 + }, + { + "epoch": 1.893466444706604, + "grad_norm": 1.3445544233547935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390420 + }, + { + "epoch": 1.8935149428994402, + "grad_norm": 1.3844644541904927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390430 + }, + { + "epoch": 1.8935634410922764, + "grad_norm": 1.1234449814878644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390440 + }, + { + "epoch": 1.8936119392851123, + "grad_norm": 1.2211851974086585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390450 + }, + { + "epoch": 1.8936604374779484, + "grad_norm": 9.493017927297842e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390460 + }, + { + "epoch": 1.8937089356707846, + "grad_norm": 8.605113066550985e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390470 + }, + { + "epoch": 1.8937574338636205, + "grad_norm": 2.5430541228388392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390480 + }, + { + "epoch": 1.8938059320564569, + "grad_norm": 1.268768290429989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390490 + }, + { + "epoch": 1.8938544302492928, + "grad_norm": 9.874640660711975e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390500 + }, + { + "epoch": 1.893902928442129, + "grad_norm": 1.9034615661439602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390510 + }, + { + "epoch": 1.893951426634965, + "grad_norm": 1.4135862258513043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390520 + }, + { + "epoch": 1.893999924827801, + "grad_norm": 1.5066873970681627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390530 + }, + { + "epoch": 1.8940484230206371, + "grad_norm": 9.888372787258959e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390540 + }, + { + "epoch": 1.8940969212134733, + "grad_norm": 1.0942030392868674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390550 + }, + { + "epoch": 1.8941454194063094, + "grad_norm": 1.0529589644647785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390560 + }, + { + "epoch": 1.8941939175991456, + "grad_norm": 1.773591407072672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390570 + }, + { + "epoch": 1.8942424157919815, + "grad_norm": 7.1738210927208e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390580 + }, + { + "epoch": 1.8942909139848176, + "grad_norm": 1.0179216580752382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390590 + }, + { + "epoch": 1.8943394121776538, + "grad_norm": 1.2011228456287881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390600 + }, + { + "epoch": 1.8943879103704897, + "grad_norm": 8.723093358753431e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390610 + }, + { + "epoch": 1.894436408563326, + "grad_norm": 1.3046381752701564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390620 + }, + { + "epoch": 1.894484906756162, + "grad_norm": 9.168661385672294e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390630 + }, + { + "epoch": 1.8945334049489981, + "grad_norm": 1.5087074700659286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390640 + }, + { + "epoch": 1.8945819031418343, + "grad_norm": 1.1908971586649386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390650 + }, + { + "epoch": 1.8946304013346702, + "grad_norm": 1.3018246036722303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390660 + }, + { + "epoch": 1.8946788995275063, + "grad_norm": 9.822673341375321e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390670 + }, + { + "epoch": 1.8947273977203425, + "grad_norm": 1.5085326765529317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390680 + }, + { + "epoch": 1.8947758959131784, + "grad_norm": 1.0924470217332782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390690 + }, + { + "epoch": 1.8948243941060148, + "grad_norm": 8.63663984773666e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390700 + }, + { + "epoch": 1.8948728922988507, + "grad_norm": 1.5337255021563578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390710 + }, + { + "epoch": 1.8949213904916868, + "grad_norm": 1.0546496120866777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390720 + }, + { + "epoch": 1.894969888684523, + "grad_norm": 8.269826601292607e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390730 + }, + { + "epoch": 1.895018386877359, + "grad_norm": 1.1708515046393586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390740 + }, + { + "epoch": 1.895066885070195, + "grad_norm": 9.660321431681496e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390750 + }, + { + "epoch": 1.8951153832630312, + "grad_norm": 9.867551220565929e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390760 + }, + { + "epoch": 1.8951638814558671, + "grad_norm": 1.3470032200757487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390770 + }, + { + "epoch": 1.8952123796487035, + "grad_norm": 8.714629906592108e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390780 + }, + { + "epoch": 1.8952608778415394, + "grad_norm": 1.624976420089297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390790 + }, + { + "epoch": 1.8953093760343755, + "grad_norm": 1.812723127159188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390800 + }, + { + "epoch": 1.8953578742272117, + "grad_norm": 1.5822886112459855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390810 + }, + { + "epoch": 1.8954063724200476, + "grad_norm": 8.855539412877533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390820 + }, + { + "epoch": 1.8954548706128838, + "grad_norm": 1.3343443683311307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390830 + }, + { + "epoch": 1.89550336880572, + "grad_norm": 9.540173984134981e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390840 + }, + { + "epoch": 1.8955518669985558, + "grad_norm": 1.5826145727260155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390850 + }, + { + "epoch": 1.8956003651913922, + "grad_norm": 9.143189316773714e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390860 + }, + { + "epoch": 1.8956488633842281, + "grad_norm": 1.985682551719492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390870 + }, + { + "epoch": 1.8956973615770643, + "grad_norm": 1.1484503126268919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390880 + }, + { + "epoch": 1.8957458597699004, + "grad_norm": 8.071653567753856e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390890 + }, + { + "epoch": 1.8957943579627363, + "grad_norm": 1.0801705307983411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390900 + }, + { + "epoch": 1.8958428561555725, + "grad_norm": 1.032920682320082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390910 + }, + { + "epoch": 1.8958913543484086, + "grad_norm": 1.3374834573198768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390920 + }, + { + "epoch": 1.8959398525412445, + "grad_norm": 1.0282075635359433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390930 + }, + { + "epoch": 1.895988350734081, + "grad_norm": 1.274191507860678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390940 + }, + { + "epoch": 1.8960368489269168, + "grad_norm": 1.0570370356788317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390950 + }, + { + "epoch": 1.896085347119753, + "grad_norm": 1.3520565111946325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390960 + }, + { + "epoch": 1.896133845312589, + "grad_norm": 1.2507357816105014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390970 + }, + { + "epoch": 1.896182343505425, + "grad_norm": 9.186690519413787e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390980 + }, + { + "epoch": 1.8962308416982612, + "grad_norm": 1.7702337373748378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 390990 + }, + { + "epoch": 1.8962793398910973, + "grad_norm": 1.1925362919384952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391000 + }, + { + "epoch": 1.8963278380839332, + "grad_norm": 1.307159980257211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391010 + }, + { + "epoch": 1.8963763362767696, + "grad_norm": 1.2548404981771455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391020 + }, + { + "epoch": 1.8964248344696055, + "grad_norm": 7.218529329833245e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391030 + }, + { + "epoch": 1.8964733326624417, + "grad_norm": 1.4815560334113798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391040 + }, + { + "epoch": 1.8965218308552778, + "grad_norm": 1.2462233023313729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391050 + }, + { + "epoch": 1.8965703290481137, + "grad_norm": 8.55184101311579e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391060 + }, + { + "epoch": 1.89661882724095, + "grad_norm": 1.900079560357426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391070 + }, + { + "epoch": 1.896667325433786, + "grad_norm": 1.2438038154982678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391080 + }, + { + "epoch": 1.8967158236266222, + "grad_norm": 1.460541110276381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391090 + }, + { + "epoch": 1.8967643218194583, + "grad_norm": 8.425030451064686e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391100 + }, + { + "epoch": 1.8968128200122942, + "grad_norm": 1.825379492004231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391110 + }, + { + "epoch": 1.8968613182051304, + "grad_norm": 1.3802840648224901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391120 + }, + { + "epoch": 1.8969098163979665, + "grad_norm": 1.777326374963195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391130 + }, + { + "epoch": 1.8969583145908024, + "grad_norm": 1.1717883552364583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391140 + }, + { + "epoch": 1.8970068127836388, + "grad_norm": 1.2623576850501195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391150 + }, + { + "epoch": 1.8970553109764747, + "grad_norm": 1.1407988331768593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391160 + }, + { + "epoch": 1.8971038091693109, + "grad_norm": 1.2743027966166665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391170 + }, + { + "epoch": 1.897152307362147, + "grad_norm": 1.709853059139732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391180 + }, + { + "epoch": 1.897200805554983, + "grad_norm": 1.0101666170214685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391190 + }, + { + "epoch": 1.897249303747819, + "grad_norm": 1.3042436464161256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391200 + }, + { + "epoch": 1.8972978019406552, + "grad_norm": 1.4052778496420615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391210 + }, + { + "epoch": 1.8973463001334911, + "grad_norm": 1.4415102000953084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391220 + }, + { + "epoch": 1.8973947983263275, + "grad_norm": 1.0550150975063843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391230 + }, + { + "epoch": 1.8974432965191634, + "grad_norm": 8.614952307084423e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391240 + }, + { + "epoch": 1.8974917947119996, + "grad_norm": 1.1524977416854654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391250 + }, + { + "epoch": 1.8975402929048357, + "grad_norm": 1.5674508802021592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391260 + }, + { + "epoch": 1.8975887910976716, + "grad_norm": 1.3481308513974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391270 + }, + { + "epoch": 1.8976372892905078, + "grad_norm": 1.7225985970981128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391280 + }, + { + "epoch": 1.897685787483344, + "grad_norm": 1.1458925364138395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391290 + }, + { + "epoch": 1.8977342856761799, + "grad_norm": 1.1309610137288928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391300 + }, + { + "epoch": 1.8977827838690162, + "grad_norm": 1.2631300450038907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391310 + }, + { + "epoch": 1.8978312820618521, + "grad_norm": 1.0400808214683366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391320 + }, + { + "epoch": 1.8978797802546883, + "grad_norm": 1.213900535645962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391330 + }, + { + "epoch": 1.8979282784475244, + "grad_norm": 1.0455110555085412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391340 + }, + { + "epoch": 1.8979767766403604, + "grad_norm": 1.3717823321712785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391350 + }, + { + "epoch": 1.8980252748331965, + "grad_norm": 9.560518599016632e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391360 + }, + { + "epoch": 1.8980737730260326, + "grad_norm": 1.014533790311134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391370 + }, + { + "epoch": 1.8981222712188686, + "grad_norm": 9.238616094364716e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391380 + }, + { + "epoch": 1.898170769411705, + "grad_norm": 1.1346769746012342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391390 + }, + { + "epoch": 1.8982192676045408, + "grad_norm": 7.98651900169034e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391400 + }, + { + "epoch": 1.898267765797377, + "grad_norm": 8.465926626399778e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391410 + }, + { + "epoch": 1.8983162639902131, + "grad_norm": 1.1600920224452693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391420 + }, + { + "epoch": 1.898364762183049, + "grad_norm": 1.346528222256893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391430 + }, + { + "epoch": 1.8984132603758852, + "grad_norm": 9.767016528883232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391440 + }, + { + "epoch": 1.8984617585687213, + "grad_norm": 1.2020402451184964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391450 + }, + { + "epoch": 1.8985102567615573, + "grad_norm": 1.0255480020759933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391460 + }, + { + "epoch": 1.8985587549543936, + "grad_norm": 9.631408559584997e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391470 + }, + { + "epoch": 1.8986072531472296, + "grad_norm": 1.3233558249226007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391480 + }, + { + "epoch": 1.8986557513400657, + "grad_norm": 1.3400695664245177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391490 + }, + { + "epoch": 1.8987042495329018, + "grad_norm": 1.1259515986239421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391500 + }, + { + "epoch": 1.8987527477257378, + "grad_norm": 1.6617480724789857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391510 + }, + { + "epoch": 1.898801245918574, + "grad_norm": 1.0486910007045935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391520 + }, + { + "epoch": 1.89884974411141, + "grad_norm": 8.953991326166033e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391530 + }, + { + "epoch": 1.898898242304246, + "grad_norm": 9.084353713717519e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391540 + }, + { + "epoch": 1.8989467404970823, + "grad_norm": 1.1576694269876953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391550 + }, + { + "epoch": 1.8989952386899183, + "grad_norm": 2.0100221931329543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391560 + }, + { + "epoch": 1.8990437368827544, + "grad_norm": 7.040971805594154e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391570 + }, + { + "epoch": 1.8990922350755906, + "grad_norm": 1.210485489622215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391580 + }, + { + "epoch": 1.8991407332684265, + "grad_norm": 8.415664609628948e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391590 + }, + { + "epoch": 1.8991892314612628, + "grad_norm": 1.6178871575789344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391600 + }, + { + "epoch": 1.8992377296540988, + "grad_norm": 1.103729463380887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391610 + }, + { + "epoch": 1.899286227846935, + "grad_norm": 9.528221767141076e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391620 + }, + { + "epoch": 1.899334726039771, + "grad_norm": 9.497950870240857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391630 + }, + { + "epoch": 1.899383224232607, + "grad_norm": 1.4020407945736224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391640 + }, + { + "epoch": 1.8994317224254431, + "grad_norm": 9.38995281529742e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391650 + }, + { + "epoch": 1.8994802206182793, + "grad_norm": 1.1807513189410201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391660 + }, + { + "epoch": 1.8995287188111152, + "grad_norm": 1.0264810335058883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391670 + }, + { + "epoch": 1.8995772170039515, + "grad_norm": 8.616722446674885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391680 + }, + { + "epoch": 1.8996257151967875, + "grad_norm": 7.65405161473609e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391690 + }, + { + "epoch": 1.8996742133896236, + "grad_norm": 1.0191142152393695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391700 + }, + { + "epoch": 1.8997227115824598, + "grad_norm": 1.5293391442128268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391710 + }, + { + "epoch": 1.8997712097752957, + "grad_norm": 1.2151004646909769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391720 + }, + { + "epoch": 1.8998197079681318, + "grad_norm": 1.2131566862194632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391730 + }, + { + "epoch": 1.899868206160968, + "grad_norm": 1.6461020990732322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391740 + }, + { + "epoch": 1.8999167043538039, + "grad_norm": 8.403234552645245e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391750 + }, + { + "epoch": 1.8999652025466403, + "grad_norm": 8.934994077947067e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391760 + }, + { + "epoch": 1.9000137007394762, + "grad_norm": 9.079776930320804e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391770 + }, + { + "epoch": 1.9000621989323123, + "grad_norm": 2.0250704224622496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391780 + }, + { + "epoch": 1.9001106971251485, + "grad_norm": 1.0136178119068973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391790 + }, + { + "epoch": 1.9001591953179844, + "grad_norm": 1.0781076476007456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391800 + }, + { + "epoch": 1.9002076935108205, + "grad_norm": 1.514058922680306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391810 + }, + { + "epoch": 1.9002561917036567, + "grad_norm": 1.5460527080790598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391820 + }, + { + "epoch": 1.9003046898964926, + "grad_norm": 9.81423564638817e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391830 + }, + { + "epoch": 1.900353188089329, + "grad_norm": 9.24893939213689e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391840 + }, + { + "epoch": 1.9004016862821649, + "grad_norm": 9.296314829043695e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391850 + }, + { + "epoch": 1.900450184475001, + "grad_norm": 8.67499672096983e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391860 + }, + { + "epoch": 1.9004986826678372, + "grad_norm": 1.2218008826891946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391870 + }, + { + "epoch": 1.900547180860673, + "grad_norm": 1.4923971392022395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391880 + }, + { + "epoch": 1.9005956790535092, + "grad_norm": 5.867461183584055e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391890 + }, + { + "epoch": 1.9006441772463454, + "grad_norm": 9.857767935272932e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391900 + }, + { + "epoch": 1.9006926754391813, + "grad_norm": 1.365069657310869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391910 + }, + { + "epoch": 1.9007411736320177, + "grad_norm": 1.7306595267996272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391920 + }, + { + "epoch": 1.9007896718248536, + "grad_norm": 8.664208017705732e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391930 + }, + { + "epoch": 1.9008381700176897, + "grad_norm": 1.1344174488669978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391940 + }, + { + "epoch": 1.9008866682105259, + "grad_norm": 1.0128488270311209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391950 + }, + { + "epoch": 1.9009351664033618, + "grad_norm": 1.4580666451990965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391960 + }, + { + "epoch": 1.900983664596198, + "grad_norm": 9.566703873531424e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391970 + }, + { + "epoch": 1.901032162789034, + "grad_norm": 1.1018470580381745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391980 + }, + { + "epoch": 1.90108066098187, + "grad_norm": 7.424460157778867e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 391990 + }, + { + "epoch": 1.9011291591747064, + "grad_norm": 1.0460460053707266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392000 + }, + { + "epoch": 1.9011776573675423, + "grad_norm": 1.1252829779095919e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392010 + }, + { + "epoch": 1.9012261555603784, + "grad_norm": 1.1363667340447137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392020 + }, + { + "epoch": 1.9012746537532146, + "grad_norm": 1.1120898868455242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392030 + }, + { + "epoch": 1.9013231519460505, + "grad_norm": 1.66927609512868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392040 + }, + { + "epoch": 1.9013716501388866, + "grad_norm": 1.4569256912011497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392050 + }, + { + "epoch": 1.9014201483317228, + "grad_norm": 1.273572536319989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392060 + }, + { + "epoch": 1.9014686465245587, + "grad_norm": 1.1315636427866593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392070 + }, + { + "epoch": 1.901517144717395, + "grad_norm": 8.0910105282328e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392080 + }, + { + "epoch": 1.901565642910231, + "grad_norm": 8.046096233726985e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392090 + }, + { + "epoch": 1.9016141411030671, + "grad_norm": 1.1957849821442323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392100 + }, + { + "epoch": 1.9016626392959033, + "grad_norm": 1.0460987631688567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392110 + }, + { + "epoch": 1.9017111374887392, + "grad_norm": 8.414986929494717e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392120 + }, + { + "epoch": 1.9017596356815756, + "grad_norm": 1.0798475003070962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392130 + }, + { + "epoch": 1.9018081338744115, + "grad_norm": 1.0298763619687179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392140 + }, + { + "epoch": 1.9018566320672476, + "grad_norm": 1.2691320883106982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392150 + }, + { + "epoch": 1.9019051302600838, + "grad_norm": 9.235368914062292e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392160 + }, + { + "epoch": 1.9019536284529197, + "grad_norm": 1.9654727623219514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392170 + }, + { + "epoch": 1.9020021266457559, + "grad_norm": 1.684814421309966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392180 + }, + { + "epoch": 1.902050624838592, + "grad_norm": 1.082821743381146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392190 + }, + { + "epoch": 1.902099123031428, + "grad_norm": 1.2555038786388195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392200 + }, + { + "epoch": 1.9021476212242643, + "grad_norm": 1.1267743182941103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392210 + }, + { + "epoch": 1.9021961194171002, + "grad_norm": 9.195851191634574e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392220 + }, + { + "epoch": 1.9022446176099364, + "grad_norm": 1.4776314394282508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392230 + }, + { + "epoch": 1.9022931158027725, + "grad_norm": 1.185493747613009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392240 + }, + { + "epoch": 1.9023416139956084, + "grad_norm": 1.5328385671864453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392250 + }, + { + "epoch": 1.9023901121884446, + "grad_norm": 1.2095508594711646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392260 + }, + { + "epoch": 1.9024386103812807, + "grad_norm": 7.162493265155945e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392270 + }, + { + "epoch": 1.9024871085741166, + "grad_norm": 1.0108347936466089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392280 + }, + { + "epoch": 1.902535606766953, + "grad_norm": 1.2456268905225443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392290 + }, + { + "epoch": 1.902584104959789, + "grad_norm": 8.482186508729228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392300 + }, + { + "epoch": 1.902632603152625, + "grad_norm": 9.576996085058909e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392310 + }, + { + "epoch": 1.9026811013454612, + "grad_norm": 2.135364596256295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392320 + }, + { + "epoch": 1.9027295995382971, + "grad_norm": 8.668456175087158e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392330 + }, + { + "epoch": 1.9027780977311333, + "grad_norm": 1.5590734037118636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392340 + }, + { + "epoch": 1.9028265959239694, + "grad_norm": 1.569006613522106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392350 + }, + { + "epoch": 1.9028750941168053, + "grad_norm": 1.4226392508476238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392360 + }, + { + "epoch": 1.9029235923096417, + "grad_norm": 1.493020995724237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392370 + }, + { + "epoch": 1.9029720905024776, + "grad_norm": 1.2744891364491195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392380 + }, + { + "epoch": 1.9030205886953138, + "grad_norm": 1.5464234337514426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392390 + }, + { + "epoch": 1.90306908688815, + "grad_norm": 1.1861496673759575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392400 + }, + { + "epoch": 1.9031175850809858, + "grad_norm": 7.239105315193228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392410 + }, + { + "epoch": 1.903166083273822, + "grad_norm": 9.517512111756332e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392420 + }, + { + "epoch": 1.9032145814666581, + "grad_norm": 8.715553612148597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392430 + }, + { + "epoch": 1.903263079659494, + "grad_norm": 1.0532469119084453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392440 + }, + { + "epoch": 1.9033115778523304, + "grad_norm": 1.1900367802297751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392450 + }, + { + "epoch": 1.9033600760451663, + "grad_norm": 9.058897632030494e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392460 + }, + { + "epoch": 1.9034085742380025, + "grad_norm": 1.6878667352671073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392470 + }, + { + "epoch": 1.9034570724308386, + "grad_norm": 1.0107835457517922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392480 + }, + { + "epoch": 1.9035055706236745, + "grad_norm": 1.5549636245282272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392490 + }, + { + "epoch": 1.9035540688165107, + "grad_norm": 1.1611895445184928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392500 + }, + { + "epoch": 1.9036025670093468, + "grad_norm": 1.042381114757518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392510 + }, + { + "epoch": 1.9036510652021827, + "grad_norm": 1.3103382379142658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392520 + }, + { + "epoch": 1.9036995633950191, + "grad_norm": 1.2288771777946295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392530 + }, + { + "epoch": 1.903748061587855, + "grad_norm": 1.0665014649191562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392540 + }, + { + "epoch": 1.9037965597806912, + "grad_norm": 1.1226587659507459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392550 + }, + { + "epoch": 1.9038450579735273, + "grad_norm": 1.3924822184208097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392560 + }, + { + "epoch": 1.9038935561663632, + "grad_norm": 1.035604402233048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392570 + }, + { + "epoch": 1.9039420543591994, + "grad_norm": 1.1438990199508225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392580 + }, + { + "epoch": 1.9039905525520355, + "grad_norm": 1.1280391731816053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392590 + }, + { + "epoch": 1.9040390507448715, + "grad_norm": 1.203537003391375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392600 + }, + { + "epoch": 1.9040875489377078, + "grad_norm": 1.0635214486853783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392610 + }, + { + "epoch": 1.9041360471305437, + "grad_norm": 1.2968699891757751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392620 + }, + { + "epoch": 1.9041845453233799, + "grad_norm": 1.4859913299858363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392630 + }, + { + "epoch": 1.904233043516216, + "grad_norm": 1.2168815288760015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392640 + }, + { + "epoch": 1.904281541709052, + "grad_norm": 1.108391156634525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392650 + }, + { + "epoch": 1.9043300399018883, + "grad_norm": 9.613082774251325e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392660 + }, + { + "epoch": 1.9043785380947242, + "grad_norm": 9.857640037580495e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392670 + }, + { + "epoch": 1.9044270362875604, + "grad_norm": 1.1543480837872266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392680 + }, + { + "epoch": 1.9044755344803965, + "grad_norm": 7.085013464802614e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392690 + }, + { + "epoch": 1.9045240326732324, + "grad_norm": 9.601084371979596e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392700 + }, + { + "epoch": 1.9045725308660686, + "grad_norm": 1.4180008278685818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392710 + }, + { + "epoch": 1.9046210290589047, + "grad_norm": 1.3747972538169506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392720 + }, + { + "epoch": 1.9046695272517407, + "grad_norm": 2.184863312493235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392730 + }, + { + "epoch": 1.904718025444577, + "grad_norm": 9.065832529131512e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392740 + }, + { + "epoch": 1.904766523637413, + "grad_norm": 1.8423280678803167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392750 + }, + { + "epoch": 1.904815021830249, + "grad_norm": 1.041533703727282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392760 + }, + { + "epoch": 1.9048635200230852, + "grad_norm": 9.152452129512767e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392770 + }, + { + "epoch": 1.9049120182159212, + "grad_norm": 1.575466512804269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392780 + }, + { + "epoch": 1.9049605164087573, + "grad_norm": 7.2697243780339704e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392790 + }, + { + "epoch": 1.9050090146015934, + "grad_norm": 1.099559288064711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392800 + }, + { + "epoch": 1.9050575127944294, + "grad_norm": 1.0397287475427675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392810 + }, + { + "epoch": 1.9051060109872657, + "grad_norm": 2.0080777929365468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392820 + }, + { + "epoch": 1.9051545091801017, + "grad_norm": 1.3399998444185712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392830 + }, + { + "epoch": 1.9052030073729378, + "grad_norm": 1.691987705498832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392840 + }, + { + "epoch": 1.905251505565774, + "grad_norm": 1.367835622545499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392850 + }, + { + "epoch": 1.9053000037586099, + "grad_norm": 1.0675923256542319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392860 + }, + { + "epoch": 1.905348501951446, + "grad_norm": 8.471274348664792e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392870 + }, + { + "epoch": 1.9053970001442821, + "grad_norm": 1.4350680643815394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392880 + }, + { + "epoch": 1.905445498337118, + "grad_norm": 1.4570894713017424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392890 + }, + { + "epoch": 1.9054939965299544, + "grad_norm": 1.1077826655991885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392900 + }, + { + "epoch": 1.9055424947227904, + "grad_norm": 7.485218667113713e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392910 + }, + { + "epoch": 1.9055909929156265, + "grad_norm": 2.0225822794373016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392920 + }, + { + "epoch": 1.9056394911084626, + "grad_norm": 9.922188404232202e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392930 + }, + { + "epoch": 1.9056879893012986, + "grad_norm": 7.862333006869449e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392940 + }, + { + "epoch": 1.9057364874941347, + "grad_norm": 8.102467141668512e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392950 + }, + { + "epoch": 1.9057849856869709, + "grad_norm": 1.1060325988410113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392960 + }, + { + "epoch": 1.9058334838798068, + "grad_norm": 1.013485295686678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392970 + }, + { + "epoch": 1.9058819820726431, + "grad_norm": 1.4023838090793106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392980 + }, + { + "epoch": 1.905930480265479, + "grad_norm": 9.735546591116417e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 392990 + }, + { + "epoch": 1.9059789784583152, + "grad_norm": 1.1221068518807442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393000 + }, + { + "epoch": 1.9060274766511514, + "grad_norm": 1.4654158775329051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393010 + }, + { + "epoch": 1.9060759748439873, + "grad_norm": 1.3758395311924687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393020 + }, + { + "epoch": 1.9061244730368234, + "grad_norm": 1.5841571610053506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393030 + }, + { + "epoch": 1.9061729712296596, + "grad_norm": 1.896282419977524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393040 + }, + { + "epoch": 1.9062214694224955, + "grad_norm": 7.678313984627039e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393050 + }, + { + "epoch": 1.9062699676153319, + "grad_norm": 1.4826648353505334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393060 + }, + { + "epoch": 1.9063184658081678, + "grad_norm": 9.244947918318758e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393070 + }, + { + "epoch": 1.906366964001004, + "grad_norm": 1.2807057636621266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393080 + }, + { + "epoch": 1.90641546219384, + "grad_norm": 1.0572482445070364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393090 + }, + { + "epoch": 1.906463960386676, + "grad_norm": 8.362309955600722e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393100 + }, + { + "epoch": 1.9065124585795121, + "grad_norm": 1.1602426575052505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393110 + }, + { + "epoch": 1.9065609567723483, + "grad_norm": 1.3532729603582538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393120 + }, + { + "epoch": 1.9066094549651844, + "grad_norm": 9.998711192338305e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393130 + }, + { + "epoch": 1.9066579531580206, + "grad_norm": 1.4833625883170498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393140 + }, + { + "epoch": 1.9067064513508565, + "grad_norm": 1.0661480587259575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393150 + }, + { + "epoch": 1.9067549495436926, + "grad_norm": 1.201688792917821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393160 + }, + { + "epoch": 1.9068034477365288, + "grad_norm": 1.2098734458731997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393170 + }, + { + "epoch": 1.9068519459293647, + "grad_norm": 1.225194079523817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393180 + }, + { + "epoch": 1.906900444122201, + "grad_norm": 1.1198308946802626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393190 + }, + { + "epoch": 1.906948942315037, + "grad_norm": 9.234206288510904e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393200 + }, + { + "epoch": 1.9069974405078731, + "grad_norm": 1.0395855731815118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393210 + }, + { + "epoch": 1.9070459387007093, + "grad_norm": 1.3672229570715899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393220 + }, + { + "epoch": 1.9070944368935452, + "grad_norm": 1.1854569770264334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393230 + }, + { + "epoch": 1.9071429350863813, + "grad_norm": 9.504400821924719e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393240 + }, + { + "epoch": 1.9071914332792175, + "grad_norm": 9.274075729592823e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393250 + }, + { + "epoch": 1.9072399314720534, + "grad_norm": 1.307438246556103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393260 + }, + { + "epoch": 1.9072884296648898, + "grad_norm": 7.121362610718052e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393270 + }, + { + "epoch": 1.9073369278577257, + "grad_norm": 1.2817634065243055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393280 + }, + { + "epoch": 1.9073854260505618, + "grad_norm": 8.471094936624013e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393290 + }, + { + "epoch": 1.907433924243398, + "grad_norm": 9.468259953848701e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393300 + }, + { + "epoch": 1.907482422436234, + "grad_norm": 1.3130839526809268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393310 + }, + { + "epoch": 1.90753092062907, + "grad_norm": 1.088559020701041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393320 + }, + { + "epoch": 1.9075794188219062, + "grad_norm": 9.880144702378857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393330 + }, + { + "epoch": 1.907627917014742, + "grad_norm": 1.2942530602799707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393340 + }, + { + "epoch": 1.9076764152075785, + "grad_norm": 1.0948930651011324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393350 + }, + { + "epoch": 1.9077249134004144, + "grad_norm": 9.452591598346771e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393360 + }, + { + "epoch": 1.9077734115932505, + "grad_norm": 1.1728928939191974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393370 + }, + { + "epoch": 1.9078219097860867, + "grad_norm": 1.3416405764132833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393380 + }, + { + "epoch": 1.9078704079789226, + "grad_norm": 1.5402626729610347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393390 + }, + { + "epoch": 1.9079189061717587, + "grad_norm": 1.1828422685766782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393400 + }, + { + "epoch": 1.9079674043645949, + "grad_norm": 9.170853410012114e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393410 + }, + { + "epoch": 1.9080159025574308, + "grad_norm": 1.0975873543372927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393420 + }, + { + "epoch": 1.9080644007502672, + "grad_norm": 1.3447770008667703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393430 + }, + { + "epoch": 1.908112898943103, + "grad_norm": 9.635550135556059e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393440 + }, + { + "epoch": 1.9081613971359392, + "grad_norm": 1.5774464401374644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393450 + }, + { + "epoch": 1.9082098953287754, + "grad_norm": 1.7050060918677445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393460 + }, + { + "epoch": 1.9082583935216113, + "grad_norm": 1.1103021613223518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393470 + }, + { + "epoch": 1.9083068917144475, + "grad_norm": 1.1336492633517992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393480 + }, + { + "epoch": 1.9083553899072836, + "grad_norm": 1.1052297743674444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393490 + }, + { + "epoch": 1.9084038881001195, + "grad_norm": 1.6803456404090866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393500 + }, + { + "epoch": 1.9084523862929559, + "grad_norm": 2.834040380150782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393510 + }, + { + "epoch": 1.9085008844857918, + "grad_norm": 2.43004656397261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393520 + }, + { + "epoch": 1.908549382678628, + "grad_norm": 1.8062246809336102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393530 + }, + { + "epoch": 1.908597880871464, + "grad_norm": 1.0858999921481427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393540 + }, + { + "epoch": 1.9086463790643, + "grad_norm": 2.1587380771848075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393550 + }, + { + "epoch": 1.9086948772571362, + "grad_norm": 2.0017974833308472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393560 + }, + { + "epoch": 1.9087433754499723, + "grad_norm": 1.1836176483370764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393570 + }, + { + "epoch": 1.9087918736428082, + "grad_norm": 8.716494193095059e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393580 + }, + { + "epoch": 1.9088403718356446, + "grad_norm": 7.34157845627692e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393590 + }, + { + "epoch": 1.9088888700284805, + "grad_norm": 1.2120104031509982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393600 + }, + { + "epoch": 1.9089373682213167, + "grad_norm": 1.0594028765353869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393610 + }, + { + "epoch": 1.9089858664141528, + "grad_norm": 1.9735608702831087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393620 + }, + { + "epoch": 1.9090343646069887, + "grad_norm": 1.5125385388614632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393630 + }, + { + "epoch": 1.909082862799825, + "grad_norm": 8.067452483828674e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393640 + }, + { + "epoch": 1.909131360992661, + "grad_norm": 1.5211975679108036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393650 + }, + { + "epoch": 1.9091798591854972, + "grad_norm": 9.927996202918621e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393660 + }, + { + "epoch": 1.9092283573783333, + "grad_norm": 1.3528116404870616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393670 + }, + { + "epoch": 1.9092768555711692, + "grad_norm": 1.6164127814022322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393680 + }, + { + "epoch": 1.9093253537640054, + "grad_norm": 1.3297549728008562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393690 + }, + { + "epoch": 1.9093738519568415, + "grad_norm": 1.3880231186647052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393700 + }, + { + "epoch": 1.9094223501496774, + "grad_norm": 9.269195189176571e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393710 + }, + { + "epoch": 1.9094708483425138, + "grad_norm": 1.2640627211624178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393720 + }, + { + "epoch": 1.9095193465353497, + "grad_norm": 6.552083320343627e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393730 + }, + { + "epoch": 1.9095678447281859, + "grad_norm": 1.0334593625316302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393740 + }, + { + "epoch": 1.909616342921022, + "grad_norm": 1.7351576175883565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393750 + }, + { + "epoch": 1.909664841113858, + "grad_norm": 1.509038405345109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393760 + }, + { + "epoch": 1.909713339306694, + "grad_norm": 1.2297356022372696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393770 + }, + { + "epoch": 1.9097618374995302, + "grad_norm": 1.857856446463302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393780 + }, + { + "epoch": 1.9098103356923661, + "grad_norm": 1.779604197338358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393790 + }, + { + "epoch": 1.9098588338852025, + "grad_norm": 9.307627557575415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393800 + }, + { + "epoch": 1.9099073320780384, + "grad_norm": 6.968974286536422e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393810 + }, + { + "epoch": 1.9099558302708746, + "grad_norm": 1.0599399580257796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393820 + }, + { + "epoch": 1.9100043284637107, + "grad_norm": 1.6554668746948664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393830 + }, + { + "epoch": 1.9100528266565466, + "grad_norm": 1.2881540278897319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393840 + }, + { + "epoch": 1.9101013248493828, + "grad_norm": 1.1074914318953688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393850 + }, + { + "epoch": 1.910149823042219, + "grad_norm": 9.124289768180915e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393860 + }, + { + "epoch": 1.9101983212350548, + "grad_norm": 1.517565451081282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393870 + }, + { + "epoch": 1.9102468194278912, + "grad_norm": 1.3454527270084782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393880 + }, + { + "epoch": 1.9102953176207271, + "grad_norm": 1.9284229324512125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393890 + }, + { + "epoch": 1.9103438158135633, + "grad_norm": 1.1410580924575697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393900 + }, + { + "epoch": 1.9103923140063994, + "grad_norm": 1.2653287306818584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393910 + }, + { + "epoch": 1.9104408121992353, + "grad_norm": 1.2736192545048652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393920 + }, + { + "epoch": 1.9104893103920715, + "grad_norm": 1.0305317488246146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393930 + }, + { + "epoch": 1.9105378085849076, + "grad_norm": 1.3933854958736447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393940 + }, + { + "epoch": 1.9105863067777435, + "grad_norm": 1.0794533267244333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393950 + }, + { + "epoch": 1.91063480497058, + "grad_norm": 1.776618496762694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393960 + }, + { + "epoch": 1.9106833031634158, + "grad_norm": 1.1562120150188093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393970 + }, + { + "epoch": 1.910731801356252, + "grad_norm": 1.2096550427997954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393980 + }, + { + "epoch": 1.9107802995490881, + "grad_norm": 1.0369690883749172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 393990 + }, + { + "epoch": 1.910828797741924, + "grad_norm": 1.9067270429218297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394000 + }, + { + "epoch": 1.9108772959347602, + "grad_norm": 1.3779615670728163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394010 + }, + { + "epoch": 1.9109257941275963, + "grad_norm": 9.571355263915393e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394020 + }, + { + "epoch": 1.9109742923204323, + "grad_norm": 8.824386554806551e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394030 + }, + { + "epoch": 1.9110227905132686, + "grad_norm": 9.630697128670818e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394040 + }, + { + "epoch": 1.9110712887061045, + "grad_norm": 1.3473358428939264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394050 + }, + { + "epoch": 1.9111197868989407, + "grad_norm": 1.872087374010789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394060 + }, + { + "epoch": 1.9111682850917768, + "grad_norm": 1.5485126070302613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394070 + }, + { + "epoch": 1.9112167832846128, + "grad_norm": 1.6682820458413516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394080 + }, + { + "epoch": 1.911265281477449, + "grad_norm": 1.1294552848539752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394090 + }, + { + "epoch": 1.911313779670285, + "grad_norm": 1.2750322575527662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394100 + }, + { + "epoch": 1.911362277863121, + "grad_norm": 7.480315922236969e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394110 + }, + { + "epoch": 1.9114107760559573, + "grad_norm": 9.472478801342277e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394120 + }, + { + "epoch": 1.9114592742487932, + "grad_norm": 1.1050169668180843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394130 + }, + { + "epoch": 1.9115077724416294, + "grad_norm": 1.0768077096656725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394140 + }, + { + "epoch": 1.9115562706344655, + "grad_norm": 1.4932021841218557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394150 + }, + { + "epoch": 1.9116047688273015, + "grad_norm": 1.7463300139297644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394160 + }, + { + "epoch": 1.9116532670201378, + "grad_norm": 1.0209427969698481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394170 + }, + { + "epoch": 1.9117017652129737, + "grad_norm": 1.1083279183310424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394180 + }, + { + "epoch": 1.91175026340581, + "grad_norm": 1.0521334914415092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394190 + }, + { + "epoch": 1.911798761598646, + "grad_norm": 1.0090439594989675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394200 + }, + { + "epoch": 1.911847259791482, + "grad_norm": 9.779833831657925e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394210 + }, + { + "epoch": 1.911895757984318, + "grad_norm": 1.4524390579140345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394220 + }, + { + "epoch": 1.9119442561771542, + "grad_norm": 1.1788017673097784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394230 + }, + { + "epoch": 1.9119927543699902, + "grad_norm": 1.2287248551956509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394240 + }, + { + "epoch": 1.9120412525628265, + "grad_norm": 7.984565009167e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394250 + }, + { + "epoch": 1.9120897507556625, + "grad_norm": 1.1133408861496719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394260 + }, + { + "epoch": 1.9121382489484986, + "grad_norm": 1.2692224160559817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394270 + }, + { + "epoch": 1.9121867471413347, + "grad_norm": 1.030110041710941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394280 + }, + { + "epoch": 1.9122352453341707, + "grad_norm": 9.542550749586098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394290 + }, + { + "epoch": 1.9122837435270068, + "grad_norm": 8.895217895599217e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394300 + }, + { + "epoch": 1.912332241719843, + "grad_norm": 9.47471967549518e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394310 + }, + { + "epoch": 1.9123807399126789, + "grad_norm": 1.0225290836274326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394320 + }, + { + "epoch": 1.9124292381055152, + "grad_norm": 1.0024746366354975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394330 + }, + { + "epoch": 1.9124777362983512, + "grad_norm": 1.0010228201906557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394340 + }, + { + "epoch": 1.9125262344911873, + "grad_norm": 1.1339214900374373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394350 + }, + { + "epoch": 1.9125747326840234, + "grad_norm": 1.3543631993684357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394360 + }, + { + "epoch": 1.9126232308768594, + "grad_norm": 7.2107808612997815e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394370 + }, + { + "epoch": 1.9126717290696955, + "grad_norm": 8.875906232219677e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394380 + }, + { + "epoch": 1.9127202272625317, + "grad_norm": 1.0636465930247141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394390 + }, + { + "epoch": 1.9127687254553676, + "grad_norm": 1.4073828324967508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394400 + }, + { + "epoch": 1.912817223648204, + "grad_norm": 1.0994843258060882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394410 + }, + { + "epoch": 1.9128657218410399, + "grad_norm": 1.184356079875215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394420 + }, + { + "epoch": 1.912914220033876, + "grad_norm": 1.3496045170313664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394430 + }, + { + "epoch": 1.9129627182267122, + "grad_norm": 1.2761711687403476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394440 + }, + { + "epoch": 1.913011216419548, + "grad_norm": 1.2691401707343175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394450 + }, + { + "epoch": 1.9130597146123842, + "grad_norm": 2.4847199853184065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394460 + }, + { + "epoch": 1.9131082128052204, + "grad_norm": 9.444425685956048e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394470 + }, + { + "epoch": 1.9131567109980563, + "grad_norm": 1.3628642214769116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394480 + }, + { + "epoch": 1.9132052091908927, + "grad_norm": 1.1203146854654733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394490 + }, + { + "epoch": 1.9132537073837286, + "grad_norm": 1.6084927167980823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394500 + }, + { + "epoch": 1.9133022055765647, + "grad_norm": 1.0766922464711115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394510 + }, + { + "epoch": 1.9133507037694009, + "grad_norm": 9.337432160805292e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394520 + }, + { + "epoch": 1.9133992019622368, + "grad_norm": 1.3983934010752819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394530 + }, + { + "epoch": 1.913447700155073, + "grad_norm": 1.3628021378053745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394540 + }, + { + "epoch": 1.913496198347909, + "grad_norm": 1.3696067391322231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394550 + }, + { + "epoch": 1.913544696540745, + "grad_norm": 1.540700367286263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394560 + }, + { + "epoch": 1.9135931947335814, + "grad_norm": 1.5143214682211692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394570 + }, + { + "epoch": 1.9136416929264173, + "grad_norm": 1.0005856587724793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394580 + }, + { + "epoch": 1.9136901911192534, + "grad_norm": 1.2492608725267473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394590 + }, + { + "epoch": 1.9137386893120896, + "grad_norm": 1.2685148043090066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394600 + }, + { + "epoch": 1.9137871875049255, + "grad_norm": 1.277396410870324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394610 + }, + { + "epoch": 1.9138356856977616, + "grad_norm": 1.045825559486957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394620 + }, + { + "epoch": 1.9138841838905978, + "grad_norm": 1.2920419401041272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394630 + }, + { + "epoch": 1.9139326820834337, + "grad_norm": 7.340374974518227e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394640 + }, + { + "epoch": 1.91398118027627, + "grad_norm": 8.834562414961056e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394650 + }, + { + "epoch": 1.914029678469106, + "grad_norm": 1.3591884950869826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394660 + }, + { + "epoch": 1.9140781766619421, + "grad_norm": 8.417208263722387e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394670 + }, + { + "epoch": 1.9141266748547783, + "grad_norm": 1.3211184146655341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394680 + }, + { + "epoch": 1.9141751730476142, + "grad_norm": 1.525580017869288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394690 + }, + { + "epoch": 1.9142236712404506, + "grad_norm": 1.2407403993108801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394700 + }, + { + "epoch": 1.9142721694332865, + "grad_norm": 9.736780270941381e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394710 + }, + { + "epoch": 1.9143206676261226, + "grad_norm": 1.2563059925696507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394720 + }, + { + "epoch": 1.9143691658189588, + "grad_norm": 9.289209401686094e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394730 + }, + { + "epoch": 1.9144176640117947, + "grad_norm": 1.1522977239053489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394740 + }, + { + "epoch": 1.9144661622046308, + "grad_norm": 1.8324517014889352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394750 + }, + { + "epoch": 1.914514660397467, + "grad_norm": 1.535525484541722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394760 + }, + { + "epoch": 1.914563158590303, + "grad_norm": 8.72475691693353e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394770 + }, + { + "epoch": 1.9146116567831393, + "grad_norm": 1.825154249956995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394780 + }, + { + "epoch": 1.9146601549759752, + "grad_norm": 1.1599139426721194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394790 + }, + { + "epoch": 1.9147086531688113, + "grad_norm": 7.762062992355823e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394800 + }, + { + "epoch": 1.9147571513616475, + "grad_norm": 1.869951837818462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394810 + }, + { + "epoch": 1.9148056495544834, + "grad_norm": 8.477130997164295e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394820 + }, + { + "epoch": 1.9148541477473195, + "grad_norm": 1.015342210308745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394830 + }, + { + "epoch": 1.9149026459401557, + "grad_norm": 1.3688888245155795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394840 + }, + { + "epoch": 1.9149511441329916, + "grad_norm": 1.852276021452326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394850 + }, + { + "epoch": 1.914999642325828, + "grad_norm": 1.062586729716486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394860 + }, + { + "epoch": 1.915048140518664, + "grad_norm": 1.4545202375870758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394870 + }, + { + "epoch": 1.9150966387115, + "grad_norm": 1.501902779921238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394880 + }, + { + "epoch": 1.9151451369043362, + "grad_norm": 1.1070208749686117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394890 + }, + { + "epoch": 1.915193635097172, + "grad_norm": 1.2226987422536695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394900 + }, + { + "epoch": 1.9152421332900083, + "grad_norm": 1.5066879299752145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394910 + }, + { + "epoch": 1.9152906314828444, + "grad_norm": 1.2194502296836163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394920 + }, + { + "epoch": 1.9153391296756803, + "grad_norm": 1.0471357114738566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394930 + }, + { + "epoch": 1.9153876278685167, + "grad_norm": 1.0662676075412492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394940 + }, + { + "epoch": 1.9154361260613526, + "grad_norm": 1.3280597066511746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394950 + }, + { + "epoch": 1.9154846242541888, + "grad_norm": 1.3637803775168322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394960 + }, + { + "epoch": 1.915533122447025, + "grad_norm": 7.797659407060564e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394970 + }, + { + "epoch": 1.9155816206398608, + "grad_norm": 1.0669834793475275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394980 + }, + { + "epoch": 1.915630118832697, + "grad_norm": 1.492493595378619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 394990 + }, + { + "epoch": 1.915678617025533, + "grad_norm": 1.695722318117987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395000 + }, + { + "epoch": 1.915727115218369, + "grad_norm": 6.344384573253592e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395010 + }, + { + "epoch": 1.9157756134112054, + "grad_norm": 9.449081517232116e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395020 + }, + { + "epoch": 1.9158241116040413, + "grad_norm": 1.5055357849291795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395030 + }, + { + "epoch": 1.9158726097968775, + "grad_norm": 1.074806998957456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395040 + }, + { + "epoch": 1.9159211079897136, + "grad_norm": 1.4139254211897878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395050 + }, + { + "epoch": 1.9159696061825495, + "grad_norm": 1.8608995233648784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395060 + }, + { + "epoch": 1.9160181043753857, + "grad_norm": 1.3646542562639752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395070 + }, + { + "epoch": 1.9160666025682218, + "grad_norm": 5.4316267039666855e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395080 + }, + { + "epoch": 1.9161151007610577, + "grad_norm": 1.2596296450340105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395090 + }, + { + "epoch": 1.916163598953894, + "grad_norm": 7.821938652341487e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395100 + }, + { + "epoch": 1.91621209714673, + "grad_norm": 1.8945168989148442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395110 + }, + { + "epoch": 1.9162605953395662, + "grad_norm": 1.3640470086784262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395120 + }, + { + "epoch": 1.9163090935324023, + "grad_norm": 1.4677496551485092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395130 + }, + { + "epoch": 1.9163575917252382, + "grad_norm": 1.259106952034017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395140 + }, + { + "epoch": 1.9164060899180744, + "grad_norm": 9.129196065771339e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395150 + }, + { + "epoch": 1.9164545881109105, + "grad_norm": 1.256562853768628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395160 + }, + { + "epoch": 1.9165030863037467, + "grad_norm": 1.0484482615424895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395170 + }, + { + "epoch": 1.9165515844965828, + "grad_norm": 1.1778973352249977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395180 + }, + { + "epoch": 1.9166000826894187, + "grad_norm": 1.4283033422657354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395190 + }, + { + "epoch": 1.9166485808822549, + "grad_norm": 9.695396485653873e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395200 + }, + { + "epoch": 1.916697079075091, + "grad_norm": 1.0117501503259518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395210 + }, + { + "epoch": 1.916745577267927, + "grad_norm": 7.417142011689748e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395220 + }, + { + "epoch": 1.9167940754607633, + "grad_norm": 5.8806164382474435e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395230 + }, + { + "epoch": 1.9168425736535992, + "grad_norm": 7.810212032666186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395240 + }, + { + "epoch": 1.9168910718464354, + "grad_norm": 9.87687531761594e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395250 + }, + { + "epoch": 1.9169395700392715, + "grad_norm": 1.0919874782189254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395260 + }, + { + "epoch": 1.9169880682321074, + "grad_norm": 1.3631791695445372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395270 + }, + { + "epoch": 1.9170365664249436, + "grad_norm": 1.4123478386807164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395280 + }, + { + "epoch": 1.9170850646177797, + "grad_norm": 1.0579918274800093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395290 + }, + { + "epoch": 1.9171335628106156, + "grad_norm": 1.191458665061873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395300 + }, + { + "epoch": 1.917182061003452, + "grad_norm": 9.415900947828959e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395310 + }, + { + "epoch": 1.917230559196288, + "grad_norm": 9.884175256047456e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395320 + }, + { + "epoch": 1.917279057389124, + "grad_norm": 1.0608647293963713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395330 + }, + { + "epoch": 1.9173275555819602, + "grad_norm": 1.2726776965621411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395340 + }, + { + "epoch": 1.9173760537747961, + "grad_norm": 1.0874467548660505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395350 + }, + { + "epoch": 1.9174245519676323, + "grad_norm": 1.14571312437306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395360 + }, + { + "epoch": 1.9174730501604684, + "grad_norm": 9.640503506602727e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395370 + }, + { + "epoch": 1.9175215483533044, + "grad_norm": 9.564855574240028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395380 + }, + { + "epoch": 1.9175700465461407, + "grad_norm": 1.2935331028529617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395390 + }, + { + "epoch": 1.9176185447389766, + "grad_norm": 1.2040162644666452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395400 + }, + { + "epoch": 1.9176670429318128, + "grad_norm": 1.4898327904688813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395410 + }, + { + "epoch": 1.917715541124649, + "grad_norm": 1.1908626085244123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395420 + }, + { + "epoch": 1.9177640393174848, + "grad_norm": 9.136098988449248e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395430 + }, + { + "epoch": 1.917812537510321, + "grad_norm": 1.2403650551107148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395440 + }, + { + "epoch": 1.9178610357031571, + "grad_norm": 1.1898612761740424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395450 + }, + { + "epoch": 1.917909533895993, + "grad_norm": 1.0670423655767536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395460 + }, + { + "epoch": 1.9179580320888294, + "grad_norm": 1.747252298400781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395470 + }, + { + "epoch": 1.9180065302816653, + "grad_norm": 1.952986927733491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395480 + }, + { + "epoch": 1.9180550284745015, + "grad_norm": 1.1821565948366697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395490 + }, + { + "epoch": 1.9181035266673376, + "grad_norm": 1.5068636116666312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395500 + }, + { + "epoch": 1.9181520248601736, + "grad_norm": 1.3492991612906735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395510 + }, + { + "epoch": 1.9182005230530097, + "grad_norm": 1.423695739077857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395520 + }, + { + "epoch": 1.9182490212458458, + "grad_norm": 7.89153098423867e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395530 + }, + { + "epoch": 1.9182975194386818, + "grad_norm": 1.3082354755056258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395540 + }, + { + "epoch": 1.9183460176315181, + "grad_norm": 2.2959676826417308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395550 + }, + { + "epoch": 1.918394515824354, + "grad_norm": 1.629092594157555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395560 + }, + { + "epoch": 1.9184430140171902, + "grad_norm": 1.7718983613690398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395570 + }, + { + "epoch": 1.9184915122100263, + "grad_norm": 1.5369201022963352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395580 + }, + { + "epoch": 1.9185400104028623, + "grad_norm": 2.1972299535377715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395590 + }, + { + "epoch": 1.9185885085956984, + "grad_norm": 1.1826013945892555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395600 + }, + { + "epoch": 1.9186370067885345, + "grad_norm": 8.01314570253453e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395610 + }, + { + "epoch": 1.9186855049813705, + "grad_norm": 9.043071180769857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395620 + }, + { + "epoch": 1.9187340031742068, + "grad_norm": 9.893091679202826e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395630 + }, + { + "epoch": 1.9187825013670428, + "grad_norm": 1.7716246247800882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395640 + }, + { + "epoch": 1.918830999559879, + "grad_norm": 1.6977729444533907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395650 + }, + { + "epoch": 1.918879497752715, + "grad_norm": 1.9847490762003872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395660 + }, + { + "epoch": 1.918927995945551, + "grad_norm": 1.4420226790434754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395670 + }, + { + "epoch": 1.9189764941383873, + "grad_norm": 1.2045522801429343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395680 + }, + { + "epoch": 1.9190249923312233, + "grad_norm": 1.1489188267432837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395690 + }, + { + "epoch": 1.9190734905240594, + "grad_norm": 1.46086973629167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395700 + }, + { + "epoch": 1.9191219887168955, + "grad_norm": 1.2855362108155077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395710 + }, + { + "epoch": 1.9191704869097315, + "grad_norm": 1.0350132306768955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395720 + }, + { + "epoch": 1.9192189851025676, + "grad_norm": 1.693015150294741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395730 + }, + { + "epoch": 1.9192674832954038, + "grad_norm": 8.971911213961903e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395740 + }, + { + "epoch": 1.9193159814882397, + "grad_norm": 1.2914782132611435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395750 + }, + { + "epoch": 1.919364479681076, + "grad_norm": 1.8983310923204044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395760 + }, + { + "epoch": 1.919412977873912, + "grad_norm": 1.1846381653413118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395770 + }, + { + "epoch": 1.919461476066748, + "grad_norm": 1.4645139323476997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395780 + }, + { + "epoch": 1.9195099742595843, + "grad_norm": 1.0223958568644775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395790 + }, + { + "epoch": 1.9195584724524202, + "grad_norm": 1.382876035904701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395800 + }, + { + "epoch": 1.9196069706452563, + "grad_norm": 1.3138353516239931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395810 + }, + { + "epoch": 1.9196554688380925, + "grad_norm": 1.30745299031787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395820 + }, + { + "epoch": 1.9197039670309284, + "grad_norm": 1.562656315456934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395830 + }, + { + "epoch": 1.9197524652237647, + "grad_norm": 6.96400581645662e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395840 + }, + { + "epoch": 1.9198009634166007, + "grad_norm": 1.0406854933364684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395850 + }, + { + "epoch": 1.9198494616094368, + "grad_norm": 2.2104986285853556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395860 + }, + { + "epoch": 1.919897959802273, + "grad_norm": 1.1643940034389288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395870 + }, + { + "epoch": 1.9199464579951089, + "grad_norm": 9.142805623696404e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395880 + }, + { + "epoch": 1.919994956187945, + "grad_norm": 1.1081354500674934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395890 + }, + { + "epoch": 1.9200434543807812, + "grad_norm": 1.1361827922939938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395900 + }, + { + "epoch": 1.920091952573617, + "grad_norm": 1.5069803183109798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395910 + }, + { + "epoch": 1.9201404507664535, + "grad_norm": 1.3845827595559967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395920 + }, + { + "epoch": 1.9201889489592894, + "grad_norm": 1.161603702115599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395930 + }, + { + "epoch": 1.9202374471521255, + "grad_norm": 1.3932004883088211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395940 + }, + { + "epoch": 1.9202859453449617, + "grad_norm": 1.3779057006502171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395950 + }, + { + "epoch": 1.9203344435377976, + "grad_norm": 2.491388428893515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395960 + }, + { + "epoch": 1.9203829417306337, + "grad_norm": 1.6087852827695315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395970 + }, + { + "epoch": 1.9204314399234699, + "grad_norm": 1.2742678912047722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395980 + }, + { + "epoch": 1.9204799381163058, + "grad_norm": 2.018786737778555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 395990 + }, + { + "epoch": 1.9205284363091422, + "grad_norm": 1.19768746031923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396000 + }, + { + "epoch": 1.920576934501978, + "grad_norm": 1.2954674666332266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396010 + }, + { + "epoch": 1.9206254326948142, + "grad_norm": 1.0467400279878802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396020 + }, + { + "epoch": 1.9206739308876504, + "grad_norm": 1.698402485317274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396030 + }, + { + "epoch": 1.9207224290804863, + "grad_norm": 1.3640707230422322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396040 + }, + { + "epoch": 1.9207709272733224, + "grad_norm": 8.870133960670046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396050 + }, + { + "epoch": 1.9208194254661586, + "grad_norm": 1.2908759394747449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396060 + }, + { + "epoch": 1.9208679236589945, + "grad_norm": 1.1792701926083282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396070 + }, + { + "epoch": 1.9209164218518309, + "grad_norm": 9.670176659426488e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396080 + }, + { + "epoch": 1.9209649200446668, + "grad_norm": 1.6469892116788287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396090 + }, + { + "epoch": 1.921013418237503, + "grad_norm": 1.1185786519263274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396100 + }, + { + "epoch": 1.921061916430339, + "grad_norm": 7.72397967807592e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396110 + }, + { + "epoch": 1.921110414623175, + "grad_norm": 7.377344068970615e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396120 + }, + { + "epoch": 1.9211589128160111, + "grad_norm": 2.0655106069966678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396130 + }, + { + "epoch": 1.9212074110088473, + "grad_norm": 9.109877296964441e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396140 + }, + { + "epoch": 1.9212559092016832, + "grad_norm": 1.045292297163769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396150 + }, + { + "epoch": 1.9213044073945196, + "grad_norm": 1.0445145193216376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396160 + }, + { + "epoch": 1.9213529055873555, + "grad_norm": 1.6800736801769744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396170 + }, + { + "epoch": 1.9214014037801916, + "grad_norm": 1.0254711746426892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396180 + }, + { + "epoch": 1.9214499019730278, + "grad_norm": 9.804861811346655e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396190 + }, + { + "epoch": 1.9214984001658637, + "grad_norm": 8.971073661712126e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396200 + }, + { + "epoch": 1.9215468983587, + "grad_norm": 1.836747109962289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396210 + }, + { + "epoch": 1.921595396551536, + "grad_norm": 2.1942394567986412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396220 + }, + { + "epoch": 1.9216438947443721, + "grad_norm": 1.5275421816340895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396230 + }, + { + "epoch": 1.9216923929372083, + "grad_norm": 8.110568217034597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396240 + }, + { + "epoch": 1.9217408911300442, + "grad_norm": 1.5831508548558304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396250 + }, + { + "epoch": 1.9217893893228803, + "grad_norm": 9.72857971959229e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396260 + }, + { + "epoch": 1.9218378875157165, + "grad_norm": 1.2799541870833764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396270 + }, + { + "epoch": 1.9218863857085524, + "grad_norm": 1.1800697308217423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396280 + }, + { + "epoch": 1.9219348839013888, + "grad_norm": 8.08289879472568e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396290 + }, + { + "epoch": 1.9219833820942247, + "grad_norm": 7.667303236758016e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396300 + }, + { + "epoch": 1.9220318802870608, + "grad_norm": 1.1917631326241462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396310 + }, + { + "epoch": 1.922080378479897, + "grad_norm": 1.369375723925259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396320 + }, + { + "epoch": 1.922128876672733, + "grad_norm": 1.507930313948691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396330 + }, + { + "epoch": 1.922177374865569, + "grad_norm": 1.104748115210441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396340 + }, + { + "epoch": 1.9222258730584052, + "grad_norm": 7.273674551555587e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396350 + }, + { + "epoch": 1.9222743712512411, + "grad_norm": 2.1179022979822548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396360 + }, + { + "epoch": 1.9223228694440775, + "grad_norm": 1.5953412813018986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396370 + }, + { + "epoch": 1.9223713676369134, + "grad_norm": 1.6877098829581882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396380 + }, + { + "epoch": 1.9224198658297496, + "grad_norm": 7.868259821464108e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396390 + }, + { + "epoch": 1.9224683640225857, + "grad_norm": 1.3812718968608806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396400 + }, + { + "epoch": 1.9225168622154216, + "grad_norm": 1.0679892525899959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396410 + }, + { + "epoch": 1.9225653604082578, + "grad_norm": 1.1853583004040047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396420 + }, + { + "epoch": 1.922613858601094, + "grad_norm": 1.2685924311028884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396430 + }, + { + "epoch": 1.9226623567939298, + "grad_norm": 1.315334419160763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396440 + }, + { + "epoch": 1.9227108549867662, + "grad_norm": 1.3769503759419877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396450 + }, + { + "epoch": 1.9227593531796021, + "grad_norm": 1.5125047880815146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396460 + }, + { + "epoch": 1.9228078513724383, + "grad_norm": 1.2550303019054354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396470 + }, + { + "epoch": 1.9228563495652744, + "grad_norm": 1.3344111593482921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396480 + }, + { + "epoch": 1.9229048477581103, + "grad_norm": 1.0428057528599766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396490 + }, + { + "epoch": 1.9229533459509465, + "grad_norm": 8.917105276395887e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396500 + }, + { + "epoch": 1.9230018441437826, + "grad_norm": 1.0589957355477964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396510 + }, + { + "epoch": 1.9230503423366185, + "grad_norm": 8.38983282847039e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396520 + }, + { + "epoch": 1.923098840529455, + "grad_norm": 1.208886057924019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396530 + }, + { + "epoch": 1.9231473387222908, + "grad_norm": 1.3570684131991584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396540 + }, + { + "epoch": 1.923195836915127, + "grad_norm": 1.4595006092577023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396550 + }, + { + "epoch": 1.923244335107963, + "grad_norm": 1.2080808353687189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396560 + }, + { + "epoch": 1.923292833300799, + "grad_norm": 1.3364424233941463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396570 + }, + { + "epoch": 1.9233413314936352, + "grad_norm": 1.499207513688816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396580 + }, + { + "epoch": 1.9233898296864713, + "grad_norm": 2.0427405544864996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396590 + }, + { + "epoch": 1.9234383278793072, + "grad_norm": 9.561478719888328e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396600 + }, + { + "epoch": 1.9234868260721436, + "grad_norm": 1.0405882377995113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396610 + }, + { + "epoch": 1.9235353242649795, + "grad_norm": 1.6506824351836258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396620 + }, + { + "epoch": 1.9235838224578157, + "grad_norm": 1.0521119087059105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396630 + }, + { + "epoch": 1.9236323206506518, + "grad_norm": 8.229463333009335e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396640 + }, + { + "epoch": 1.9236808188434877, + "grad_norm": 1.2097191692816978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396650 + }, + { + "epoch": 1.9237293170363239, + "grad_norm": 1.1365134611196481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396660 + }, + { + "epoch": 1.92377781522916, + "grad_norm": 1.2659997494779418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396670 + }, + { + "epoch": 1.923826313421996, + "grad_norm": 7.923212308469374e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396680 + }, + { + "epoch": 1.9238748116148323, + "grad_norm": 1.030977969662672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396690 + }, + { + "epoch": 1.9239233098076682, + "grad_norm": 1.1226987339796324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396700 + }, + { + "epoch": 1.9239718080005044, + "grad_norm": 1.672110450101627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396710 + }, + { + "epoch": 1.9240203061933405, + "grad_norm": 1.2986610897769424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396720 + }, + { + "epoch": 1.9240688043861764, + "grad_norm": 1.1266839017309849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396730 + }, + { + "epoch": 1.9241173025790128, + "grad_norm": 1.1153436396682537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396740 + }, + { + "epoch": 1.9241658007718487, + "grad_norm": 1.2550779970865733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396750 + }, + { + "epoch": 1.9242142989646849, + "grad_norm": 1.669494942291294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396760 + }, + { + "epoch": 1.924262797157521, + "grad_norm": 1.1681319023182368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396770 + }, + { + "epoch": 1.924311295350357, + "grad_norm": 1.757280720937615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396780 + }, + { + "epoch": 1.924359793543193, + "grad_norm": 1.0309268994035392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396790 + }, + { + "epoch": 1.9244082917360292, + "grad_norm": 8.280279573114058e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396800 + }, + { + "epoch": 1.9244567899288652, + "grad_norm": 1.328603627115399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396810 + }, + { + "epoch": 1.9245052881217015, + "grad_norm": 1.1133633570636903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396820 + }, + { + "epoch": 1.9245537863145374, + "grad_norm": 1.5437647604699123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396830 + }, + { + "epoch": 1.9246022845073736, + "grad_norm": 9.949208568116319e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396840 + }, + { + "epoch": 1.9246507827002097, + "grad_norm": 1.0769297453805393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396850 + }, + { + "epoch": 1.9246992808930456, + "grad_norm": 1.5934686459218028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396860 + }, + { + "epoch": 1.9247477790858818, + "grad_norm": 9.670519496296492e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396870 + }, + { + "epoch": 1.924796277278718, + "grad_norm": 8.415820929030815e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396880 + }, + { + "epoch": 1.9248447754715539, + "grad_norm": 1.1375320241313602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396890 + }, + { + "epoch": 1.9248932736643902, + "grad_norm": 9.449233395741885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396900 + }, + { + "epoch": 1.9249417718572261, + "grad_norm": 1.6576379380239814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396910 + }, + { + "epoch": 1.9249902700500623, + "grad_norm": 1.415355388445505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396920 + }, + { + "epoch": 1.9250387682428984, + "grad_norm": 1.5608447867521136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396930 + }, + { + "epoch": 1.9250872664357344, + "grad_norm": 2.2997451054607154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396940 + }, + { + "epoch": 1.9251357646285705, + "grad_norm": 1.0076333545327998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396950 + }, + { + "epoch": 1.9251842628214066, + "grad_norm": 8.98874308319364e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396960 + }, + { + "epoch": 1.9252327610142426, + "grad_norm": 9.755542151879126e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396970 + }, + { + "epoch": 1.925281259207079, + "grad_norm": 6.263343177437264e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396980 + }, + { + "epoch": 1.9253297573999149, + "grad_norm": 2.0384382182214722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 396990 + }, + { + "epoch": 1.925378255592751, + "grad_norm": 1.1451082748692443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397000 + }, + { + "epoch": 1.9254267537855871, + "grad_norm": 1.6060491603298033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397010 + }, + { + "epoch": 1.925475251978423, + "grad_norm": 1.1698132240667292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397020 + }, + { + "epoch": 1.9255237501712592, + "grad_norm": 1.2032831619990247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397030 + }, + { + "epoch": 1.9255722483640954, + "grad_norm": 1.3454899416842636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397040 + }, + { + "epoch": 1.9256207465569313, + "grad_norm": 1.8672638546490816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397050 + }, + { + "epoch": 1.9256692447497676, + "grad_norm": 9.136635448214747e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397060 + }, + { + "epoch": 1.9257177429426036, + "grad_norm": 9.364206299267153e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397070 + }, + { + "epoch": 1.9257662411354397, + "grad_norm": 1.2540239069380732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397080 + }, + { + "epoch": 1.9258147393282758, + "grad_norm": 7.687781966581042e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397090 + }, + { + "epoch": 1.9258632375211118, + "grad_norm": 9.405789036520673e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397100 + }, + { + "epoch": 1.925911735713948, + "grad_norm": 8.84841178105944e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397110 + }, + { + "epoch": 1.925960233906784, + "grad_norm": 1.176658681600884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397120 + }, + { + "epoch": 1.92600873209962, + "grad_norm": 1.2518716729914559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397130 + }, + { + "epoch": 1.9260572302924563, + "grad_norm": 1.615048361713889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397140 + }, + { + "epoch": 1.9261057284852923, + "grad_norm": 1.096452351134758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397150 + }, + { + "epoch": 1.9261542266781284, + "grad_norm": 1.2905483792735595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397160 + }, + { + "epoch": 1.9262027248709646, + "grad_norm": 8.386948024963203e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397170 + }, + { + "epoch": 1.9262512230638005, + "grad_norm": 9.097820274917012e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397180 + }, + { + "epoch": 1.9262997212566366, + "grad_norm": 1.433428398200931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397190 + }, + { + "epoch": 1.9263482194494728, + "grad_norm": 1.0534566996511785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397200 + }, + { + "epoch": 1.926396717642309, + "grad_norm": 6.6389400643629415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397210 + }, + { + "epoch": 1.926445215835145, + "grad_norm": 1.3946667820619041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397220 + }, + { + "epoch": 1.926493714027981, + "grad_norm": 9.397808753419667e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397230 + }, + { + "epoch": 1.9265422122208171, + "grad_norm": 9.044237359034923e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397240 + }, + { + "epoch": 1.9265907104136533, + "grad_norm": 1.1733802374180868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397250 + }, + { + "epoch": 1.9266392086064892, + "grad_norm": 1.0795166538457579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397260 + }, + { + "epoch": 1.9266877067993256, + "grad_norm": 1.1124792642647208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397270 + }, + { + "epoch": 1.9267362049921615, + "grad_norm": 9.28685217616021e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397280 + }, + { + "epoch": 1.9267847031849976, + "grad_norm": 1.9756486224764558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397290 + }, + { + "epoch": 1.9268332013778338, + "grad_norm": 7.572950266876433e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397300 + }, + { + "epoch": 1.9268816995706697, + "grad_norm": 9.25767640325148e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397310 + }, + { + "epoch": 1.9269301977635058, + "grad_norm": 1.5406632414283195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397320 + }, + { + "epoch": 1.926978695956342, + "grad_norm": 6.16854656243504e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397330 + }, + { + "epoch": 1.927027194149178, + "grad_norm": 2.0305831682776443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397340 + }, + { + "epoch": 1.9270756923420143, + "grad_norm": 8.965974629404627e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397350 + }, + { + "epoch": 1.9271241905348502, + "grad_norm": 9.780887211263689e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397360 + }, + { + "epoch": 1.9271726887276863, + "grad_norm": 1.2439081764625826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397370 + }, + { + "epoch": 1.9272211869205225, + "grad_norm": 1.1922630882565954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397380 + }, + { + "epoch": 1.9272696851133584, + "grad_norm": 9.492938879418489e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397390 + }, + { + "epoch": 1.9273181833061945, + "grad_norm": 8.868236811565566e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397400 + }, + { + "epoch": 1.9273666814990307, + "grad_norm": 1.534326266039443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397410 + }, + { + "epoch": 1.9274151796918666, + "grad_norm": 8.35641955632127e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397420 + }, + { + "epoch": 1.927463677884703, + "grad_norm": 1.4512488100137944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397430 + }, + { + "epoch": 1.9275121760775389, + "grad_norm": 1.1482609529878118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397440 + }, + { + "epoch": 1.927560674270375, + "grad_norm": 1.4169942552655357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397450 + }, + { + "epoch": 1.9276091724632112, + "grad_norm": 7.49823048096232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397460 + }, + { + "epoch": 1.927657670656047, + "grad_norm": 1.3035567292263295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397470 + }, + { + "epoch": 1.9277061688488832, + "grad_norm": 6.755510373324114e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397480 + }, + { + "epoch": 1.9277546670417194, + "grad_norm": 1.0852392762217278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397490 + }, + { + "epoch": 1.9278031652345553, + "grad_norm": 1.3437246870751096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397500 + }, + { + "epoch": 1.9278516634273917, + "grad_norm": 1.3488244299253438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397510 + }, + { + "epoch": 1.9279001616202276, + "grad_norm": 1.943704575069205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397520 + }, + { + "epoch": 1.9279486598130637, + "grad_norm": 2.263834097959716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397530 + }, + { + "epoch": 1.9279971580058999, + "grad_norm": 1.1717591341664502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397540 + }, + { + "epoch": 1.9280456561987358, + "grad_norm": 8.335768519884823e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397550 + }, + { + "epoch": 1.928094154391572, + "grad_norm": 1.0280729156875168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397560 + }, + { + "epoch": 1.928142652584408, + "grad_norm": 1.3099367812685614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397570 + }, + { + "epoch": 1.928191150777244, + "grad_norm": 9.708671200314711e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397580 + }, + { + "epoch": 1.9282396489700804, + "grad_norm": 1.4434148098985133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397590 + }, + { + "epoch": 1.9282881471629163, + "grad_norm": 8.023047115557347e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397600 + }, + { + "epoch": 1.9283366453557524, + "grad_norm": 9.782340271158319e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397610 + }, + { + "epoch": 1.9283851435485886, + "grad_norm": 1.1495889573609475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397620 + }, + { + "epoch": 1.9284336417414245, + "grad_norm": 1.0190116306318941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397630 + }, + { + "epoch": 1.9284821399342607, + "grad_norm": 1.0631945990269287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397640 + }, + { + "epoch": 1.9285306381270968, + "grad_norm": 1.2013281036615808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397650 + }, + { + "epoch": 1.9285791363199327, + "grad_norm": 6.531373664131479e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397660 + }, + { + "epoch": 1.928627634512769, + "grad_norm": 1.0118307969264606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397670 + }, + { + "epoch": 1.928676132705605, + "grad_norm": 1.9946231333278774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397680 + }, + { + "epoch": 1.9287246308984412, + "grad_norm": 1.1159442259156549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397690 + }, + { + "epoch": 1.9287731290912773, + "grad_norm": 1.00308925610193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397700 + }, + { + "epoch": 1.9288216272841132, + "grad_norm": 1.4650113122627317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397710 + }, + { + "epoch": 1.9288701254769494, + "grad_norm": 1.1350988593505917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397720 + }, + { + "epoch": 1.9289186236697855, + "grad_norm": 7.55681828223942e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397730 + }, + { + "epoch": 1.9289671218626216, + "grad_norm": 1.41684521892671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397740 + }, + { + "epoch": 1.9290156200554578, + "grad_norm": 1.5454133972525597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397750 + }, + { + "epoch": 1.9290641182482937, + "grad_norm": 9.710381831951054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397760 + }, + { + "epoch": 1.9291126164411299, + "grad_norm": 1.3277686505830388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397770 + }, + { + "epoch": 1.929161114633966, + "grad_norm": 1.0718968823653086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397780 + }, + { + "epoch": 1.929209612826802, + "grad_norm": 7.987949857124477e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397790 + }, + { + "epoch": 1.9292581110196383, + "grad_norm": 9.226647890159256e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397800 + }, + { + "epoch": 1.9293066092124742, + "grad_norm": 9.753438057202857e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397810 + }, + { + "epoch": 1.9293551074053104, + "grad_norm": 8.51606607454869e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397820 + }, + { + "epoch": 1.9294036055981465, + "grad_norm": 1.3851039426526768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397830 + }, + { + "epoch": 1.9294521037909824, + "grad_norm": 1.3612523552808398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397840 + }, + { + "epoch": 1.9295006019838186, + "grad_norm": 1.0041604880939303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397850 + }, + { + "epoch": 1.9295491001766547, + "grad_norm": 1.4304259110531348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397860 + }, + { + "epoch": 1.9295975983694906, + "grad_norm": 9.031826841976454e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397870 + }, + { + "epoch": 1.929646096562327, + "grad_norm": 9.196551964407718e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397880 + }, + { + "epoch": 1.929694594755163, + "grad_norm": 1.4819442561986307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397890 + }, + { + "epoch": 1.929743092947999, + "grad_norm": 1.4967213246563915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397900 + }, + { + "epoch": 1.9297915911408352, + "grad_norm": 1.0684637175017997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397910 + }, + { + "epoch": 1.9298400893336711, + "grad_norm": 7.892052344971034e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397920 + }, + { + "epoch": 1.9298885875265073, + "grad_norm": 1.0394069605013101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397930 + }, + { + "epoch": 1.9299370857193434, + "grad_norm": 6.43425313029411e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397940 + }, + { + "epoch": 1.9299855839121793, + "grad_norm": 1.6186069373702594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397950 + }, + { + "epoch": 1.9300340821050157, + "grad_norm": 1.1351818152149917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397960 + }, + { + "epoch": 1.9300825802978516, + "grad_norm": 1.0926269666811095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397970 + }, + { + "epoch": 1.9301310784906878, + "grad_norm": 1.2774504121182417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397980 + }, + { + "epoch": 1.930179576683524, + "grad_norm": 1.344413469439587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 397990 + }, + { + "epoch": 1.9302280748763598, + "grad_norm": 1.4455856955919444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398000 + }, + { + "epoch": 1.930276573069196, + "grad_norm": 1.7405778152124185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398010 + }, + { + "epoch": 1.9303250712620321, + "grad_norm": 1.1594067927944707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398020 + }, + { + "epoch": 1.930373569454868, + "grad_norm": 1.1031589863819136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398030 + }, + { + "epoch": 1.9304220676477044, + "grad_norm": 1.0941367811767577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398040 + }, + { + "epoch": 1.9304705658405403, + "grad_norm": 6.166517518835235e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398050 + }, + { + "epoch": 1.9305190640333765, + "grad_norm": 1.0901786140493641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398060 + }, + { + "epoch": 1.9305675622262126, + "grad_norm": 9.353116503518777e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398070 + }, + { + "epoch": 1.9306160604190485, + "grad_norm": 1.0766560976094297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398080 + }, + { + "epoch": 1.9306645586118847, + "grad_norm": 1.3380867081025372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398090 + }, + { + "epoch": 1.9307130568047208, + "grad_norm": 1.1721076553783405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398100 + }, + { + "epoch": 1.9307615549975568, + "grad_norm": 1.234775304226332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398110 + }, + { + "epoch": 1.9308100531903931, + "grad_norm": 7.626917764014252e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398120 + }, + { + "epoch": 1.930858551383229, + "grad_norm": 1.3967483170063133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398130 + }, + { + "epoch": 1.9309070495760652, + "grad_norm": 1.0475710077173517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398140 + }, + { + "epoch": 1.9309555477689013, + "grad_norm": 8.346876079201593e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398150 + }, + { + "epoch": 1.9310040459617372, + "grad_norm": 1.4468317210969417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398160 + }, + { + "epoch": 1.9310525441545734, + "grad_norm": 8.358537861852255e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398170 + }, + { + "epoch": 1.9311010423474095, + "grad_norm": 9.632529440750659e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398180 + }, + { + "epoch": 1.9311495405402455, + "grad_norm": 1.3107960938896213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398190 + }, + { + "epoch": 1.9311980387330818, + "grad_norm": 1.2239672386726852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398200 + }, + { + "epoch": 1.9312465369259177, + "grad_norm": 8.897288239495538e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398210 + }, + { + "epoch": 1.9312950351187539, + "grad_norm": 9.24956111703068e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398220 + }, + { + "epoch": 1.93134353331159, + "grad_norm": 6.943227770506155e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398230 + }, + { + "epoch": 1.931392031504426, + "grad_norm": 1.7049195832896658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398240 + }, + { + "epoch": 1.9314405296972623, + "grad_norm": 1.2933181636753943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398250 + }, + { + "epoch": 1.9314890278900982, + "grad_norm": 8.382056826405915e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398260 + }, + { + "epoch": 1.9315375260829344, + "grad_norm": 8.837670151251587e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398270 + }, + { + "epoch": 1.9315860242757705, + "grad_norm": 8.999872846970902e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398280 + }, + { + "epoch": 1.9316345224686065, + "grad_norm": 1.1407785827088901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398290 + }, + { + "epoch": 1.9316830206614426, + "grad_norm": 9.954614021978614e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398300 + }, + { + "epoch": 1.9317315188542787, + "grad_norm": 1.1260389953804406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398310 + }, + { + "epoch": 1.9317800170471147, + "grad_norm": 1.2186860409713063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398320 + }, + { + "epoch": 1.931828515239951, + "grad_norm": 1.0727865706883222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398330 + }, + { + "epoch": 1.931877013432787, + "grad_norm": 1.4973824846720163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398340 + }, + { + "epoch": 1.931925511625623, + "grad_norm": 1.154199136266243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398350 + }, + { + "epoch": 1.9319740098184592, + "grad_norm": 1.584289321954202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398360 + }, + { + "epoch": 1.9320225080112952, + "grad_norm": 1.3628624451200722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398370 + }, + { + "epoch": 1.9320710062041313, + "grad_norm": 9.411028401018484e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398380 + }, + { + "epoch": 1.9321195043969674, + "grad_norm": 1.0367066316518958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398390 + }, + { + "epoch": 1.9321680025898034, + "grad_norm": 1.0786673776408406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398400 + }, + { + "epoch": 1.9322165007826397, + "grad_norm": 6.6675629462054076e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398410 + }, + { + "epoch": 1.9322649989754757, + "grad_norm": 1.3264772391607949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398420 + }, + { + "epoch": 1.9323134971683118, + "grad_norm": 1.1235727903624593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398430 + }, + { + "epoch": 1.932361995361148, + "grad_norm": 1.0864626531770227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398440 + }, + { + "epoch": 1.9324104935539839, + "grad_norm": 8.22198575889388e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398450 + }, + { + "epoch": 1.93245899174682, + "grad_norm": 1.2298373874841673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398460 + }, + { + "epoch": 1.9325074899396562, + "grad_norm": 1.7406247110329787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398470 + }, + { + "epoch": 1.932555988132492, + "grad_norm": 1.0431988606285358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398480 + }, + { + "epoch": 1.9326044863253284, + "grad_norm": 9.639883558065776e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398490 + }, + { + "epoch": 1.9326529845181644, + "grad_norm": 1.2821537609397637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398500 + }, + { + "epoch": 1.9327014827110005, + "grad_norm": 9.68110214216722e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398510 + }, + { + "epoch": 1.9327499809038367, + "grad_norm": 8.66972449387049e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398520 + }, + { + "epoch": 1.9327984790966726, + "grad_norm": 1.2830816409348245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398530 + }, + { + "epoch": 1.9328469772895087, + "grad_norm": 1.128288840135383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398540 + }, + { + "epoch": 1.9328954754823449, + "grad_norm": 9.261655442571737e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398550 + }, + { + "epoch": 1.9329439736751808, + "grad_norm": 1.0007896733554844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398560 + }, + { + "epoch": 1.9329924718680171, + "grad_norm": 1.5192838986877177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398570 + }, + { + "epoch": 1.933040970060853, + "grad_norm": 9.698450043060802e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398580 + }, + { + "epoch": 1.9330894682536892, + "grad_norm": 1.4488514388233398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398590 + }, + { + "epoch": 1.9331379664465254, + "grad_norm": 9.423471780678483e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398600 + }, + { + "epoch": 1.9331864646393613, + "grad_norm": 1.190510801052369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398610 + }, + { + "epoch": 1.9332349628321974, + "grad_norm": 8.730391520828107e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398620 + }, + { + "epoch": 1.9332834610250336, + "grad_norm": 5.72406388954505e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398630 + }, + { + "epoch": 1.9333319592178695, + "grad_norm": 1.0751427304001027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398640 + }, + { + "epoch": 1.9333804574107059, + "grad_norm": 9.926538702131893e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398650 + }, + { + "epoch": 1.9334289556035418, + "grad_norm": 9.846024440207657e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398660 + }, + { + "epoch": 1.933477453796378, + "grad_norm": 7.854221273362327e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398670 + }, + { + "epoch": 1.933525951989214, + "grad_norm": 1.1670766575377911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398680 + }, + { + "epoch": 1.93357445018205, + "grad_norm": 1.3122915198948704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398690 + }, + { + "epoch": 1.9336229483748861, + "grad_norm": 1.1556098300502526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398700 + }, + { + "epoch": 1.9336714465677223, + "grad_norm": 1.2427938678172268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398710 + }, + { + "epoch": 1.9337199447605582, + "grad_norm": 7.919302547065854e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398720 + }, + { + "epoch": 1.9337684429533946, + "grad_norm": 1.4599351949584616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398730 + }, + { + "epoch": 1.9338169411462305, + "grad_norm": 1.3690866218496467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398740 + }, + { + "epoch": 1.9338654393390666, + "grad_norm": 8.603886492153379e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398750 + }, + { + "epoch": 1.9339139375319028, + "grad_norm": 1.3327309922317454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398760 + }, + { + "epoch": 1.9339624357247387, + "grad_norm": 1.236762869893937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398770 + }, + { + "epoch": 1.934010933917575, + "grad_norm": 9.383527732609309e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398780 + }, + { + "epoch": 1.934059432110411, + "grad_norm": 1.9112420091005333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398790 + }, + { + "epoch": 1.9341079303032471, + "grad_norm": 8.865963962989554e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398800 + }, + { + "epoch": 1.9341564284960833, + "grad_norm": 1.0805079497799852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398810 + }, + { + "epoch": 1.9342049266889192, + "grad_norm": 1.041438846272058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398820 + }, + { + "epoch": 1.9342534248817553, + "grad_norm": 1.1536388733190961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398830 + }, + { + "epoch": 1.9343019230745915, + "grad_norm": 1.1435181690444551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398840 + }, + { + "epoch": 1.9343504212674274, + "grad_norm": 1.00194590402225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398850 + }, + { + "epoch": 1.9343989194602638, + "grad_norm": 1.4958171590251368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398860 + }, + { + "epoch": 1.9344474176530997, + "grad_norm": 8.139756424441202e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398870 + }, + { + "epoch": 1.9344959158459358, + "grad_norm": 1.2808719418444525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398880 + }, + { + "epoch": 1.934544414038772, + "grad_norm": 9.76900960125704e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398890 + }, + { + "epoch": 1.934592912231608, + "grad_norm": 1.3730686809765302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398900 + }, + { + "epoch": 1.934641410424444, + "grad_norm": 9.467575168287112e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398910 + }, + { + "epoch": 1.9346899086172802, + "grad_norm": 1.1051334958267489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398920 + }, + { + "epoch": 1.934738406810116, + "grad_norm": 1.3636365814306828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398930 + }, + { + "epoch": 1.9347869050029525, + "grad_norm": 1.543264716019621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398940 + }, + { + "epoch": 1.9348354031957884, + "grad_norm": 2.212259708755937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398950 + }, + { + "epoch": 1.9348839013886245, + "grad_norm": 1.3305530011109568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398960 + }, + { + "epoch": 1.9349323995814607, + "grad_norm": 1.8659008560462098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398970 + }, + { + "epoch": 1.9349808977742966, + "grad_norm": 1.2793728743076827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398980 + }, + { + "epoch": 1.9350293959671327, + "grad_norm": 1.1792137932786773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 398990 + }, + { + "epoch": 1.935077894159969, + "grad_norm": 1.1864420557117228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399000 + }, + { + "epoch": 1.9351263923528048, + "grad_norm": 1.1953812162346367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399010 + }, + { + "epoch": 1.9351748905456412, + "grad_norm": 8.955138852684286e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399020 + }, + { + "epoch": 1.935223388738477, + "grad_norm": 1.1651925646560812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399030 + }, + { + "epoch": 1.9352718869313132, + "grad_norm": 1.4968193795539264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399040 + }, + { + "epoch": 1.9353203851241494, + "grad_norm": 1.4895201516651468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399050 + }, + { + "epoch": 1.9353688833169853, + "grad_norm": 8.87026896378984e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399060 + }, + { + "epoch": 1.9354173815098215, + "grad_norm": 1.2192991505344253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399070 + }, + { + "epoch": 1.9354658797026576, + "grad_norm": 1.2432568752274165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399080 + }, + { + "epoch": 1.9355143778954935, + "grad_norm": 9.884362661694013e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399090 + }, + { + "epoch": 1.9355628760883299, + "grad_norm": 1.4784438562287505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399100 + }, + { + "epoch": 1.9356113742811658, + "grad_norm": 1.5761221661136915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399110 + }, + { + "epoch": 1.935659872474002, + "grad_norm": 9.84681314264435e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399120 + }, + { + "epoch": 1.935708370666838, + "grad_norm": 8.23036394592691e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399130 + }, + { + "epoch": 1.935756868859674, + "grad_norm": 9.531879285873401e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399140 + }, + { + "epoch": 1.9358053670525102, + "grad_norm": 1.0637106306887745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399150 + }, + { + "epoch": 1.9358538652453463, + "grad_norm": 1.337240007615037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399160 + }, + { + "epoch": 1.9359023634381822, + "grad_norm": 1.6721388718110575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399170 + }, + { + "epoch": 1.9359508616310186, + "grad_norm": 1.3756976890988426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399180 + }, + { + "epoch": 1.9359993598238545, + "grad_norm": 1.0526325588955388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399190 + }, + { + "epoch": 1.9360478580166907, + "grad_norm": 1.3964458922544054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399200 + }, + { + "epoch": 1.9360963562095268, + "grad_norm": 1.3522146069533392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399210 + }, + { + "epoch": 1.9361448544023627, + "grad_norm": 1.215742972959788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399220 + }, + { + "epoch": 1.9361933525951989, + "grad_norm": 8.837012899221008e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399230 + }, + { + "epoch": 1.936241850788035, + "grad_norm": 9.203664497192676e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399240 + }, + { + "epoch": 1.936290348980871, + "grad_norm": 1.2928475179307952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399250 + }, + { + "epoch": 1.9363388471737073, + "grad_norm": 1.4456380981187067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399260 + }, + { + "epoch": 1.9363873453665432, + "grad_norm": 1.4896178512913139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399270 + }, + { + "epoch": 1.9364358435593794, + "grad_norm": 7.094776321991958e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399280 + }, + { + "epoch": 1.9364843417522155, + "grad_norm": 7.1184782513000755e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399290 + }, + { + "epoch": 1.9365328399450514, + "grad_norm": 1.5476027570571205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399300 + }, + { + "epoch": 1.9365813381378878, + "grad_norm": 8.239686266620083e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399310 + }, + { + "epoch": 1.9366298363307237, + "grad_norm": 1.1867222760031382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399320 + }, + { + "epoch": 1.9366783345235599, + "grad_norm": 1.0986328291551217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399330 + }, + { + "epoch": 1.936726832716396, + "grad_norm": 7.0229888571304855e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399340 + }, + { + "epoch": 1.936775330909232, + "grad_norm": 1.2936882676228834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399350 + }, + { + "epoch": 1.936823829102068, + "grad_norm": 1.0586700405212923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399360 + }, + { + "epoch": 1.9368723272949042, + "grad_norm": 8.784143190609939e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399370 + }, + { + "epoch": 1.9369208254877401, + "grad_norm": 1.1205979255635157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399380 + }, + { + "epoch": 1.9369693236805765, + "grad_norm": 1.2054114151283102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399390 + }, + { + "epoch": 1.9370178218734124, + "grad_norm": 8.853562327715281e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399400 + }, + { + "epoch": 1.9370663200662486, + "grad_norm": 1.2865664977823599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399410 + }, + { + "epoch": 1.9371148182590847, + "grad_norm": 5.894982724186093e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399420 + }, + { + "epoch": 1.9371633164519206, + "grad_norm": 1.2915909231026035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399430 + }, + { + "epoch": 1.9372118146447568, + "grad_norm": 1.078636824303203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399440 + }, + { + "epoch": 1.937260312837593, + "grad_norm": 1.4592798081025649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399450 + }, + { + "epoch": 1.9373088110304288, + "grad_norm": 1.1144131839557758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399460 + }, + { + "epoch": 1.9373573092232652, + "grad_norm": 1.5269533193418283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399470 + }, + { + "epoch": 1.9374058074161011, + "grad_norm": 1.1435132840631468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399480 + }, + { + "epoch": 1.9374543056089373, + "grad_norm": 8.940827633807658e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399490 + }, + { + "epoch": 1.9375028038017734, + "grad_norm": 1.3766635831302665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399500 + }, + { + "epoch": 1.9375513019946093, + "grad_norm": 1.79918782095001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399510 + }, + { + "epoch": 1.9375998001874455, + "grad_norm": 1.7133505281208272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399520 + }, + { + "epoch": 1.9376482983802816, + "grad_norm": 7.486268494005799e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399530 + }, + { + "epoch": 1.9376967965731176, + "grad_norm": 1.0202577449547334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399540 + }, + { + "epoch": 1.937745294765954, + "grad_norm": 1.1257485610371987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399550 + }, + { + "epoch": 1.9377937929587898, + "grad_norm": 1.0702212449587023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399560 + }, + { + "epoch": 1.937842291151626, + "grad_norm": 1.5074693493488667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399570 + }, + { + "epoch": 1.9378907893444621, + "grad_norm": 1.5183637458449084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399580 + }, + { + "epoch": 1.937939287537298, + "grad_norm": 1.269017424476715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399590 + }, + { + "epoch": 1.9379877857301342, + "grad_norm": 1.8041237836996515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399600 + }, + { + "epoch": 1.9380362839229703, + "grad_norm": 1.2040048069650311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399610 + }, + { + "epoch": 1.9380847821158063, + "grad_norm": 9.937308753649177e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399620 + }, + { + "epoch": 1.9381332803086426, + "grad_norm": 1.2245401137533918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399630 + }, + { + "epoch": 1.9381817785014785, + "grad_norm": 1.852998288143226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399640 + }, + { + "epoch": 1.9382302766943147, + "grad_norm": 1.681445915835411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399650 + }, + { + "epoch": 1.9382787748871508, + "grad_norm": 1.5693036203856536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399660 + }, + { + "epoch": 1.9383272730799868, + "grad_norm": 1.178779029942234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399670 + }, + { + "epoch": 1.938375771272823, + "grad_norm": 1.2738139432144635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399680 + }, + { + "epoch": 1.938424269465659, + "grad_norm": 9.871177653053564e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399690 + }, + { + "epoch": 1.938472767658495, + "grad_norm": 1.9784240024023347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399700 + }, + { + "epoch": 1.9385212658513313, + "grad_norm": 1.9838333642496764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399710 + }, + { + "epoch": 1.9385697640441673, + "grad_norm": 1.3237209550709395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399720 + }, + { + "epoch": 1.9386182622370034, + "grad_norm": 1.3512536867210656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399730 + }, + { + "epoch": 1.9386667604298395, + "grad_norm": 1.0319023857618959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399740 + }, + { + "epoch": 1.9387152586226755, + "grad_norm": 1.2454689723995216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399750 + }, + { + "epoch": 1.9387637568155116, + "grad_norm": 1.3837953005690906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399760 + }, + { + "epoch": 1.9388122550083478, + "grad_norm": 1.135278804298423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399770 + }, + { + "epoch": 1.938860753201184, + "grad_norm": 8.119584116172973e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399780 + }, + { + "epoch": 1.93890925139402, + "grad_norm": 1.1533004773411903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399790 + }, + { + "epoch": 1.938957749586856, + "grad_norm": 6.472364422194232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399800 + }, + { + "epoch": 1.939006247779692, + "grad_norm": 1.4560474603797502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399810 + }, + { + "epoch": 1.9390547459725282, + "grad_norm": 1.1087474938165087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399820 + }, + { + "epoch": 1.9391032441653642, + "grad_norm": 8.849662336274378e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399830 + }, + { + "epoch": 1.9391517423582005, + "grad_norm": 8.25814616689513e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399840 + }, + { + "epoch": 1.9392002405510365, + "grad_norm": 7.396109946711249e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399850 + }, + { + "epoch": 1.9392487387438726, + "grad_norm": 1.0303553565904622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399860 + }, + { + "epoch": 1.9392972369367087, + "grad_norm": 1.2281689443227606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399870 + }, + { + "epoch": 1.9393457351295447, + "grad_norm": 9.694081093414297e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399880 + }, + { + "epoch": 1.9393942333223808, + "grad_norm": 1.2956032691135988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399890 + }, + { + "epoch": 1.939442731515217, + "grad_norm": 9.712711523945927e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399900 + }, + { + "epoch": 1.9394912297080529, + "grad_norm": 9.095383113333355e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399910 + }, + { + "epoch": 1.9395397279008892, + "grad_norm": 1.1069378302863697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399920 + }, + { + "epoch": 1.9395882260937252, + "grad_norm": 1.216894851552297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399930 + }, + { + "epoch": 1.9396367242865613, + "grad_norm": 1.7270716412554066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399940 + }, + { + "epoch": 1.9396852224793975, + "grad_norm": 9.822920254975998e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399950 + }, + { + "epoch": 1.9397337206722334, + "grad_norm": 1.0350993839836065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399960 + }, + { + "epoch": 1.9397822188650695, + "grad_norm": 7.99804045215069e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399970 + }, + { + "epoch": 1.9398307170579057, + "grad_norm": 1.0758578028458032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399980 + }, + { + "epoch": 1.9398792152507416, + "grad_norm": 1.0452428256257917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 399990 + }, + { + "epoch": 1.939927713443578, + "grad_norm": 1.7316539313583235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400000 + }, + { + "epoch": 1.9399762116364139, + "grad_norm": 8.36856717256751e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400010 + }, + { + "epoch": 1.94002470982925, + "grad_norm": 1.305113350724696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400020 + }, + { + "epoch": 1.9400732080220862, + "grad_norm": 1.183195230680667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400030 + }, + { + "epoch": 1.940121706214922, + "grad_norm": 1.095877522061528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400040 + }, + { + "epoch": 1.9401702044077582, + "grad_norm": 1.2451319975070874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400050 + }, + { + "epoch": 1.9402187026005944, + "grad_norm": 1.4305302720174495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400060 + }, + { + "epoch": 1.9402672007934303, + "grad_norm": 1.2456605524846509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400070 + }, + { + "epoch": 1.9403156989862667, + "grad_norm": 7.901141962918246e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400080 + }, + { + "epoch": 1.9403641971791026, + "grad_norm": 9.698347014364117e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400090 + }, + { + "epoch": 1.9404126953719387, + "grad_norm": 9.559364855249441e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400100 + }, + { + "epoch": 1.9404611935647749, + "grad_norm": 6.551390985265471e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400110 + }, + { + "epoch": 1.9405096917576108, + "grad_norm": 1.8839395821146354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400120 + }, + { + "epoch": 1.940558189950447, + "grad_norm": 1.1589786907961752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400130 + }, + { + "epoch": 1.940606688143283, + "grad_norm": 1.032817387169871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400140 + }, + { + "epoch": 1.940655186336119, + "grad_norm": 1.5092441074671115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400150 + }, + { + "epoch": 1.9407036845289554, + "grad_norm": 1.0841790576421317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400160 + }, + { + "epoch": 1.9407521827217913, + "grad_norm": 1.3604673831935088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400170 + }, + { + "epoch": 1.9408006809146274, + "grad_norm": 1.2665230642028291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400180 + }, + { + "epoch": 1.9408491791074636, + "grad_norm": 1.05356496860054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400190 + }, + { + "epoch": 1.9408976773002995, + "grad_norm": 1.0418111706655964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400200 + }, + { + "epoch": 1.9409461754931356, + "grad_norm": 1.0004078454528553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400210 + }, + { + "epoch": 1.9409946736859718, + "grad_norm": 1.0858649091005645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400220 + }, + { + "epoch": 1.9410431718788077, + "grad_norm": 9.006178913750773e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400230 + }, + { + "epoch": 1.941091670071644, + "grad_norm": 1.1600302052272582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400240 + }, + { + "epoch": 1.94114016826448, + "grad_norm": 9.524175226260922e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400250 + }, + { + "epoch": 1.9411886664573161, + "grad_norm": 1.2209152799869116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400260 + }, + { + "epoch": 1.9412371646501523, + "grad_norm": 1.2267866722481813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400270 + }, + { + "epoch": 1.9412856628429882, + "grad_norm": 1.6026744376063107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400280 + }, + { + "epoch": 1.9413341610358246, + "grad_norm": 1.2186547770909328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400290 + }, + { + "epoch": 1.9413826592286605, + "grad_norm": 1.0683761431096173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400300 + }, + { + "epoch": 1.9414311574214966, + "grad_norm": 1.4287585337058317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400310 + }, + { + "epoch": 1.9414796556143328, + "grad_norm": 1.2872109600436943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400320 + }, + { + "epoch": 1.9415281538071687, + "grad_norm": 1.343468714054552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400330 + }, + { + "epoch": 1.9415766520000048, + "grad_norm": 1.2277462602128253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400340 + }, + { + "epoch": 1.941625150192841, + "grad_norm": 9.222471675229826e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400350 + }, + { + "epoch": 1.941673648385677, + "grad_norm": 8.122974293200969e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400360 + }, + { + "epoch": 1.9417221465785133, + "grad_norm": 7.535703616667888e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400370 + }, + { + "epoch": 1.9417706447713492, + "grad_norm": 1.0634567892964242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400380 + }, + { + "epoch": 1.9418191429641853, + "grad_norm": 7.553957459549565e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400390 + }, + { + "epoch": 1.9418676411570215, + "grad_norm": 6.952511011348861e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400400 + }, + { + "epoch": 1.9419161393498574, + "grad_norm": 8.854552646653246e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400410 + }, + { + "epoch": 1.9419646375426936, + "grad_norm": 1.2335260812790239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400420 + }, + { + "epoch": 1.9420131357355297, + "grad_norm": 9.335445305680423e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400430 + }, + { + "epoch": 1.9420616339283656, + "grad_norm": 1.480793887509435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400440 + }, + { + "epoch": 1.942110132121202, + "grad_norm": 1.3029310963474927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400450 + }, + { + "epoch": 1.942158630314038, + "grad_norm": 1.4313221718964542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400460 + }, + { + "epoch": 1.942207128506874, + "grad_norm": 9.843700077283302e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400470 + }, + { + "epoch": 1.9422556266997102, + "grad_norm": 1.1292154766806561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400480 + }, + { + "epoch": 1.9423041248925461, + "grad_norm": 8.364172465746833e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400490 + }, + { + "epoch": 1.9423526230853823, + "grad_norm": 1.492534629221609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400500 + }, + { + "epoch": 1.9424011212782184, + "grad_norm": 1.2482908040567509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400510 + }, + { + "epoch": 1.9424496194710543, + "grad_norm": 6.098061611226058e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400520 + }, + { + "epoch": 1.9424981176638907, + "grad_norm": 9.508557496928915e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400530 + }, + { + "epoch": 1.9425466158567266, + "grad_norm": 1.0182056087160163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400540 + }, + { + "epoch": 1.9425951140495628, + "grad_norm": 1.194654686287322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400550 + }, + { + "epoch": 1.942643612242399, + "grad_norm": 1.315766962051157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400560 + }, + { + "epoch": 1.9426921104352348, + "grad_norm": 7.525958523046938e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400570 + }, + { + "epoch": 1.942740608628071, + "grad_norm": 1.8530744938516364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400580 + }, + { + "epoch": 1.942789106820907, + "grad_norm": 1.9903570347423738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400590 + }, + { + "epoch": 1.942837605013743, + "grad_norm": 9.867206607339085e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400600 + }, + { + "epoch": 1.9428861032065794, + "grad_norm": 1.174407326942628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400610 + }, + { + "epoch": 1.9429346013994153, + "grad_norm": 1.3647302843367015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400620 + }, + { + "epoch": 1.9429830995922515, + "grad_norm": 1.146480510527681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400630 + }, + { + "epoch": 1.9430315977850876, + "grad_norm": 1.4233845213595941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400640 + }, + { + "epoch": 1.9430800959779235, + "grad_norm": 1.4955038096786666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400650 + }, + { + "epoch": 1.9431285941707597, + "grad_norm": 1.3062686043951999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400660 + }, + { + "epoch": 1.9431770923635958, + "grad_norm": 1.3884820404541642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400670 + }, + { + "epoch": 1.9432255905564317, + "grad_norm": 8.144862562176058e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400680 + }, + { + "epoch": 1.943274088749268, + "grad_norm": 1.4361014599728605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400690 + }, + { + "epoch": 1.943322586942104, + "grad_norm": 7.483234476524103e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400700 + }, + { + "epoch": 1.9433710851349402, + "grad_norm": 1.4357287803079544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400710 + }, + { + "epoch": 1.9434195833277763, + "grad_norm": 1.246937042509444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400720 + }, + { + "epoch": 1.9434680815206122, + "grad_norm": 9.95687443605675e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400730 + }, + { + "epoch": 1.9435165797134484, + "grad_norm": 1.2026422524513691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400740 + }, + { + "epoch": 1.9435650779062845, + "grad_norm": 1.2290370499101755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400750 + }, + { + "epoch": 1.9436135760991204, + "grad_norm": 1.6448071349373095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400760 + }, + { + "epoch": 1.9436620742919568, + "grad_norm": 1.1067516680896006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400770 + }, + { + "epoch": 1.9437105724847927, + "grad_norm": 1.1501449570516797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400780 + }, + { + "epoch": 1.9437590706776289, + "grad_norm": 1.0212027667932944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400790 + }, + { + "epoch": 1.943807568870465, + "grad_norm": 1.2912304114820472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400800 + }, + { + "epoch": 1.943856067063301, + "grad_norm": 1.1883896533504412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400810 + }, + { + "epoch": 1.9439045652561373, + "grad_norm": 9.624467445235041e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400820 + }, + { + "epoch": 1.9439530634489732, + "grad_norm": 1.1472542915669237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400830 + }, + { + "epoch": 1.9440015616418094, + "grad_norm": 8.189180888962255e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400840 + }, + { + "epoch": 1.9440500598346455, + "grad_norm": 1.266016891321442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400850 + }, + { + "epoch": 1.9440985580274814, + "grad_norm": 1.722707665408052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400860 + }, + { + "epoch": 1.9441470562203176, + "grad_norm": 1.4305514106638384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400870 + }, + { + "epoch": 1.9441955544131537, + "grad_norm": 7.376934174629923e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400880 + }, + { + "epoch": 1.9442440526059896, + "grad_norm": 9.36237931625783e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400890 + }, + { + "epoch": 1.944292550798826, + "grad_norm": 8.242752258524888e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400900 + }, + { + "epoch": 1.944341048991662, + "grad_norm": 1.615450351266645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400910 + }, + { + "epoch": 1.944389547184498, + "grad_norm": 6.971839994207585e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400920 + }, + { + "epoch": 1.9444380453773342, + "grad_norm": 1.2079706124268341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400930 + }, + { + "epoch": 1.9444865435701701, + "grad_norm": 7.873270035929636e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400940 + }, + { + "epoch": 1.9445350417630063, + "grad_norm": 1.9880646462411278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400950 + }, + { + "epoch": 1.9445835399558424, + "grad_norm": 1.5415789533790303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400960 + }, + { + "epoch": 1.9446320381486784, + "grad_norm": 1.076812239375613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400970 + }, + { + "epoch": 1.9446805363415147, + "grad_norm": 1.1899321528119344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400980 + }, + { + "epoch": 1.9447290345343506, + "grad_norm": 1.1952976386453429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 400990 + }, + { + "epoch": 1.9447775327271868, + "grad_norm": 1.5847017920123108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401000 + }, + { + "epoch": 1.944826030920023, + "grad_norm": 1.1475943750838269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401010 + }, + { + "epoch": 1.9448745291128589, + "grad_norm": 9.534929290566652e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401020 + }, + { + "epoch": 1.944923027305695, + "grad_norm": 9.51372935986683e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401030 + }, + { + "epoch": 1.9449715254985311, + "grad_norm": 1.0063231137280582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401040 + }, + { + "epoch": 1.945020023691367, + "grad_norm": 9.063671591036382e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401050 + }, + { + "epoch": 1.9450685218842034, + "grad_norm": 1.2816739669574417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401060 + }, + { + "epoch": 1.9451170200770393, + "grad_norm": 1.5692943833300887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401070 + }, + { + "epoch": 1.9451655182698755, + "grad_norm": 9.403947842656635e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401080 + }, + { + "epoch": 1.9452140164627116, + "grad_norm": 6.967798338308739e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401090 + }, + { + "epoch": 1.9452625146555476, + "grad_norm": 1.0240396974836585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401100 + }, + { + "epoch": 1.9453110128483837, + "grad_norm": 1.4348791488316692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401110 + }, + { + "epoch": 1.9453595110412198, + "grad_norm": 2.52673206802001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401120 + }, + { + "epoch": 1.9454080092340558, + "grad_norm": 1.2219367739874087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401130 + }, + { + "epoch": 1.9454565074268921, + "grad_norm": 1.4251683388977199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401140 + }, + { + "epoch": 1.945505005619728, + "grad_norm": 1.6731553031945623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401150 + }, + { + "epoch": 1.9455535038125642, + "grad_norm": 1.1264874366645472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401160 + }, + { + "epoch": 1.9456020020054003, + "grad_norm": 1.1274608802125385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401170 + }, + { + "epoch": 1.9456505001982363, + "grad_norm": 1.3557239775252583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401180 + }, + { + "epoch": 1.9456989983910724, + "grad_norm": 1.1759881957118523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401190 + }, + { + "epoch": 1.9457474965839086, + "grad_norm": 1.1707559366413989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401200 + }, + { + "epoch": 1.9457959947767445, + "grad_norm": 1.671705085470876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401210 + }, + { + "epoch": 1.9458444929695808, + "grad_norm": 8.039768850665041e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401220 + }, + { + "epoch": 1.9458929911624168, + "grad_norm": 1.0327013022504161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401230 + }, + { + "epoch": 1.945941489355253, + "grad_norm": 1.2427267215286975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401240 + }, + { + "epoch": 1.945989987548089, + "grad_norm": 1.243462399713735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401250 + }, + { + "epoch": 1.946038485740925, + "grad_norm": 1.3691069611354578e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401260 + }, + { + "epoch": 1.9460869839337611, + "grad_norm": 1.6294494642465907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401270 + }, + { + "epoch": 1.9461354821265973, + "grad_norm": 1.946391314788798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401280 + }, + { + "epoch": 1.9461839803194332, + "grad_norm": 7.852216654669064e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401290 + }, + { + "epoch": 1.9462324785122695, + "grad_norm": 1.5938050879071852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401300 + }, + { + "epoch": 1.9462809767051055, + "grad_norm": 1.3677712296100708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401310 + }, + { + "epoch": 1.9463294748979416, + "grad_norm": 8.253861594198497e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401320 + }, + { + "epoch": 1.9463779730907778, + "grad_norm": 1.0326356658652003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401330 + }, + { + "epoch": 1.9464264712836137, + "grad_norm": 1.7393038120872006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401340 + }, + { + "epoch": 1.94647496947645, + "grad_norm": 8.9262490732267e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401350 + }, + { + "epoch": 1.946523467669286, + "grad_norm": 1.2370793278648762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401360 + }, + { + "epoch": 1.9465719658621221, + "grad_norm": 1.1008192579708975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401370 + }, + { + "epoch": 1.9466204640549583, + "grad_norm": 2.5314252027897055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401380 + }, + { + "epoch": 1.9466689622477942, + "grad_norm": 1.0451393528398967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401390 + }, + { + "epoch": 1.9467174604406303, + "grad_norm": 1.610060351708853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401400 + }, + { + "epoch": 1.9467659586334665, + "grad_norm": 8.984546440160557e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401410 + }, + { + "epoch": 1.9468144568263024, + "grad_norm": 1.705614671720923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401420 + }, + { + "epoch": 1.9468629550191388, + "grad_norm": 1.8300879034427453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401430 + }, + { + "epoch": 1.9469114532119747, + "grad_norm": 1.2748466282630488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401440 + }, + { + "epoch": 1.9469599514048108, + "grad_norm": 9.119859534223451e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401450 + }, + { + "epoch": 1.947008449597647, + "grad_norm": 1.4441432938383514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401460 + }, + { + "epoch": 1.9470569477904829, + "grad_norm": 1.1777182784555862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401470 + }, + { + "epoch": 1.947105445983319, + "grad_norm": 1.2866099297070832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401480 + }, + { + "epoch": 1.9471539441761552, + "grad_norm": 1.3279180421932324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401490 + }, + { + "epoch": 1.947202442368991, + "grad_norm": 1.1501019692161663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401500 + }, + { + "epoch": 1.9472509405618275, + "grad_norm": 1.6497587296271377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401510 + }, + { + "epoch": 1.9472994387546634, + "grad_norm": 1.6576741757035052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401520 + }, + { + "epoch": 1.9473479369474995, + "grad_norm": 9.911820697539042e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401530 + }, + { + "epoch": 1.9473964351403357, + "grad_norm": 1.1018673085061437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401540 + }, + { + "epoch": 1.9474449333331716, + "grad_norm": 1.0827783114564227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401550 + }, + { + "epoch": 1.9474934315260077, + "grad_norm": 1.0765976554694134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401560 + }, + { + "epoch": 1.9475419297188439, + "grad_norm": 9.360933361790558e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401570 + }, + { + "epoch": 1.9475904279116798, + "grad_norm": 8.73491146080596e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401580 + }, + { + "epoch": 1.9476389261045162, + "grad_norm": 1.3592289960229209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401590 + }, + { + "epoch": 1.947687424297352, + "grad_norm": 1.9562200748168834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401600 + }, + { + "epoch": 1.9477359224901882, + "grad_norm": 8.906910764494569e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401610 + }, + { + "epoch": 1.9477844206830244, + "grad_norm": 1.3780891094938852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401620 + }, + { + "epoch": 1.9478329188758603, + "grad_norm": 9.273339429682892e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401630 + }, + { + "epoch": 1.9478814170686964, + "grad_norm": 1.4468284348367888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401640 + }, + { + "epoch": 1.9479299152615326, + "grad_norm": 9.748088558581003e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401650 + }, + { + "epoch": 1.9479784134543685, + "grad_norm": 1.1279711387146563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401660 + }, + { + "epoch": 1.9480269116472049, + "grad_norm": 7.548063507556435e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401670 + }, + { + "epoch": 1.9480754098400408, + "grad_norm": 9.291098557184796e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401680 + }, + { + "epoch": 1.948123908032877, + "grad_norm": 1.3788137742665185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401690 + }, + { + "epoch": 1.948172406225713, + "grad_norm": 1.5586628876462783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401700 + }, + { + "epoch": 1.948220904418549, + "grad_norm": 1.2633181611931832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401710 + }, + { + "epoch": 1.9482694026113851, + "grad_norm": 1.5272494380269563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401720 + }, + { + "epoch": 1.9483179008042213, + "grad_norm": 1.1711350111909269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401730 + }, + { + "epoch": 1.9483663989970572, + "grad_norm": 1.3677206034401479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401740 + }, + { + "epoch": 1.9484148971898936, + "grad_norm": 8.271460849584855e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401750 + }, + { + "epoch": 1.9484633953827295, + "grad_norm": 1.551010342382142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401760 + }, + { + "epoch": 1.9485118935755656, + "grad_norm": 1.3032568801918387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401770 + }, + { + "epoch": 1.9485603917684018, + "grad_norm": 1.6609401853884265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401780 + }, + { + "epoch": 1.9486088899612377, + "grad_norm": 1.7484888203966875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401790 + }, + { + "epoch": 1.9486573881540739, + "grad_norm": 9.200518569230098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401800 + }, + { + "epoch": 1.94870588634691, + "grad_norm": 1.3571074930496252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401810 + }, + { + "epoch": 1.9487543845397461, + "grad_norm": 1.247265402071207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401820 + }, + { + "epoch": 1.9488028827325823, + "grad_norm": 1.3034798129751834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401830 + }, + { + "epoch": 1.9488513809254182, + "grad_norm": 9.157792746350424e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401840 + }, + { + "epoch": 1.9488998791182544, + "grad_norm": 9.900516850791519e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401850 + }, + { + "epoch": 1.9489483773110905, + "grad_norm": 1.4746457388525869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401860 + }, + { + "epoch": 1.9489968755039264, + "grad_norm": 9.396011080298194e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401870 + }, + { + "epoch": 1.9490453736967628, + "grad_norm": 6.542563379952071e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401880 + }, + { + "epoch": 1.9490938718895987, + "grad_norm": 1.2460318110640856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401890 + }, + { + "epoch": 1.9491423700824348, + "grad_norm": 1.2572436425273281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401900 + }, + { + "epoch": 1.949190868275271, + "grad_norm": 1.1357064622075086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401910 + }, + { + "epoch": 1.949239366468107, + "grad_norm": 1.263309634680354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401920 + }, + { + "epoch": 1.949287864660943, + "grad_norm": 5.534992908451386e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401930 + }, + { + "epoch": 1.9493363628537792, + "grad_norm": 8.164319886816429e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401940 + }, + { + "epoch": 1.9493848610466151, + "grad_norm": 7.483958341936159e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401950 + }, + { + "epoch": 1.9494333592394515, + "grad_norm": 1.246643499541733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401960 + }, + { + "epoch": 1.9494818574322874, + "grad_norm": 1.9530292050262688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401970 + }, + { + "epoch": 1.9495303556251236, + "grad_norm": 1.0681365125719822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401980 + }, + { + "epoch": 1.9495788538179597, + "grad_norm": 1.1853832582175983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 401990 + }, + { + "epoch": 1.9496273520107956, + "grad_norm": 8.99694541089957e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402000 + }, + { + "epoch": 1.9496758502036318, + "grad_norm": 2.0962115598877062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402010 + }, + { + "epoch": 1.949724348396468, + "grad_norm": 6.847804101539623e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402020 + }, + { + "epoch": 1.9497728465893038, + "grad_norm": 1.0381967285866267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402030 + }, + { + "epoch": 1.9498213447821402, + "grad_norm": 7.560586823274207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402040 + }, + { + "epoch": 1.9498698429749761, + "grad_norm": 1.3240342155995677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402050 + }, + { + "epoch": 1.9499183411678123, + "grad_norm": 1.0063586408648462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402060 + }, + { + "epoch": 1.9499668393606484, + "grad_norm": 6.598971591387226e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402070 + }, + { + "epoch": 1.9500153375534843, + "grad_norm": 2.157620215825773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402080 + }, + { + "epoch": 1.9500638357463205, + "grad_norm": 1.3965882672550833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402090 + }, + { + "epoch": 1.9501123339391566, + "grad_norm": 1.1165319335759705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402100 + }, + { + "epoch": 1.9501608321319925, + "grad_norm": 1.450325815000042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402110 + }, + { + "epoch": 1.950209330324829, + "grad_norm": 1.1036854097312698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402120 + }, + { + "epoch": 1.9502578285176648, + "grad_norm": 1.2097502555263873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402130 + }, + { + "epoch": 1.950306326710501, + "grad_norm": 9.267941081247955e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402140 + }, + { + "epoch": 1.9503548249033371, + "grad_norm": 1.1676370093027799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402150 + }, + { + "epoch": 1.950403323096173, + "grad_norm": 6.164722510249021e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402160 + }, + { + "epoch": 1.9504518212890092, + "grad_norm": 8.43520631121919e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402170 + }, + { + "epoch": 1.9505003194818453, + "grad_norm": 1.3297292156266849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402180 + }, + { + "epoch": 1.9505488176746812, + "grad_norm": 1.4823325678037236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402190 + }, + { + "epoch": 1.9505973158675176, + "grad_norm": 1.889076628458497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402200 + }, + { + "epoch": 1.9506458140603535, + "grad_norm": 1.1312200953739193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402210 + }, + { + "epoch": 1.9506943122531897, + "grad_norm": 1.7341301727924474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402220 + }, + { + "epoch": 1.9507428104460258, + "grad_norm": 1.2685240413645715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402230 + }, + { + "epoch": 1.9507913086388617, + "grad_norm": 1.3917309971134273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402240 + }, + { + "epoch": 1.9508398068316979, + "grad_norm": 1.2537468840889687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402250 + }, + { + "epoch": 1.950888305024534, + "grad_norm": 7.467658491577822e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402260 + }, + { + "epoch": 1.95093680321737, + "grad_norm": 1.4435896922293523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402270 + }, + { + "epoch": 1.9509853014102063, + "grad_norm": 1.3673910004285972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402280 + }, + { + "epoch": 1.9510337996030422, + "grad_norm": 1.4110624668717264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402290 + }, + { + "epoch": 1.9510822977958784, + "grad_norm": 1.5501559147423905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402300 + }, + { + "epoch": 1.9511307959887145, + "grad_norm": 1.331748666899557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402310 + }, + { + "epoch": 1.9511792941815504, + "grad_norm": 1.317958542301767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402320 + }, + { + "epoch": 1.9512277923743868, + "grad_norm": 1.2586246711521198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402330 + }, + { + "epoch": 1.9512762905672227, + "grad_norm": 1.027530149855238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402340 + }, + { + "epoch": 1.9513247887600589, + "grad_norm": 2.234260065847593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402350 + }, + { + "epoch": 1.951373286952895, + "grad_norm": 1.2785770664436313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402360 + }, + { + "epoch": 1.951421785145731, + "grad_norm": 1.5031750066896166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402370 + }, + { + "epoch": 1.951470283338567, + "grad_norm": 1.2033661178634247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402380 + }, + { + "epoch": 1.9515187815314032, + "grad_norm": 8.981460908330519e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402390 + }, + { + "epoch": 1.9515672797242392, + "grad_norm": 1.0020492879903031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402400 + }, + { + "epoch": 1.9516157779170755, + "grad_norm": 1.1001639599328428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402410 + }, + { + "epoch": 1.9516642761099114, + "grad_norm": 1.657958570433493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402420 + }, + { + "epoch": 1.9517127743027476, + "grad_norm": 1.0196514743654461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402430 + }, + { + "epoch": 1.9517612724955837, + "grad_norm": 1.2960435391562442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402440 + }, + { + "epoch": 1.9518097706884197, + "grad_norm": 1.3348539162905126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402450 + }, + { + "epoch": 1.9518582688812558, + "grad_norm": 1.0205750022862503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402460 + }, + { + "epoch": 1.951906767074092, + "grad_norm": 8.381261018541863e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402470 + }, + { + "epoch": 1.9519552652669279, + "grad_norm": 1.5882779536013913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402480 + }, + { + "epoch": 1.9520037634597642, + "grad_norm": 1.520950831945811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402490 + }, + { + "epoch": 1.9520522616526002, + "grad_norm": 1.1983440906249143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402500 + }, + { + "epoch": 1.9521007598454363, + "grad_norm": 1.912439984153025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402510 + }, + { + "epoch": 1.9521492580382724, + "grad_norm": 1.7506570415548595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402520 + }, + { + "epoch": 1.9521977562311084, + "grad_norm": 8.773797688377272e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402530 + }, + { + "epoch": 1.9522462544239445, + "grad_norm": 1.2406106364437619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402540 + }, + { + "epoch": 1.9522947526167806, + "grad_norm": 1.2161230245055776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402550 + }, + { + "epoch": 1.9523432508096166, + "grad_norm": 1.599344834346539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402560 + }, + { + "epoch": 1.952391749002453, + "grad_norm": 1.1474290850799207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402570 + }, + { + "epoch": 1.9524402471952889, + "grad_norm": 9.44062694685499e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402580 + }, + { + "epoch": 1.952488745388125, + "grad_norm": 9.61196899851302e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402590 + }, + { + "epoch": 1.9525372435809611, + "grad_norm": 1.4741458720379796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402600 + }, + { + "epoch": 1.952585741773797, + "grad_norm": 1.1424588386432788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402610 + }, + { + "epoch": 1.9526342399666332, + "grad_norm": 1.1454544868172434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402620 + }, + { + "epoch": 1.9526827381594694, + "grad_norm": 7.525225775850686e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402630 + }, + { + "epoch": 1.9527312363523053, + "grad_norm": 1.2208872135488491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402640 + }, + { + "epoch": 1.9527797345451416, + "grad_norm": 1.3468636872460138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402650 + }, + { + "epoch": 1.9528282327379776, + "grad_norm": 1.185312736851074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402660 + }, + { + "epoch": 1.9528767309308137, + "grad_norm": 1.1492249818445543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402670 + }, + { + "epoch": 1.9529252291236499, + "grad_norm": 8.712380150655008e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402680 + }, + { + "epoch": 1.9529737273164858, + "grad_norm": 1.2075737743089121e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402690 + }, + { + "epoch": 1.953022225509322, + "grad_norm": 1.197960131094078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402700 + }, + { + "epoch": 1.953070723702158, + "grad_norm": 1.6413267189818725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402710 + }, + { + "epoch": 1.953119221894994, + "grad_norm": 9.758474917020976e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402720 + }, + { + "epoch": 1.9531677200878304, + "grad_norm": 1.69603939781382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402730 + }, + { + "epoch": 1.9532162182806663, + "grad_norm": 8.813769269977456e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402740 + }, + { + "epoch": 1.9532647164735024, + "grad_norm": 1.1058624238557968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402750 + }, + { + "epoch": 1.9533132146663386, + "grad_norm": 1.2485429579101037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402760 + }, + { + "epoch": 1.9533617128591745, + "grad_norm": 1.0475178058300116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402770 + }, + { + "epoch": 1.9534102110520106, + "grad_norm": 1.0748944845317965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402780 + }, + { + "epoch": 1.9534587092448468, + "grad_norm": 1.1025191426483616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402790 + }, + { + "epoch": 1.9535072074376827, + "grad_norm": 1.093410162411601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402800 + }, + { + "epoch": 1.953555705630519, + "grad_norm": 1.0860836674453367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402810 + }, + { + "epoch": 1.953604203823355, + "grad_norm": 1.1341939831766013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402820 + }, + { + "epoch": 1.9536527020161911, + "grad_norm": 1.1749253125969972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402830 + }, + { + "epoch": 1.9537012002090273, + "grad_norm": 1.0398112593179576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402840 + }, + { + "epoch": 1.9537496984018632, + "grad_norm": 1.5288151189452037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402850 + }, + { + "epoch": 1.9537981965946996, + "grad_norm": 1.4219992294783879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402860 + }, + { + "epoch": 1.9538466947875355, + "grad_norm": 1.5511620432562268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402870 + }, + { + "epoch": 1.9538951929803716, + "grad_norm": 1.4008746163085561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402880 + }, + { + "epoch": 1.9539436911732078, + "grad_norm": 7.203345475659262e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402890 + }, + { + "epoch": 1.9539921893660437, + "grad_norm": 6.951541120514548e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402900 + }, + { + "epoch": 1.9540406875588798, + "grad_norm": 1.3445821345214881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402910 + }, + { + "epoch": 1.954089185751716, + "grad_norm": 7.527941825458129e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402920 + }, + { + "epoch": 1.954137683944552, + "grad_norm": 1.5334693515001163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402930 + }, + { + "epoch": 1.9541861821373883, + "grad_norm": 1.3310936353150282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402940 + }, + { + "epoch": 1.9542346803302242, + "grad_norm": 1.517384262683663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402950 + }, + { + "epoch": 1.9542831785230603, + "grad_norm": 9.19553855283084e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402960 + }, + { + "epoch": 1.9543316767158965, + "grad_norm": 1.172867580834236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402970 + }, + { + "epoch": 1.9543801749087324, + "grad_norm": 1.5098875039143422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402980 + }, + { + "epoch": 1.9544286731015685, + "grad_norm": 1.1184472903380538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 402990 + }, + { + "epoch": 1.9544771712944047, + "grad_norm": 1.2023535056471246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403000 + }, + { + "epoch": 1.9545256694872406, + "grad_norm": 1.087682033329429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403010 + }, + { + "epoch": 1.954574167680077, + "grad_norm": 1.3052711800298766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403020 + }, + { + "epoch": 1.954622665872913, + "grad_norm": 9.903943443134722e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403030 + }, + { + "epoch": 1.954671164065749, + "grad_norm": 1.7693585263600653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403040 + }, + { + "epoch": 1.9547196622585852, + "grad_norm": 1.3314637392625173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403050 + }, + { + "epoch": 1.954768160451421, + "grad_norm": 1.4222997002377724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403060 + }, + { + "epoch": 1.9548166586442572, + "grad_norm": 5.459349861069995e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403070 + }, + { + "epoch": 1.9548651568370934, + "grad_norm": 6.183053180564002e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403080 + }, + { + "epoch": 1.9549136550299293, + "grad_norm": 9.853199145481994e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403090 + }, + { + "epoch": 1.9549621532227657, + "grad_norm": 1.63590243573708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403100 + }, + { + "epoch": 1.9550106514156016, + "grad_norm": 8.374229309993098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403110 + }, + { + "epoch": 1.9550591496084377, + "grad_norm": 1.1878547034882558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403120 + }, + { + "epoch": 1.9551076478012739, + "grad_norm": 1.1718209513844613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403130 + }, + { + "epoch": 1.9551561459941098, + "grad_norm": 1.4566774453328435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403140 + }, + { + "epoch": 1.955204644186946, + "grad_norm": 6.453819256790894e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403150 + }, + { + "epoch": 1.955253142379782, + "grad_norm": 8.535663731379373e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403160 + }, + { + "epoch": 1.955301640572618, + "grad_norm": 9.624426589027735e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403170 + }, + { + "epoch": 1.9553501387654544, + "grad_norm": 1.736796662044071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403180 + }, + { + "epoch": 1.9553986369582903, + "grad_norm": 1.0787861270955545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403190 + }, + { + "epoch": 1.9554471351511264, + "grad_norm": 1.0288061957908212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403200 + }, + { + "epoch": 1.9554956333439626, + "grad_norm": 1.2592957787660453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403210 + }, + { + "epoch": 1.9555441315367985, + "grad_norm": 1.1004344990794834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403220 + }, + { + "epoch": 1.9555926297296347, + "grad_norm": 9.052045335522507e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403230 + }, + { + "epoch": 1.9556411279224708, + "grad_norm": 1.675661565059272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403240 + }, + { + "epoch": 1.9556896261153067, + "grad_norm": 1.2777553237697248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403250 + }, + { + "epoch": 1.955738124308143, + "grad_norm": 1.124906479077481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403260 + }, + { + "epoch": 1.955786622500979, + "grad_norm": 8.649136518101841e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403270 + }, + { + "epoch": 1.9558351206938152, + "grad_norm": 7.671038915191275e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403280 + }, + { + "epoch": 1.9558836188866513, + "grad_norm": 1.0372377623468765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403290 + }, + { + "epoch": 1.9559321170794872, + "grad_norm": 1.2015099137840934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403300 + }, + { + "epoch": 1.9559806152723234, + "grad_norm": 1.627593881892153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403310 + }, + { + "epoch": 1.9560291134651595, + "grad_norm": 7.750501573866586e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403320 + }, + { + "epoch": 1.9560776116579954, + "grad_norm": 1.6631314991855106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403330 + }, + { + "epoch": 1.9561261098508318, + "grad_norm": 1.0540609274301005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403340 + }, + { + "epoch": 1.9561746080436677, + "grad_norm": 1.2961491435703465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403350 + }, + { + "epoch": 1.9562231062365039, + "grad_norm": 9.852692883782765e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403360 + }, + { + "epoch": 1.95627160442934, + "grad_norm": 1.3020848399492024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403370 + }, + { + "epoch": 1.956320102622176, + "grad_norm": 1.556152362525154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403380 + }, + { + "epoch": 1.9563686008150123, + "grad_norm": 9.076130069729516e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403390 + }, + { + "epoch": 1.9564170990078482, + "grad_norm": 1.1295779422937358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403400 + }, + { + "epoch": 1.9564655972006844, + "grad_norm": 1.5968900868301716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403410 + }, + { + "epoch": 1.9565140953935205, + "grad_norm": 1.3911489737949978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403420 + }, + { + "epoch": 1.9565625935863564, + "grad_norm": 1.5708971901062796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403430 + }, + { + "epoch": 1.9566110917791926, + "grad_norm": 1.134884719533602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403440 + }, + { + "epoch": 1.9566595899720287, + "grad_norm": 1.1568986657550795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403450 + }, + { + "epoch": 1.9567080881648646, + "grad_norm": 1.5122019192403968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403460 + }, + { + "epoch": 1.956756586357701, + "grad_norm": 8.534513540325861e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403470 + }, + { + "epoch": 1.956805084550537, + "grad_norm": 1.0754488855013733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403480 + }, + { + "epoch": 1.956853582743373, + "grad_norm": 1.1685448164655554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403490 + }, + { + "epoch": 1.9569020809362092, + "grad_norm": 1.0592595245384473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403500 + }, + { + "epoch": 1.9569505791290451, + "grad_norm": 1.0867062805175465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403510 + }, + { + "epoch": 1.9569990773218813, + "grad_norm": 1.1095604435240602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403520 + }, + { + "epoch": 1.9570475755147174, + "grad_norm": 1.081332179353467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403530 + }, + { + "epoch": 1.9570960737075533, + "grad_norm": 1.2511281788363249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403540 + }, + { + "epoch": 1.9571445719003897, + "grad_norm": 1.583166842067385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403550 + }, + { + "epoch": 1.9571930700932256, + "grad_norm": 9.353269270206965e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403560 + }, + { + "epoch": 1.9572415682860618, + "grad_norm": 1.4446280616198237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403570 + }, + { + "epoch": 1.957290066478898, + "grad_norm": 1.3476297411330052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403580 + }, + { + "epoch": 1.9573385646717338, + "grad_norm": 1.0540408545978153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403590 + }, + { + "epoch": 1.95738706286457, + "grad_norm": 1.1966455382150798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403600 + }, + { + "epoch": 1.9574355610574061, + "grad_norm": 1.1183063364228474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403610 + }, + { + "epoch": 1.957484059250242, + "grad_norm": 1.3472883253484724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403620 + }, + { + "epoch": 1.9575325574430784, + "grad_norm": 6.747811642782153e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403630 + }, + { + "epoch": 1.9575810556359143, + "grad_norm": 7.344795882602284e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403640 + }, + { + "epoch": 1.9576295538287505, + "grad_norm": 1.8485362573983366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403650 + }, + { + "epoch": 1.9576780520215866, + "grad_norm": 9.807314071963447e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403660 + }, + { + "epoch": 1.9577265502144225, + "grad_norm": 6.845784827902435e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403670 + }, + { + "epoch": 1.9577750484072587, + "grad_norm": 9.869008721352657e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403680 + }, + { + "epoch": 1.9578235466000948, + "grad_norm": 1.1027388779893954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403690 + }, + { + "epoch": 1.9578720447929308, + "grad_norm": 1.1893821039166141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403700 + }, + { + "epoch": 1.9579205429857671, + "grad_norm": 1.3758834072064019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403710 + }, + { + "epoch": 1.957969041178603, + "grad_norm": 1.9169551279674124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403720 + }, + { + "epoch": 1.9580175393714392, + "grad_norm": 9.124486055611669e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403730 + }, + { + "epoch": 1.9580660375642753, + "grad_norm": 1.3233540485657613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403740 + }, + { + "epoch": 1.9581145357571113, + "grad_norm": 1.0818154372316258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403750 + }, + { + "epoch": 1.9581630339499474, + "grad_norm": 9.037091075470016e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403760 + }, + { + "epoch": 1.9582115321427835, + "grad_norm": 1.1419898804376771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403770 + }, + { + "epoch": 1.9582600303356195, + "grad_norm": 1.5492435778696745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403780 + }, + { + "epoch": 1.9583085285284558, + "grad_norm": 1.522029080547327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403790 + }, + { + "epoch": 1.9583570267212917, + "grad_norm": 1.1388812559687267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403800 + }, + { + "epoch": 1.958405524914128, + "grad_norm": 1.7656402562238327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403810 + }, + { + "epoch": 1.958454023106964, + "grad_norm": 8.103343773768756e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403820 + }, + { + "epoch": 1.9585025212998, + "grad_norm": 1.6046502793187756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403830 + }, + { + "epoch": 1.958551019492636, + "grad_norm": 1.9326268585473372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403840 + }, + { + "epoch": 1.9585995176854722, + "grad_norm": 1.9672105722179367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403850 + }, + { + "epoch": 1.9586480158783082, + "grad_norm": 1.0777589487531714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403860 + }, + { + "epoch": 1.9586965140711445, + "grad_norm": 1.1799705212922618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403870 + }, + { + "epoch": 1.9587450122639805, + "grad_norm": 1.1142158307109185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403880 + }, + { + "epoch": 1.9587935104568166, + "grad_norm": 9.814729473589523e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403890 + }, + { + "epoch": 1.9588420086496527, + "grad_norm": 1.0111431691939288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403900 + }, + { + "epoch": 1.9588905068424887, + "grad_norm": 1.181353059820367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403910 + }, + { + "epoch": 1.958939005035325, + "grad_norm": 9.66806901203654e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403920 + }, + { + "epoch": 1.958987503228161, + "grad_norm": 1.8834441561921267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403930 + }, + { + "epoch": 1.959036001420997, + "grad_norm": 9.732569417053583e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403940 + }, + { + "epoch": 1.9590844996138332, + "grad_norm": 1.2907046098575847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403950 + }, + { + "epoch": 1.9591329978066692, + "grad_norm": 7.997738471487992e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403960 + }, + { + "epoch": 1.9591814959995053, + "grad_norm": 1.0354506585485979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403970 + }, + { + "epoch": 1.9592299941923415, + "grad_norm": 1.686028561209696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403980 + }, + { + "epoch": 1.9592784923851774, + "grad_norm": 1.8774061416593213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 403990 + }, + { + "epoch": 1.9593269905780137, + "grad_norm": 1.3209921156942528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404000 + }, + { + "epoch": 1.9593754887708497, + "grad_norm": 1.1126603638444976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404010 + }, + { + "epoch": 1.9594239869636858, + "grad_norm": 1.9355676172949643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404020 + }, + { + "epoch": 1.959472485156522, + "grad_norm": 8.96907437208938e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404030 + }, + { + "epoch": 1.9595209833493579, + "grad_norm": 1.1864198512512303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404040 + }, + { + "epoch": 1.959569481542194, + "grad_norm": 1.5940891273658053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404050 + }, + { + "epoch": 1.9596179797350302, + "grad_norm": 1.509609326433292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404060 + }, + { + "epoch": 1.959666477927866, + "grad_norm": 1.3838032941748679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404070 + }, + { + "epoch": 1.9597149761207024, + "grad_norm": 1.2607792143626284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404080 + }, + { + "epoch": 1.9597634743135384, + "grad_norm": 8.890147285001149e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404090 + }, + { + "epoch": 1.9598119725063745, + "grad_norm": 1.2532002102716433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404100 + }, + { + "epoch": 1.9598604706992107, + "grad_norm": 1.7429000465085664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404110 + }, + { + "epoch": 1.9599089688920466, + "grad_norm": 1.1125156795799285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404120 + }, + { + "epoch": 1.9599574670848827, + "grad_norm": 1.1651361653264303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404130 + }, + { + "epoch": 1.9600059652777189, + "grad_norm": 1.2273373428683954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404140 + }, + { + "epoch": 1.9600544634705548, + "grad_norm": 1.0380658110875629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404150 + }, + { + "epoch": 1.9601029616633912, + "grad_norm": 1.6802550462102772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404160 + }, + { + "epoch": 1.960151459856227, + "grad_norm": 1.2712785490975875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404170 + }, + { + "epoch": 1.9601999580490632, + "grad_norm": 6.2781269072331725e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404180 + }, + { + "epoch": 1.9602484562418994, + "grad_norm": 7.784277222810942e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404190 + }, + { + "epoch": 1.9602969544347353, + "grad_norm": 1.5458967439485605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404200 + }, + { + "epoch": 1.9603454526275714, + "grad_norm": 1.0223375035423032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404210 + }, + { + "epoch": 1.9603939508204076, + "grad_norm": 9.138511281037154e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404220 + }, + { + "epoch": 1.9604424490132435, + "grad_norm": 7.3309984749414525e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404230 + }, + { + "epoch": 1.9604909472060799, + "grad_norm": 1.201114407933801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404240 + }, + { + "epoch": 1.9605394453989158, + "grad_norm": 1.262661086798289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404250 + }, + { + "epoch": 1.960587943591752, + "grad_norm": 1.5013084109227748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404260 + }, + { + "epoch": 1.960636441784588, + "grad_norm": 1.733073595744372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404270 + }, + { + "epoch": 1.960684939977424, + "grad_norm": 1.2540271043803841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404280 + }, + { + "epoch": 1.9607334381702601, + "grad_norm": 1.1812358202689666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404290 + }, + { + "epoch": 1.9607819363630963, + "grad_norm": 7.24090654102838e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404300 + }, + { + "epoch": 1.9608304345559322, + "grad_norm": 1.4276673176993881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404310 + }, + { + "epoch": 1.9608789327487686, + "grad_norm": 1.095619772684131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404320 + }, + { + "epoch": 1.9609274309416045, + "grad_norm": 1.4258104030773211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404330 + }, + { + "epoch": 1.9609759291344406, + "grad_norm": 8.578869170605685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404340 + }, + { + "epoch": 1.9610244273272768, + "grad_norm": 8.860001621258107e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404350 + }, + { + "epoch": 1.9610729255201127, + "grad_norm": 7.455179584781035e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404360 + }, + { + "epoch": 1.9611214237129488, + "grad_norm": 1.0857868382174729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404370 + }, + { + "epoch": 1.961169921905785, + "grad_norm": 9.091050579002058e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404380 + }, + { + "epoch": 1.9612184200986211, + "grad_norm": 8.933797701615731e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404390 + }, + { + "epoch": 1.9612669182914573, + "grad_norm": 1.8906204601876198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404400 + }, + { + "epoch": 1.9613154164842932, + "grad_norm": 1.0927536209237587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404410 + }, + { + "epoch": 1.9613639146771293, + "grad_norm": 1.2756732559182637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404420 + }, + { + "epoch": 1.9614124128699655, + "grad_norm": 1.3261847620071876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404430 + }, + { + "epoch": 1.9614609110628014, + "grad_norm": 1.0873293376789661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404440 + }, + { + "epoch": 1.9615094092556378, + "grad_norm": 9.844227655264604e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404450 + }, + { + "epoch": 1.9615579074484737, + "grad_norm": 1.3845626867237115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404460 + }, + { + "epoch": 1.9616064056413098, + "grad_norm": 1.0072204403854812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404470 + }, + { + "epoch": 1.961654903834146, + "grad_norm": 9.953901702886014e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404480 + }, + { + "epoch": 1.961703402026982, + "grad_norm": 9.66673496805015e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404490 + }, + { + "epoch": 1.961751900219818, + "grad_norm": 1.0366244751480735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404500 + }, + { + "epoch": 1.9618003984126542, + "grad_norm": 1.0041921960635136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404510 + }, + { + "epoch": 1.9618488966054901, + "grad_norm": 1.3628413952915253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404520 + }, + { + "epoch": 1.9618973947983265, + "grad_norm": 1.851565301080882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404530 + }, + { + "epoch": 1.9619458929911624, + "grad_norm": 8.65298765972966e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404540 + }, + { + "epoch": 1.9619943911839985, + "grad_norm": 1.4391671854241395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404550 + }, + { + "epoch": 1.9620428893768347, + "grad_norm": 1.0026267815987921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404560 + }, + { + "epoch": 1.9620913875696706, + "grad_norm": 6.67454580494109e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404570 + }, + { + "epoch": 1.9621398857625068, + "grad_norm": 1.317206255180281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404580 + }, + { + "epoch": 1.962188383955343, + "grad_norm": 9.35837718429866e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404590 + }, + { + "epoch": 1.9622368821481788, + "grad_norm": 1.4622060895419509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404600 + }, + { + "epoch": 1.9622853803410152, + "grad_norm": 1.7240326499745606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404610 + }, + { + "epoch": 1.962333878533851, + "grad_norm": 9.962944247376981e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404620 + }, + { + "epoch": 1.9623823767266872, + "grad_norm": 8.067214452012195e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404630 + }, + { + "epoch": 1.9624308749195234, + "grad_norm": 1.1371199981624613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404640 + }, + { + "epoch": 1.9624793731123593, + "grad_norm": 1.4560127326035399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404650 + }, + { + "epoch": 1.9625278713051955, + "grad_norm": 1.3592201142387239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404660 + }, + { + "epoch": 1.9625763694980316, + "grad_norm": 9.200873840597978e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404670 + }, + { + "epoch": 1.9626248676908675, + "grad_norm": 1.0269959105357884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404680 + }, + { + "epoch": 1.962673365883704, + "grad_norm": 1.5899329852686606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404690 + }, + { + "epoch": 1.9627218640765398, + "grad_norm": 1.026724660846412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404700 + }, + { + "epoch": 1.962770362269376, + "grad_norm": 1.1513728637169152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404710 + }, + { + "epoch": 1.962818860462212, + "grad_norm": 1.6770055566439623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404720 + }, + { + "epoch": 1.962867358655048, + "grad_norm": 1.3408338439546696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404730 + }, + { + "epoch": 1.9629158568478842, + "grad_norm": 1.1470179472894415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404740 + }, + { + "epoch": 1.9629643550407203, + "grad_norm": 1.3428062217712977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404750 + }, + { + "epoch": 1.9630128532335562, + "grad_norm": 1.790264292367283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404760 + }, + { + "epoch": 1.9630613514263926, + "grad_norm": 9.376707410524432e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404770 + }, + { + "epoch": 1.9631098496192285, + "grad_norm": 1.407468275260726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404780 + }, + { + "epoch": 1.9631583478120647, + "grad_norm": 1.3937135889818819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404790 + }, + { + "epoch": 1.9632068460049008, + "grad_norm": 1.010997241479572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404800 + }, + { + "epoch": 1.9632553441977367, + "grad_norm": 1.5527412244864536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404810 + }, + { + "epoch": 1.9633038423905729, + "grad_norm": 1.6459393847867432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404820 + }, + { + "epoch": 1.963352340583409, + "grad_norm": 1.3386728170416973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404830 + }, + { + "epoch": 1.963400838776245, + "grad_norm": 1.0217656054578583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404840 + }, + { + "epoch": 1.9634493369690813, + "grad_norm": 1.4500428413555255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404850 + }, + { + "epoch": 1.9634978351619172, + "grad_norm": 1.534039029138512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404860 + }, + { + "epoch": 1.9635463333547534, + "grad_norm": 9.820207758082233e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404870 + }, + { + "epoch": 1.9635948315475895, + "grad_norm": 1.252952142039021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404880 + }, + { + "epoch": 1.9636433297404254, + "grad_norm": 1.474233446430162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404890 + }, + { + "epoch": 1.9636918279332618, + "grad_norm": 8.758499703276357e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404900 + }, + { + "epoch": 1.9637403261260977, + "grad_norm": 9.642366016748838e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404910 + }, + { + "epoch": 1.9637888243189339, + "grad_norm": 8.845789878364485e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404920 + }, + { + "epoch": 1.96383732251177, + "grad_norm": 9.632445951979207e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404930 + }, + { + "epoch": 1.963885820704606, + "grad_norm": 1.097541968420046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404940 + }, + { + "epoch": 1.963934318897442, + "grad_norm": 7.156070847003093e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404950 + }, + { + "epoch": 1.9639828170902782, + "grad_norm": 1.4208479726107726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404960 + }, + { + "epoch": 1.9640313152831141, + "grad_norm": 1.2945961636035008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404970 + }, + { + "epoch": 1.9640798134759505, + "grad_norm": 1.1325611559698245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404980 + }, + { + "epoch": 1.9641283116687864, + "grad_norm": 1.2268641214063791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 404990 + }, + { + "epoch": 1.9641768098616226, + "grad_norm": 1.859031506512565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405000 + }, + { + "epoch": 1.9642253080544587, + "grad_norm": 1.248009162679864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405010 + }, + { + "epoch": 1.9642738062472946, + "grad_norm": 8.35979996338665e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405020 + }, + { + "epoch": 1.9643223044401308, + "grad_norm": 1.883620548426279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405030 + }, + { + "epoch": 1.964370802632967, + "grad_norm": 1.4116407598407932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405040 + }, + { + "epoch": 1.9644193008258028, + "grad_norm": 1.8083557762338387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405050 + }, + { + "epoch": 1.9644677990186392, + "grad_norm": 1.591214093821236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405060 + }, + { + "epoch": 1.9645162972114751, + "grad_norm": 1.1454760695528421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405070 + }, + { + "epoch": 1.9645647954043113, + "grad_norm": 2.005168120433609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405080 + }, + { + "epoch": 1.9646132935971474, + "grad_norm": 1.2043495978275587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405090 + }, + { + "epoch": 1.9646617917899833, + "grad_norm": 9.934458589100359e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405100 + }, + { + "epoch": 1.9647102899828195, + "grad_norm": 6.965527177271724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405110 + }, + { + "epoch": 1.9647587881756556, + "grad_norm": 1.6244969813783428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405120 + }, + { + "epoch": 1.9648072863684916, + "grad_norm": 1.1971063251792202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405130 + }, + { + "epoch": 1.964855784561328, + "grad_norm": 8.823721309170196e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405140 + }, + { + "epoch": 1.9649042827541638, + "grad_norm": 1.689961415252128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405150 + }, + { + "epoch": 1.964952780947, + "grad_norm": 1.626776402474661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405160 + }, + { + "epoch": 1.9650012791398361, + "grad_norm": 7.80812747791515e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405170 + }, + { + "epoch": 1.965049777332672, + "grad_norm": 1.5296739874770537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405180 + }, + { + "epoch": 1.9650982755255082, + "grad_norm": 1.0603111277873722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405190 + }, + { + "epoch": 1.9651467737183443, + "grad_norm": 1.443717767557473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405200 + }, + { + "epoch": 1.9651952719111803, + "grad_norm": 1.2858428100059882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405210 + }, + { + "epoch": 1.9652437701040166, + "grad_norm": 7.807662960601647e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405220 + }, + { + "epoch": 1.9652922682968526, + "grad_norm": 2.0527865629560438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405230 + }, + { + "epoch": 1.9653407664896887, + "grad_norm": 1.93405842452421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405240 + }, + { + "epoch": 1.9653892646825248, + "grad_norm": 1.4719846674893233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405250 + }, + { + "epoch": 1.9654377628753608, + "grad_norm": 1.189898046760618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405260 + }, + { + "epoch": 1.965486261068197, + "grad_norm": 1.0471302047676545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405270 + }, + { + "epoch": 1.965534759261033, + "grad_norm": 1.0189770804913678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405280 + }, + { + "epoch": 1.965583257453869, + "grad_norm": 1.2161009088629271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405290 + }, + { + "epoch": 1.9656317556467053, + "grad_norm": 1.210556543895791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405300 + }, + { + "epoch": 1.9656802538395413, + "grad_norm": 1.3568096868254997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405310 + }, + { + "epoch": 1.9657287520323774, + "grad_norm": 8.47880254895017e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405320 + }, + { + "epoch": 1.9657772502252135, + "grad_norm": 1.3997893510975246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405330 + }, + { + "epoch": 1.9658257484180495, + "grad_norm": 1.6651556578040072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405340 + }, + { + "epoch": 1.9658742466108856, + "grad_norm": 9.308721793388486e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405350 + }, + { + "epoch": 1.9659227448037218, + "grad_norm": 1.1459518667322754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405360 + }, + { + "epoch": 1.9659712429965577, + "grad_norm": 7.316293793024897e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405370 + }, + { + "epoch": 1.966019741189394, + "grad_norm": 9.936655942510697e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405380 + }, + { + "epoch": 1.96606823938223, + "grad_norm": 1.1943620314980308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405390 + }, + { + "epoch": 1.966116737575066, + "grad_norm": 1.296929763583421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405400 + }, + { + "epoch": 1.9661652357679023, + "grad_norm": 1.8129583168047247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405410 + }, + { + "epoch": 1.9662137339607382, + "grad_norm": 1.4323079611244793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405420 + }, + { + "epoch": 1.9662622321535745, + "grad_norm": 9.962480618241898e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405430 + }, + { + "epoch": 1.9663107303464105, + "grad_norm": 8.892973468732635e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405440 + }, + { + "epoch": 1.9663592285392466, + "grad_norm": 1.855427278485422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405450 + }, + { + "epoch": 1.9664077267320828, + "grad_norm": 1.886446732157765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405460 + }, + { + "epoch": 1.9664562249249187, + "grad_norm": 8.77267947174687e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405470 + }, + { + "epoch": 1.9665047231177548, + "grad_norm": 1.1565825630555082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405480 + }, + { + "epoch": 1.966553221310591, + "grad_norm": 1.1645552966399464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405490 + }, + { + "epoch": 1.9666017195034269, + "grad_norm": 1.325650433869896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405500 + }, + { + "epoch": 1.9666502176962632, + "grad_norm": 1.0013524232022064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405510 + }, + { + "epoch": 1.9666987158890992, + "grad_norm": 1.1409126088324228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405520 + }, + { + "epoch": 1.9667472140819353, + "grad_norm": 8.01596122812498e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405530 + }, + { + "epoch": 1.9667957122747715, + "grad_norm": 5.907921263315075e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405540 + }, + { + "epoch": 1.9668442104676074, + "grad_norm": 9.235958664532973e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405550 + }, + { + "epoch": 1.9668927086604435, + "grad_norm": 1.5874899617074334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405560 + }, + { + "epoch": 1.9669412068532797, + "grad_norm": 1.534264271185748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405570 + }, + { + "epoch": 1.9669897050461156, + "grad_norm": 1.4624713884359153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405580 + }, + { + "epoch": 1.967038203238952, + "grad_norm": 1.687811668205086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405590 + }, + { + "epoch": 1.9670867014317879, + "grad_norm": 9.933659228522629e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405600 + }, + { + "epoch": 1.967135199624624, + "grad_norm": 9.057501415554725e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405610 + }, + { + "epoch": 1.9671836978174602, + "grad_norm": 1.6442033512475973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405620 + }, + { + "epoch": 1.967232196010296, + "grad_norm": 9.013515267497496e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405630 + }, + { + "epoch": 1.9672806942031322, + "grad_norm": 1.25295525066349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405640 + }, + { + "epoch": 1.9673291923959684, + "grad_norm": 7.539976643045065e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405650 + }, + { + "epoch": 1.9673776905888043, + "grad_norm": 7.530448264958522e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405660 + }, + { + "epoch": 1.9674261887816407, + "grad_norm": 1.1616334560926589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405670 + }, + { + "epoch": 1.9674746869744766, + "grad_norm": 9.50256229259594e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405680 + }, + { + "epoch": 1.9675231851673127, + "grad_norm": 1.0993647769907966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405690 + }, + { + "epoch": 1.9675716833601489, + "grad_norm": 1.8291544279236405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405700 + }, + { + "epoch": 1.9676201815529848, + "grad_norm": 1.2189129705575397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405710 + }, + { + "epoch": 1.967668679745821, + "grad_norm": 1.3690684141920428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405720 + }, + { + "epoch": 1.967717177938657, + "grad_norm": 1.120126036369129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405730 + }, + { + "epoch": 1.967765676131493, + "grad_norm": 1.1884032424802626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405740 + }, + { + "epoch": 1.9678141743243294, + "grad_norm": 9.437495229747128e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405750 + }, + { + "epoch": 1.9678626725171653, + "grad_norm": 1.353995138231312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405760 + }, + { + "epoch": 1.9679111707100014, + "grad_norm": 1.18565655071734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405770 + }, + { + "epoch": 1.9679596689028376, + "grad_norm": 1.5583983881128916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405780 + }, + { + "epoch": 1.9680081670956735, + "grad_norm": 1.0188317745019049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405790 + }, + { + "epoch": 1.9680566652885096, + "grad_norm": 2.151142375339532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405800 + }, + { + "epoch": 1.9681051634813458, + "grad_norm": 8.585555377749188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405810 + }, + { + "epoch": 1.9681536616741817, + "grad_norm": 8.310236943032123e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405820 + }, + { + "epoch": 1.968202159867018, + "grad_norm": 9.915424037387766e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405830 + }, + { + "epoch": 1.968250658059854, + "grad_norm": 1.5828195643052823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405840 + }, + { + "epoch": 1.9682991562526901, + "grad_norm": 8.734949652478008e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405850 + }, + { + "epoch": 1.9683476544455263, + "grad_norm": 1.0310943210356527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405860 + }, + { + "epoch": 1.9683961526383622, + "grad_norm": 1.6475771857926702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405870 + }, + { + "epoch": 1.9684446508311984, + "grad_norm": 9.118096500060346e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405880 + }, + { + "epoch": 1.9684931490240345, + "grad_norm": 1.1025730550784374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405890 + }, + { + "epoch": 1.9685416472168704, + "grad_norm": 9.369449216478642e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405900 + }, + { + "epoch": 1.9685901454097068, + "grad_norm": 7.574286087219662e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405910 + }, + { + "epoch": 1.9686386436025427, + "grad_norm": 1.068788613167726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405920 + }, + { + "epoch": 1.9686871417953788, + "grad_norm": 9.900415598451673e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405930 + }, + { + "epoch": 1.968735639988215, + "grad_norm": 8.716999566615868e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405940 + }, + { + "epoch": 1.968784138181051, + "grad_norm": 1.1874066174755171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405950 + }, + { + "epoch": 1.9688326363738873, + "grad_norm": 7.57757856462149e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405960 + }, + { + "epoch": 1.9688811345667232, + "grad_norm": 1.3445697000236123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405970 + }, + { + "epoch": 1.9689296327595593, + "grad_norm": 7.406963931089194e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405980 + }, + { + "epoch": 1.9689781309523955, + "grad_norm": 1.865722509819534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 405990 + }, + { + "epoch": 1.9690266291452314, + "grad_norm": 1.0300549746489196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406000 + }, + { + "epoch": 1.9690751273380676, + "grad_norm": 1.643579317089916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406010 + }, + { + "epoch": 1.9691236255309037, + "grad_norm": 1.4250782776059623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406020 + }, + { + "epoch": 1.9691721237237396, + "grad_norm": 1.2487882727896249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406030 + }, + { + "epoch": 1.969220621916576, + "grad_norm": 1.1853075854162398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406040 + }, + { + "epoch": 1.969269120109412, + "grad_norm": 1.887998202221297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406050 + }, + { + "epoch": 1.969317618302248, + "grad_norm": 1.5941509445838165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406060 + }, + { + "epoch": 1.9693661164950842, + "grad_norm": 7.097994636495741e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406070 + }, + { + "epoch": 1.9694146146879201, + "grad_norm": 1.006711691786677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406080 + }, + { + "epoch": 1.9694631128807563, + "grad_norm": 1.2825347006639731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406090 + }, + { + "epoch": 1.9695116110735924, + "grad_norm": 7.81653675119287e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406100 + }, + { + "epoch": 1.9695601092664283, + "grad_norm": 1.375142488768688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406110 + }, + { + "epoch": 1.9696086074592647, + "grad_norm": 1.6361013877030928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406120 + }, + { + "epoch": 1.9696571056521006, + "grad_norm": 1.0901487712544622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406130 + }, + { + "epoch": 1.9697056038449368, + "grad_norm": 9.793232891297521e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406140 + }, + { + "epoch": 1.969754102037773, + "grad_norm": 1.3460746295379522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406150 + }, + { + "epoch": 1.9698026002306088, + "grad_norm": 1.665552851193297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406160 + }, + { + "epoch": 1.969851098423445, + "grad_norm": 1.5333624148183844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406170 + }, + { + "epoch": 1.9698995966162811, + "grad_norm": 8.340038881726741e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406180 + }, + { + "epoch": 1.969948094809117, + "grad_norm": 2.0470597661415013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406190 + }, + { + "epoch": 1.9699965930019534, + "grad_norm": 1.4795547009782695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406200 + }, + { + "epoch": 1.9700450911947893, + "grad_norm": 9.745022566676198e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406210 + }, + { + "epoch": 1.9700935893876255, + "grad_norm": 1.973110030917269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406220 + }, + { + "epoch": 1.9701420875804616, + "grad_norm": 1.3586876512761137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406230 + }, + { + "epoch": 1.9701905857732975, + "grad_norm": 1.2885680078511541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406240 + }, + { + "epoch": 1.9702390839661337, + "grad_norm": 8.365083736805445e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406250 + }, + { + "epoch": 1.9702875821589698, + "grad_norm": 1.4079436283509494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406260 + }, + { + "epoch": 1.9703360803518057, + "grad_norm": 1.552955808392653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406270 + }, + { + "epoch": 1.970384578544642, + "grad_norm": 1.824970752295485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406280 + }, + { + "epoch": 1.970433076737478, + "grad_norm": 1.1387347065294762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406290 + }, + { + "epoch": 1.9704815749303142, + "grad_norm": 1.5168975409096674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406300 + }, + { + "epoch": 1.9705300731231503, + "grad_norm": 8.910355120406166e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406310 + }, + { + "epoch": 1.9705785713159862, + "grad_norm": 1.1690580947743001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406320 + }, + { + "epoch": 1.9706270695088224, + "grad_norm": 1.295744755935857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406330 + }, + { + "epoch": 1.9706755677016585, + "grad_norm": 9.57216617081258e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406340 + }, + { + "epoch": 1.9707240658944944, + "grad_norm": 1.2387158854210156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406350 + }, + { + "epoch": 1.9707725640873308, + "grad_norm": 1.4581820195758155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406360 + }, + { + "epoch": 1.9708210622801667, + "grad_norm": 1.7816937258885446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406370 + }, + { + "epoch": 1.9708695604730029, + "grad_norm": 1.0414614060039185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406380 + }, + { + "epoch": 1.970918058665839, + "grad_norm": 1.4151567029330181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406390 + }, + { + "epoch": 1.970966556858675, + "grad_norm": 9.738403861092593e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406400 + }, + { + "epoch": 1.971015055051511, + "grad_norm": 1.2459706155709682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406410 + }, + { + "epoch": 1.9710635532443472, + "grad_norm": 1.6380504064272827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406420 + }, + { + "epoch": 1.9711120514371834, + "grad_norm": 7.421435466170578e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406430 + }, + { + "epoch": 1.9711605496300195, + "grad_norm": 1.5428046395982165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406440 + }, + { + "epoch": 1.9712090478228554, + "grad_norm": 1.580966646486104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406450 + }, + { + "epoch": 1.9712575460156916, + "grad_norm": 1.1556457124584085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406460 + }, + { + "epoch": 1.9713060442085277, + "grad_norm": 1.6568950655937442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406470 + }, + { + "epoch": 1.9713545424013637, + "grad_norm": 1.6278779213507732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406480 + }, + { + "epoch": 1.9714030405942, + "grad_norm": 6.868602131504531e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406490 + }, + { + "epoch": 1.971451538787036, + "grad_norm": 1.1849518699591499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406500 + }, + { + "epoch": 1.971500036979872, + "grad_norm": 1.3990522518270154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406510 + }, + { + "epoch": 1.9715485351727082, + "grad_norm": 1.1666396737552986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406520 + }, + { + "epoch": 1.9715970333655441, + "grad_norm": 1.0672095207553411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406530 + }, + { + "epoch": 1.9716455315583803, + "grad_norm": 1.1701973612332495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406540 + }, + { + "epoch": 1.9716940297512164, + "grad_norm": 1.1010504508135455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406550 + }, + { + "epoch": 1.9717425279440524, + "grad_norm": 9.602660888674563e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406560 + }, + { + "epoch": 1.9717910261368887, + "grad_norm": 1.4483927834874066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406570 + }, + { + "epoch": 1.9718395243297246, + "grad_norm": 1.0418110818477544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406580 + }, + { + "epoch": 1.9718880225225608, + "grad_norm": 9.876650608475757e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406590 + }, + { + "epoch": 1.971936520715397, + "grad_norm": 6.91537271890752e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406600 + }, + { + "epoch": 1.9719850189082329, + "grad_norm": 1.4634326639395567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406610 + }, + { + "epoch": 1.972033517101069, + "grad_norm": 1.1020993895272113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406620 + }, + { + "epoch": 1.9720820152939051, + "grad_norm": 7.874730201251623e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406630 + }, + { + "epoch": 1.972130513486741, + "grad_norm": 8.224597891626217e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406640 + }, + { + "epoch": 1.9721790116795774, + "grad_norm": 1.9782378402055656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406650 + }, + { + "epoch": 1.9722275098724134, + "grad_norm": 1.652921177708322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406660 + }, + { + "epoch": 1.9722760080652495, + "grad_norm": 1.3236575391317729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406670 + }, + { + "epoch": 1.9723245062580856, + "grad_norm": 6.904072868962885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406680 + }, + { + "epoch": 1.9723730044509216, + "grad_norm": 1.2191446963072394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406690 + }, + { + "epoch": 1.9724215026437577, + "grad_norm": 1.202064936478564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406700 + }, + { + "epoch": 1.9724700008365939, + "grad_norm": 1.4589193852998505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406710 + }, + { + "epoch": 1.9725184990294298, + "grad_norm": 7.538347723823335e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406720 + }, + { + "epoch": 1.9725669972222661, + "grad_norm": 1.0347398493593118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406730 + }, + { + "epoch": 1.972615495415102, + "grad_norm": 8.642753179799456e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406740 + }, + { + "epoch": 1.9726639936079382, + "grad_norm": 1.1228404872554165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406750 + }, + { + "epoch": 1.9727124918007743, + "grad_norm": 8.723766597995564e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406760 + }, + { + "epoch": 1.9727609899936103, + "grad_norm": 1.2796507853352068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406770 + }, + { + "epoch": 1.9728094881864464, + "grad_norm": 1.7963870391213277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406780 + }, + { + "epoch": 1.9728579863792826, + "grad_norm": 9.98581572986268e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406790 + }, + { + "epoch": 1.9729064845721185, + "grad_norm": 1.3790592667817236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406800 + }, + { + "epoch": 1.9729549827649548, + "grad_norm": 1.4087588873223922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406810 + }, + { + "epoch": 1.9730034809577908, + "grad_norm": 1.4528147573855676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406820 + }, + { + "epoch": 1.973051979150627, + "grad_norm": 1.1010116374166046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406830 + }, + { + "epoch": 1.973100477343463, + "grad_norm": 1.5205705139464953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406840 + }, + { + "epoch": 1.973148975536299, + "grad_norm": 1.3728270076285298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406850 + }, + { + "epoch": 1.9731974737291351, + "grad_norm": 1.4696356132049004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406860 + }, + { + "epoch": 1.9732459719219713, + "grad_norm": 1.1360180351971394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406870 + }, + { + "epoch": 1.9732944701148072, + "grad_norm": 1.5331631075810037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406880 + }, + { + "epoch": 1.9733429683076436, + "grad_norm": 1.7947133557072448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406890 + }, + { + "epoch": 1.9733914665004795, + "grad_norm": 9.903360798091398e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406900 + }, + { + "epoch": 1.9734399646933156, + "grad_norm": 9.531607503276973e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406910 + }, + { + "epoch": 1.9734884628861518, + "grad_norm": 1.7387241868505043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406920 + }, + { + "epoch": 1.9735369610789877, + "grad_norm": 2.0351521357042657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406930 + }, + { + "epoch": 1.973585459271824, + "grad_norm": 1.2987644737449955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406940 + }, + { + "epoch": 1.97363395746466, + "grad_norm": 1.2351384803821475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406950 + }, + { + "epoch": 1.9736824556574961, + "grad_norm": 1.0510693648768665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406960 + }, + { + "epoch": 1.9737309538503323, + "grad_norm": 1.190759579827727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406970 + }, + { + "epoch": 1.9737794520431682, + "grad_norm": 1.2027571827388783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406980 + }, + { + "epoch": 1.9738279502360043, + "grad_norm": 1.2986806297021758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 406990 + }, + { + "epoch": 1.9738764484288405, + "grad_norm": 8.603071144364094e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407000 + }, + { + "epoch": 1.9739249466216764, + "grad_norm": 2.0890082552682543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407010 + }, + { + "epoch": 1.9739734448145128, + "grad_norm": 1.0952119211538047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407020 + }, + { + "epoch": 1.9740219430073487, + "grad_norm": 1.2155580542128064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407030 + }, + { + "epoch": 1.9740704412001848, + "grad_norm": 1.18194245501968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407040 + }, + { + "epoch": 1.974118939393021, + "grad_norm": 1.018221329474045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407050 + }, + { + "epoch": 1.9741674375858569, + "grad_norm": 9.743331474965089e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407060 + }, + { + "epoch": 1.974215935778693, + "grad_norm": 7.458314854602577e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407070 + }, + { + "epoch": 1.9742644339715292, + "grad_norm": 2.3362082046674004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407080 + }, + { + "epoch": 1.974312932164365, + "grad_norm": 8.508388660288801e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407090 + }, + { + "epoch": 1.9743614303572015, + "grad_norm": 1.2955756467647461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407100 + }, + { + "epoch": 1.9744099285500374, + "grad_norm": 1.7486524228615963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407110 + }, + { + "epoch": 1.9744584267428735, + "grad_norm": 1.4252521829405396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407120 + }, + { + "epoch": 1.9745069249357097, + "grad_norm": 1.2600793297679047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407130 + }, + { + "epoch": 1.9745554231285456, + "grad_norm": 1.7496804005645572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407140 + }, + { + "epoch": 1.9746039213213817, + "grad_norm": 1.847445219027577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407150 + }, + { + "epoch": 1.9746524195142179, + "grad_norm": 1.4456328578660305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407160 + }, + { + "epoch": 1.9747009177070538, + "grad_norm": 1.3485301764148971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407170 + }, + { + "epoch": 1.9747494158998902, + "grad_norm": 2.319516667625976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407180 + }, + { + "epoch": 1.974797914092726, + "grad_norm": 7.86120146756275e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407190 + }, + { + "epoch": 1.9748464122855622, + "grad_norm": 6.8275967102238155e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407200 + }, + { + "epoch": 1.9748949104783984, + "grad_norm": 1.015303841001014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407210 + }, + { + "epoch": 1.9749434086712343, + "grad_norm": 1.0883912437975596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407220 + }, + { + "epoch": 1.9749919068640704, + "grad_norm": 9.543755119523212e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407230 + }, + { + "epoch": 1.9750404050569066, + "grad_norm": 1.3778503671346698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407240 + }, + { + "epoch": 1.9750889032497425, + "grad_norm": 8.921918315252242e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407250 + }, + { + "epoch": 1.9751374014425789, + "grad_norm": 5.706364714086476e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407260 + }, + { + "epoch": 1.9751858996354148, + "grad_norm": 1.0220848167818986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407270 + }, + { + "epoch": 1.975234397828251, + "grad_norm": 1.3918571184490247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407280 + }, + { + "epoch": 1.975282896021087, + "grad_norm": 8.269291917883947e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407290 + }, + { + "epoch": 1.975331394213923, + "grad_norm": 1.3515432328858878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407300 + }, + { + "epoch": 1.9753798924067592, + "grad_norm": 1.3008178534335002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407310 + }, + { + "epoch": 1.9754283905995953, + "grad_norm": 1.025270623955521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407320 + }, + { + "epoch": 1.9754768887924312, + "grad_norm": 1.611792477262952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407330 + }, + { + "epoch": 1.9755253869852676, + "grad_norm": 1.3712988966574358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407340 + }, + { + "epoch": 1.9755738851781035, + "grad_norm": 1.497178736542537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407350 + }, + { + "epoch": 1.9756223833709396, + "grad_norm": 1.4381334345614505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407360 + }, + { + "epoch": 1.9756708815637758, + "grad_norm": 1.320544384952882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407370 + }, + { + "epoch": 1.9757193797566117, + "grad_norm": 9.181377436107141e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407380 + }, + { + "epoch": 1.9757678779494479, + "grad_norm": 1.530727367082818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407390 + }, + { + "epoch": 1.975816376142284, + "grad_norm": 1.1454546644529273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407400 + }, + { + "epoch": 1.97586487433512, + "grad_norm": 9.832163527789817e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407410 + }, + { + "epoch": 1.9759133725279563, + "grad_norm": 1.2638722068913921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407420 + }, + { + "epoch": 1.9759618707207922, + "grad_norm": 1.5716699053314187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407430 + }, + { + "epoch": 1.9760103689136284, + "grad_norm": 1.1995362036998358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407440 + }, + { + "epoch": 1.9760588671064645, + "grad_norm": 1.2697649154347346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407450 + }, + { + "epoch": 1.9761073652993004, + "grad_norm": 1.1334408966945375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407460 + }, + { + "epoch": 1.9761558634921368, + "grad_norm": 9.971373060579936e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407470 + }, + { + "epoch": 1.9762043616849727, + "grad_norm": 1.1789691001240499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407480 + }, + { + "epoch": 1.9762528598778089, + "grad_norm": 8.678204821421787e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407490 + }, + { + "epoch": 1.976301358070645, + "grad_norm": 1.1629475160646052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407500 + }, + { + "epoch": 1.976349856263481, + "grad_norm": 8.270428786261164e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407510 + }, + { + "epoch": 1.976398354456317, + "grad_norm": 1.3184391356446667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407520 + }, + { + "epoch": 1.9764468526491532, + "grad_norm": 1.752714240410569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407530 + }, + { + "epoch": 1.9764953508419891, + "grad_norm": 1.3110271979144272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407540 + }, + { + "epoch": 1.9765438490348255, + "grad_norm": 8.992520506012625e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407550 + }, + { + "epoch": 1.9765923472276614, + "grad_norm": 1.0033875952331073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407560 + }, + { + "epoch": 1.9766408454204976, + "grad_norm": 2.1341490352710935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407570 + }, + { + "epoch": 1.9766893436133337, + "grad_norm": 9.347468576947904e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407580 + }, + { + "epoch": 1.9767378418061696, + "grad_norm": 7.943397051235479e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407590 + }, + { + "epoch": 1.9767863399990058, + "grad_norm": 1.0046941945063281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407600 + }, + { + "epoch": 1.976834838191842, + "grad_norm": 2.020685307968506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407610 + }, + { + "epoch": 1.9768833363846778, + "grad_norm": 1.3488818062512564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407620 + }, + { + "epoch": 1.9769318345775142, + "grad_norm": 8.47738235165707e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407630 + }, + { + "epoch": 1.9769803327703501, + "grad_norm": 7.617257935521593e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407640 + }, + { + "epoch": 1.9770288309631863, + "grad_norm": 9.499514952437949e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407650 + }, + { + "epoch": 1.9770773291560224, + "grad_norm": 1.2942361848899964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407660 + }, + { + "epoch": 1.9771258273488583, + "grad_norm": 1.2070995758506342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407670 + }, + { + "epoch": 1.9771743255416945, + "grad_norm": 9.984706395016474e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407680 + }, + { + "epoch": 1.9772228237345306, + "grad_norm": 1.0829427132819092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407690 + }, + { + "epoch": 1.9772713219273665, + "grad_norm": 7.904497500987873e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407700 + }, + { + "epoch": 1.977319820120203, + "grad_norm": 1.1380818065731546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407710 + }, + { + "epoch": 1.9773683183130388, + "grad_norm": 8.871317014325086e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407720 + }, + { + "epoch": 1.977416816505875, + "grad_norm": 1.4674709447604073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407730 + }, + { + "epoch": 1.9774653146987111, + "grad_norm": 2.2158145540629448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407740 + }, + { + "epoch": 1.977513812891547, + "grad_norm": 1.1626132057074301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407750 + }, + { + "epoch": 1.9775623110843832, + "grad_norm": 1.28489565653922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407760 + }, + { + "epoch": 1.9776108092772193, + "grad_norm": 1.3963993517052131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407770 + }, + { + "epoch": 1.9776593074700552, + "grad_norm": 1.0087126689484194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407780 + }, + { + "epoch": 1.9777078056628916, + "grad_norm": 9.910243292665655e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407790 + }, + { + "epoch": 1.9777563038557275, + "grad_norm": 1.5448474499635267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407800 + }, + { + "epoch": 1.9778048020485637, + "grad_norm": 8.608124879572188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407810 + }, + { + "epoch": 1.9778533002413998, + "grad_norm": 9.897898500810243e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407820 + }, + { + "epoch": 1.9779017984342357, + "grad_norm": 1.1721842163581186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407830 + }, + { + "epoch": 1.977950296627072, + "grad_norm": 1.3005721832826111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407840 + }, + { + "epoch": 1.977998794819908, + "grad_norm": 1.3351504790648505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407850 + }, + { + "epoch": 1.978047293012744, + "grad_norm": 1.498840163094428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407860 + }, + { + "epoch": 1.9780957912055803, + "grad_norm": 9.479848017690529e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407870 + }, + { + "epoch": 1.9781442893984162, + "grad_norm": 9.108023668602527e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407880 + }, + { + "epoch": 1.9781927875912524, + "grad_norm": 6.459538237635343e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407890 + }, + { + "epoch": 1.9782412857840885, + "grad_norm": 8.182595045980179e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407900 + }, + { + "epoch": 1.9782897839769245, + "grad_norm": 6.4948171285550416e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407910 + }, + { + "epoch": 1.9783382821697606, + "grad_norm": 1.2216941236431467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407920 + }, + { + "epoch": 1.9783867803625967, + "grad_norm": 7.605099661134318e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407930 + }, + { + "epoch": 1.9784352785554327, + "grad_norm": 1.573430985502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407940 + }, + { + "epoch": 1.978483776748269, + "grad_norm": 1.1461414928248814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407950 + }, + { + "epoch": 1.978532274941105, + "grad_norm": 1.1893938278717542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407960 + }, + { + "epoch": 1.978580773133941, + "grad_norm": 1.4277706128495993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407970 + }, + { + "epoch": 1.9786292713267772, + "grad_norm": 9.213897200766041e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407980 + }, + { + "epoch": 1.9786777695196132, + "grad_norm": 1.3246252095200362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 407990 + }, + { + "epoch": 1.9787262677124495, + "grad_norm": 8.111340044081317e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408000 + }, + { + "epoch": 1.9787747659052854, + "grad_norm": 9.62574198126731e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408010 + }, + { + "epoch": 1.9788232640981216, + "grad_norm": 1.3285060163070739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408020 + }, + { + "epoch": 1.9788717622909577, + "grad_norm": 9.446600834905894e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408030 + }, + { + "epoch": 1.9789202604837937, + "grad_norm": 1.0392890992250159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408040 + }, + { + "epoch": 1.9789687586766298, + "grad_norm": 8.290056641158117e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408050 + }, + { + "epoch": 1.979017256869466, + "grad_norm": 1.3520051744819739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408060 + }, + { + "epoch": 1.9790657550623019, + "grad_norm": 1.370957392055061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408070 + }, + { + "epoch": 1.9791142532551382, + "grad_norm": 9.732269212747724e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408080 + }, + { + "epoch": 1.9791627514479742, + "grad_norm": 1.0727203125782125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408090 + }, + { + "epoch": 1.9792112496408103, + "grad_norm": 7.57591855915507e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408100 + }, + { + "epoch": 1.9792597478336464, + "grad_norm": 1.049272935205181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408110 + }, + { + "epoch": 1.9793082460264824, + "grad_norm": 9.169914605422491e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408120 + }, + { + "epoch": 1.9793567442193185, + "grad_norm": 1.4251969382428342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408130 + }, + { + "epoch": 1.9794052424121547, + "grad_norm": 1.7095992177473818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408140 + }, + { + "epoch": 1.9794537406049906, + "grad_norm": 8.942913964915533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408150 + }, + { + "epoch": 1.979502238797827, + "grad_norm": 1.8812494673170477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408160 + }, + { + "epoch": 1.9795507369906629, + "grad_norm": 1.1136624067376033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408170 + }, + { + "epoch": 1.979599235183499, + "grad_norm": 1.4599679687421485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408180 + }, + { + "epoch": 1.9796477333763352, + "grad_norm": 1.011361572267333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408190 + }, + { + "epoch": 1.979696231569171, + "grad_norm": 1.2892646061857249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408200 + }, + { + "epoch": 1.9797447297620072, + "grad_norm": 6.4874274841031365e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408210 + }, + { + "epoch": 1.9797932279548434, + "grad_norm": 7.477546581924344e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408220 + }, + { + "epoch": 1.9798417261476793, + "grad_norm": 1.2934423310184684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408230 + }, + { + "epoch": 1.9798902243405156, + "grad_norm": 1.3534959819594405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408240 + }, + { + "epoch": 1.9799387225333516, + "grad_norm": 1.374147373667256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408250 + }, + { + "epoch": 1.9799872207261877, + "grad_norm": 1.9481687374423018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408260 + }, + { + "epoch": 1.9800357189190239, + "grad_norm": 1.1456557480471474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408270 + }, + { + "epoch": 1.9800842171118598, + "grad_norm": 1.2691827144806211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408280 + }, + { + "epoch": 1.980132715304696, + "grad_norm": 1.1549706968594364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408290 + }, + { + "epoch": 1.980181213497532, + "grad_norm": 1.4149443394728678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408300 + }, + { + "epoch": 1.980229711690368, + "grad_norm": 1.695085494191062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408310 + }, + { + "epoch": 1.9802782098832044, + "grad_norm": 1.7113800154788805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408320 + }, + { + "epoch": 1.9803267080760403, + "grad_norm": 1.040041208710818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408330 + }, + { + "epoch": 1.9803752062688764, + "grad_norm": 1.7137081087525985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408340 + }, + { + "epoch": 1.9804237044617126, + "grad_norm": 1.0054901800060634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408350 + }, + { + "epoch": 1.9804722026545485, + "grad_norm": 9.644244514106504e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408360 + }, + { + "epoch": 1.9805207008473846, + "grad_norm": 1.3751521699134628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408370 + }, + { + "epoch": 1.9805691990402208, + "grad_norm": 1.487088230334166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408380 + }, + { + "epoch": 1.9806176972330567, + "grad_norm": 9.168629411249185e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408390 + }, + { + "epoch": 1.980666195425893, + "grad_norm": 1.4062109698897984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408400 + }, + { + "epoch": 1.980714693618729, + "grad_norm": 9.740242390421372e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408410 + }, + { + "epoch": 1.9807631918115651, + "grad_norm": 1.3842686108489488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408420 + }, + { + "epoch": 1.9808116900044013, + "grad_norm": 1.1424975632223777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408430 + }, + { + "epoch": 1.9808601881972372, + "grad_norm": 8.72966943177289e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408440 + }, + { + "epoch": 1.9809086863900733, + "grad_norm": 1.2491082834742429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408450 + }, + { + "epoch": 1.9809571845829095, + "grad_norm": 8.313852717378722e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408460 + }, + { + "epoch": 1.9810056827757454, + "grad_norm": 7.63500906941772e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408470 + }, + { + "epoch": 1.9810541809685818, + "grad_norm": 1.0364555436126466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408480 + }, + { + "epoch": 1.9811026791614177, + "grad_norm": 1.4358414901494143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408490 + }, + { + "epoch": 1.9811511773542538, + "grad_norm": 1.2202036714370479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408500 + }, + { + "epoch": 1.98119967554709, + "grad_norm": 8.16941270187499e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408510 + }, + { + "epoch": 1.981248173739926, + "grad_norm": 1.3828260314596719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408520 + }, + { + "epoch": 1.9812966719327623, + "grad_norm": 8.459999811805119e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408530 + }, + { + "epoch": 1.9813451701255982, + "grad_norm": 1.8320058359222458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408540 + }, + { + "epoch": 1.9813936683184343, + "grad_norm": 8.364839487740028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408550 + }, + { + "epoch": 1.9814421665112705, + "grad_norm": 1.3829406064758132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408560 + }, + { + "epoch": 1.9814906647041064, + "grad_norm": 1.2035801688625725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408570 + }, + { + "epoch": 1.9815391628969425, + "grad_norm": 1.0135302375147148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408580 + }, + { + "epoch": 1.9815876610897787, + "grad_norm": 1.695251405919862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408590 + }, + { + "epoch": 1.9816361592826146, + "grad_norm": 9.914506549080215e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408600 + }, + { + "epoch": 1.981684657475451, + "grad_norm": 8.104088067284465e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408610 + }, + { + "epoch": 1.981733155668287, + "grad_norm": 7.935827994742795e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408620 + }, + { + "epoch": 1.981781653861123, + "grad_norm": 1.7553192677155494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408630 + }, + { + "epoch": 1.9818301520539592, + "grad_norm": 1.4080333343713392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408640 + }, + { + "epoch": 1.981878650246795, + "grad_norm": 1.662082205200477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408650 + }, + { + "epoch": 1.9819271484396312, + "grad_norm": 1.9490460800852816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408660 + }, + { + "epoch": 1.9819756466324674, + "grad_norm": 1.501761204281138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408670 + }, + { + "epoch": 1.9820241448253033, + "grad_norm": 1.4802635561750321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408680 + }, + { + "epoch": 1.9820726430181397, + "grad_norm": 1.0896594737630494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408690 + }, + { + "epoch": 1.9821211412109756, + "grad_norm": 1.5475491110805706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408700 + }, + { + "epoch": 1.9821696394038117, + "grad_norm": 1.8777958743498857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408710 + }, + { + "epoch": 1.9822181375966479, + "grad_norm": 1.190404308459847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408720 + }, + { + "epoch": 1.9822666357894838, + "grad_norm": 1.0180636778045482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408730 + }, + { + "epoch": 1.98231513398232, + "grad_norm": 1.0532088090542402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408740 + }, + { + "epoch": 1.982363632175156, + "grad_norm": 9.53046175311556e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408750 + }, + { + "epoch": 1.982412130367992, + "grad_norm": 1.340382826953146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408760 + }, + { + "epoch": 1.9824606285608284, + "grad_norm": 9.761351726922385e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408770 + }, + { + "epoch": 1.9825091267536643, + "grad_norm": 1.3320012648421198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408780 + }, + { + "epoch": 1.9825576249465005, + "grad_norm": 9.267930423106918e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408790 + }, + { + "epoch": 1.9826061231393366, + "grad_norm": 1.9179260846158286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408800 + }, + { + "epoch": 1.9826546213321725, + "grad_norm": 1.4594197850215096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408810 + }, + { + "epoch": 1.9827031195250087, + "grad_norm": 8.545735674658772e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408820 + }, + { + "epoch": 1.9827516177178448, + "grad_norm": 1.9768172876410972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408830 + }, + { + "epoch": 1.9828001159106807, + "grad_norm": 1.130717297570527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408840 + }, + { + "epoch": 1.982848614103517, + "grad_norm": 1.2272667326840292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408850 + }, + { + "epoch": 1.982897112296353, + "grad_norm": 1.1171093383666175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408860 + }, + { + "epoch": 1.9829456104891892, + "grad_norm": 9.310049620125938e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408870 + }, + { + "epoch": 1.9829941086820253, + "grad_norm": 1.2794679093985906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408880 + }, + { + "epoch": 1.9830426068748612, + "grad_norm": 1.0122467308804062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408890 + }, + { + "epoch": 1.9830911050676974, + "grad_norm": 8.400803608310525e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408900 + }, + { + "epoch": 1.9831396032605335, + "grad_norm": 1.1179486669732341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408910 + }, + { + "epoch": 1.9831881014533694, + "grad_norm": 1.3266090448382784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408920 + }, + { + "epoch": 1.9832365996462058, + "grad_norm": 6.795577434104416e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408930 + }, + { + "epoch": 1.9832850978390417, + "grad_norm": 8.830693509764842e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408940 + }, + { + "epoch": 1.9833335960318779, + "grad_norm": 1.760909462689142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408950 + }, + { + "epoch": 1.983382094224714, + "grad_norm": 1.617704725731528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408960 + }, + { + "epoch": 1.98343059241755, + "grad_norm": 1.0037937592244361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408970 + }, + { + "epoch": 1.983479090610386, + "grad_norm": 1.039033925565036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408980 + }, + { + "epoch": 1.9835275888032222, + "grad_norm": 1.3697054157546518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 408990 + }, + { + "epoch": 1.9835760869960584, + "grad_norm": 7.050108496997609e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409000 + }, + { + "epoch": 1.9836245851888945, + "grad_norm": 1.1061382032551137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409010 + }, + { + "epoch": 1.9836730833817304, + "grad_norm": 7.359458376043904e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409020 + }, + { + "epoch": 1.9837215815745666, + "grad_norm": 1.503410196335153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409030 + }, + { + "epoch": 1.9837700797674027, + "grad_norm": 8.752077285123505e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409040 + }, + { + "epoch": 1.9838185779602386, + "grad_norm": 1.0905131020422232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409050 + }, + { + "epoch": 1.983867076153075, + "grad_norm": 1.551460471205246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409060 + }, + { + "epoch": 1.983915574345911, + "grad_norm": 6.646395878107114e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409070 + }, + { + "epoch": 1.983964072538747, + "grad_norm": 1.3601550108433003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409080 + }, + { + "epoch": 1.9840125707315832, + "grad_norm": 1.0134312056209183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409090 + }, + { + "epoch": 1.9840610689244191, + "grad_norm": 1.1248883602377191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409100 + }, + { + "epoch": 1.9841095671172553, + "grad_norm": 8.523246997071965e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409110 + }, + { + "epoch": 1.9841580653100914, + "grad_norm": 9.713956750090347e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409120 + }, + { + "epoch": 1.9842065635029273, + "grad_norm": 1.3206312488023286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409130 + }, + { + "epoch": 1.9842550616957637, + "grad_norm": 1.666251847609601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409140 + }, + { + "epoch": 1.9843035598885996, + "grad_norm": 8.959374575567836e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409150 + }, + { + "epoch": 1.9843520580814358, + "grad_norm": 1.4156763761263846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409160 + }, + { + "epoch": 1.984400556274272, + "grad_norm": 9.820473323429724e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409170 + }, + { + "epoch": 1.9844490544671078, + "grad_norm": 6.582704159541208e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409180 + }, + { + "epoch": 1.984497552659944, + "grad_norm": 1.3137347210090411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409190 + }, + { + "epoch": 1.9845460508527801, + "grad_norm": 1.576310637574352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409200 + }, + { + "epoch": 1.984594549045616, + "grad_norm": 6.642995487027292e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409210 + }, + { + "epoch": 1.9846430472384524, + "grad_norm": 1.3966112710761536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409220 + }, + { + "epoch": 1.9846915454312883, + "grad_norm": 1.582737318983618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409230 + }, + { + "epoch": 1.9847400436241245, + "grad_norm": 8.13203371308191e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409240 + }, + { + "epoch": 1.9847885418169606, + "grad_norm": 1.415100836510419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409250 + }, + { + "epoch": 1.9848370400097965, + "grad_norm": 9.517896693012062e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409260 + }, + { + "epoch": 1.9848855382026327, + "grad_norm": 1.0019683749362684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409270 + }, + { + "epoch": 1.9849340363954688, + "grad_norm": 6.916921257982267e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409280 + }, + { + "epoch": 1.9849825345883048, + "grad_norm": 7.99990562683206e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409290 + }, + { + "epoch": 1.9850310327811411, + "grad_norm": 1.3255042397020134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409300 + }, + { + "epoch": 1.985079530973977, + "grad_norm": 1.5528808461340304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409310 + }, + { + "epoch": 1.9851280291668132, + "grad_norm": 1.3798879372473039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409320 + }, + { + "epoch": 1.9851765273596493, + "grad_norm": 9.663880362609234e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409330 + }, + { + "epoch": 1.9852250255524853, + "grad_norm": 1.3149243471843874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409340 + }, + { + "epoch": 1.9852735237453214, + "grad_norm": 1.2142538530213187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409350 + }, + { + "epoch": 1.9853220219381575, + "grad_norm": 1.0647726256252099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409360 + }, + { + "epoch": 1.9853705201309935, + "grad_norm": 1.4385579838460671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409370 + }, + { + "epoch": 1.9854190183238298, + "grad_norm": 1.2151359918277649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409380 + }, + { + "epoch": 1.9854675165166658, + "grad_norm": 1.1657866672010186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409390 + }, + { + "epoch": 1.985516014709502, + "grad_norm": 1.1258679322168064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409400 + }, + { + "epoch": 1.985564512902338, + "grad_norm": 1.1679216704862938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409410 + }, + { + "epoch": 1.985613011095174, + "grad_norm": 1.6130526248048227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409420 + }, + { + "epoch": 1.98566150928801, + "grad_norm": 1.3530161879771185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409430 + }, + { + "epoch": 1.9857100074808463, + "grad_norm": 8.675308471595145e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409440 + }, + { + "epoch": 1.9857585056736822, + "grad_norm": 9.733490458074812e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409450 + }, + { + "epoch": 1.9858070038665185, + "grad_norm": 6.8804686392809344e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409460 + }, + { + "epoch": 1.9858555020593545, + "grad_norm": 1.2837873875071182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409470 + }, + { + "epoch": 1.9859040002521906, + "grad_norm": 9.223482422271445e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409480 + }, + { + "epoch": 1.9859524984450267, + "grad_norm": 1.2711553587507751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409490 + }, + { + "epoch": 1.9860009966378627, + "grad_norm": 1.4271931192411103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409500 + }, + { + "epoch": 1.986049494830699, + "grad_norm": 2.0500378283827558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409510 + }, + { + "epoch": 1.986097993023535, + "grad_norm": 1.1387096598980406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409520 + }, + { + "epoch": 1.986146491216371, + "grad_norm": 9.471439632591228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409530 + }, + { + "epoch": 1.9861949894092072, + "grad_norm": 1.379765990350279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409540 + }, + { + "epoch": 1.9862434876020432, + "grad_norm": 1.3621261452101407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409550 + }, + { + "epoch": 1.9862919857948793, + "grad_norm": 1.118905768038303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409560 + }, + { + "epoch": 1.9863404839877155, + "grad_norm": 1.2460485976362179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409570 + }, + { + "epoch": 1.9863889821805514, + "grad_norm": 1.3898869610784459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409580 + }, + { + "epoch": 1.9864374803733877, + "grad_norm": 1.2518071024203437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409590 + }, + { + "epoch": 1.9864859785662237, + "grad_norm": 1.5659269436696377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409600 + }, + { + "epoch": 1.9865344767590598, + "grad_norm": 1.4396947634054413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409610 + }, + { + "epoch": 1.986582974951896, + "grad_norm": 1.27040040709403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409620 + }, + { + "epoch": 1.9866314731447319, + "grad_norm": 1.3734109849394827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409630 + }, + { + "epoch": 1.986679971337568, + "grad_norm": 7.460522866153951e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409640 + }, + { + "epoch": 1.9867284695304042, + "grad_norm": 1.2643179836402396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409650 + }, + { + "epoch": 1.98677696772324, + "grad_norm": 1.1476234185181511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409660 + }, + { + "epoch": 1.9868254659160764, + "grad_norm": 1.1334265082041384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409670 + }, + { + "epoch": 1.9868739641089124, + "grad_norm": 1.6010929471121926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409680 + }, + { + "epoch": 1.9869224623017485, + "grad_norm": 1.0537525518827806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409690 + }, + { + "epoch": 1.9869709604945847, + "grad_norm": 8.301078047168176e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409700 + }, + { + "epoch": 1.9870194586874206, + "grad_norm": 1.3700211631828552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409710 + }, + { + "epoch": 1.9870679568802567, + "grad_norm": 1.714151309784029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409720 + }, + { + "epoch": 1.9871164550730929, + "grad_norm": 7.094120846318219e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409730 + }, + { + "epoch": 1.9871649532659288, + "grad_norm": 1.2348301048348276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409740 + }, + { + "epoch": 1.9872134514587652, + "grad_norm": 9.460309868813965e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409750 + }, + { + "epoch": 1.987261949651601, + "grad_norm": 1.1747078865198546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409760 + }, + { + "epoch": 1.9873104478444372, + "grad_norm": 8.344491320144698e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409770 + }, + { + "epoch": 1.9873589460372734, + "grad_norm": 1.3380461183487569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409780 + }, + { + "epoch": 1.9874074442301093, + "grad_norm": 9.562529434958833e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409790 + }, + { + "epoch": 1.9874559424229454, + "grad_norm": 1.1597959037601413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409800 + }, + { + "epoch": 1.9875044406157816, + "grad_norm": 1.0116968596207698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409810 + }, + { + "epoch": 1.9875529388086175, + "grad_norm": 1.1828173995809266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409820 + }, + { + "epoch": 1.9876014370014539, + "grad_norm": 6.106320782350849e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409830 + }, + { + "epoch": 1.9876499351942898, + "grad_norm": 1.516147030145021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409840 + }, + { + "epoch": 1.987698433387126, + "grad_norm": 1.302293561877832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409850 + }, + { + "epoch": 1.987746931579962, + "grad_norm": 8.040989207813709e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409860 + }, + { + "epoch": 1.987795429772798, + "grad_norm": 8.987392163817276e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409870 + }, + { + "epoch": 1.9878439279656341, + "grad_norm": 1.2060273668623722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409880 + }, + { + "epoch": 1.9878924261584703, + "grad_norm": 6.650056949553118e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409890 + }, + { + "epoch": 1.9879409243513062, + "grad_norm": 1.2216986533530871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409900 + }, + { + "epoch": 1.9879894225441426, + "grad_norm": 1.5020267696286282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409910 + }, + { + "epoch": 1.9880379207369785, + "grad_norm": 1.0278228046445292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409920 + }, + { + "epoch": 1.9880864189298146, + "grad_norm": 1.8202724660909553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409930 + }, + { + "epoch": 1.9881349171226508, + "grad_norm": 1.0146739448657627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409940 + }, + { + "epoch": 1.9881834153154867, + "grad_norm": 1.040686559150572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409950 + }, + { + "epoch": 1.9882319135083228, + "grad_norm": 1.593231502283743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409960 + }, + { + "epoch": 1.988280411701159, + "grad_norm": 1.1684104350706548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409970 + }, + { + "epoch": 1.988328909893995, + "grad_norm": 1.733361720823723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409980 + }, + { + "epoch": 1.9883774080868313, + "grad_norm": 7.2735359957221135e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 409990 + }, + { + "epoch": 1.9884259062796672, + "grad_norm": 1.183014397554416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410000 + }, + { + "epoch": 1.9884744044725033, + "grad_norm": 1.8642337451524327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410010 + }, + { + "epoch": 1.9885229026653395, + "grad_norm": 1.0479090484238895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410020 + }, + { + "epoch": 1.9885714008581754, + "grad_norm": 1.1822272938388778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410030 + }, + { + "epoch": 1.9886198990510118, + "grad_norm": 1.8044637783987127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410040 + }, + { + "epoch": 1.9886683972438477, + "grad_norm": 1.1304209124318731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410050 + }, + { + "epoch": 1.9887168954366838, + "grad_norm": 9.371711406913619e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410060 + }, + { + "epoch": 1.98876539362952, + "grad_norm": 1.0544213502328148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410070 + }, + { + "epoch": 1.988813891822356, + "grad_norm": 1.5509176165551253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410080 + }, + { + "epoch": 1.988862390015192, + "grad_norm": 1.175571373579487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410090 + }, + { + "epoch": 1.9889108882080282, + "grad_norm": 1.0891140433955115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410100 + }, + { + "epoch": 1.9889593864008641, + "grad_norm": 8.117534200380305e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410110 + }, + { + "epoch": 1.9890078845937005, + "grad_norm": 1.2170956686929912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410120 + }, + { + "epoch": 1.9890563827865364, + "grad_norm": 1.1452874204564978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410130 + }, + { + "epoch": 1.9891048809793725, + "grad_norm": 1.2141884830896288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410140 + }, + { + "epoch": 1.9891533791722087, + "grad_norm": 1.058777776563602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410150 + }, + { + "epoch": 1.9892018773650446, + "grad_norm": 7.170254612276494e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410160 + }, + { + "epoch": 1.9892503755578808, + "grad_norm": 1.536417215675101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410170 + }, + { + "epoch": 1.989298873750717, + "grad_norm": 1.4770173528688701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410180 + }, + { + "epoch": 1.9893473719435528, + "grad_norm": 1.080739586711843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410190 + }, + { + "epoch": 1.9893958701363892, + "grad_norm": 1.106957814300813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410200 + }, + { + "epoch": 1.989444368329225, + "grad_norm": 1.2675117844196393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410210 + }, + { + "epoch": 1.9894928665220613, + "grad_norm": 1.3192029690856089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410220 + }, + { + "epoch": 1.9895413647148974, + "grad_norm": 1.0677837281036773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410230 + }, + { + "epoch": 1.9895898629077333, + "grad_norm": 8.977972143497936e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410240 + }, + { + "epoch": 1.9896383611005695, + "grad_norm": 1.4362910860654665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410250 + }, + { + "epoch": 1.9896868592934056, + "grad_norm": 1.1112354592057727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410260 + }, + { + "epoch": 1.9897353574862415, + "grad_norm": 1.914068903374755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410270 + }, + { + "epoch": 1.989783855679078, + "grad_norm": 1.009972461218922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410280 + }, + { + "epoch": 1.9898323538719138, + "grad_norm": 7.558783821082216e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410290 + }, + { + "epoch": 1.98988085206475, + "grad_norm": 8.833946019137784e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410300 + }, + { + "epoch": 1.989929350257586, + "grad_norm": 1.3635896856101226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410310 + }, + { + "epoch": 1.989977848450422, + "grad_norm": 8.914197380249789e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410320 + }, + { + "epoch": 1.9900263466432582, + "grad_norm": 1.1304909897091875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410330 + }, + { + "epoch": 1.9900748448360943, + "grad_norm": 8.743225698992774e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410340 + }, + { + "epoch": 1.9901233430289302, + "grad_norm": 1.4223823896486465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410350 + }, + { + "epoch": 1.9901718412217666, + "grad_norm": 1.3158707901084199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410360 + }, + { + "epoch": 1.9902203394146025, + "grad_norm": 8.601013234965649e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410370 + }, + { + "epoch": 1.9902688376074387, + "grad_norm": 1.1841987834770862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410380 + }, + { + "epoch": 1.9903173358002748, + "grad_norm": 1.0498197866581904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410390 + }, + { + "epoch": 1.9903658339931107, + "grad_norm": 7.247524802522776e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410400 + }, + { + "epoch": 1.9904143321859469, + "grad_norm": 1.4959995908725432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410410 + }, + { + "epoch": 1.990462830378783, + "grad_norm": 1.1304317482085935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410420 + }, + { + "epoch": 1.990511328571619, + "grad_norm": 1.0303566888580917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410430 + }, + { + "epoch": 1.9905598267644553, + "grad_norm": 1.132015015059551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410440 + }, + { + "epoch": 1.9906083249572912, + "grad_norm": 1.2412400884898034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410450 + }, + { + "epoch": 1.9906568231501274, + "grad_norm": 1.2685704042780799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410460 + }, + { + "epoch": 1.9907053213429635, + "grad_norm": 9.705497738821123e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410470 + }, + { + "epoch": 1.9907538195357994, + "grad_norm": 1.0809367623210164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410480 + }, + { + "epoch": 1.9908023177286356, + "grad_norm": 1.70574843139093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410490 + }, + { + "epoch": 1.9908508159214717, + "grad_norm": 8.217583946645846e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410500 + }, + { + "epoch": 1.9908993141143076, + "grad_norm": 9.818513113657446e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410510 + }, + { + "epoch": 1.990947812307144, + "grad_norm": 1.7137280039491998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410520 + }, + { + "epoch": 1.99099631049998, + "grad_norm": 1.0142601425400244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410530 + }, + { + "epoch": 1.991044808692816, + "grad_norm": 1.2507639368664059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410540 + }, + { + "epoch": 1.9910933068856522, + "grad_norm": 1.3146465249747052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410550 + }, + { + "epoch": 1.9911418050784881, + "grad_norm": 7.007842306450129e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410560 + }, + { + "epoch": 1.9911903032713245, + "grad_norm": 1.1924877085789376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410570 + }, + { + "epoch": 1.9912388014641604, + "grad_norm": 1.2447872066445598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410580 + }, + { + "epoch": 1.9912872996569966, + "grad_norm": 1.0404221484350273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410590 + }, + { + "epoch": 1.9913357978498327, + "grad_norm": 1.0448726328604607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410600 + }, + { + "epoch": 1.9913842960426686, + "grad_norm": 1.4641081236277387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410610 + }, + { + "epoch": 1.9914327942355048, + "grad_norm": 1.7075297620294805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410620 + }, + { + "epoch": 1.991481292428341, + "grad_norm": 9.43820310794763e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410630 + }, + { + "epoch": 1.9915297906211769, + "grad_norm": 9.509151688291695e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410640 + }, + { + "epoch": 1.9915782888140132, + "grad_norm": 1.3130610376776986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410650 + }, + { + "epoch": 1.9916267870068491, + "grad_norm": 1.4531743808277042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410660 + }, + { + "epoch": 1.9916752851996853, + "grad_norm": 1.850916753198817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410670 + }, + { + "epoch": 1.9917237833925214, + "grad_norm": 1.7692043385864054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410680 + }, + { + "epoch": 1.9917722815853574, + "grad_norm": 1.2161700091439798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410690 + }, + { + "epoch": 1.9918207797781935, + "grad_norm": 1.5043745804632636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410700 + }, + { + "epoch": 1.9918692779710296, + "grad_norm": 9.117835375604955e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410710 + }, + { + "epoch": 1.9919177761638656, + "grad_norm": 8.357470271391776e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410720 + }, + { + "epoch": 1.991966274356702, + "grad_norm": 1.3083877092867624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410730 + }, + { + "epoch": 1.9920147725495378, + "grad_norm": 6.908781990944135e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410740 + }, + { + "epoch": 1.992063270742374, + "grad_norm": 1.4012311311262238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410750 + }, + { + "epoch": 1.9921117689352101, + "grad_norm": 1.2136908367210708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410760 + }, + { + "epoch": 1.992160267128046, + "grad_norm": 8.837515608206559e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410770 + }, + { + "epoch": 1.9922087653208822, + "grad_norm": 1.1320228310296443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410780 + }, + { + "epoch": 1.9922572635137183, + "grad_norm": 1.7441443844745663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410790 + }, + { + "epoch": 1.9923057617065543, + "grad_norm": 8.196281875427758e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410800 + }, + { + "epoch": 1.9923542598993906, + "grad_norm": 9.085195706859395e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410810 + }, + { + "epoch": 1.9924027580922266, + "grad_norm": 1.3534811493798315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410820 + }, + { + "epoch": 1.9924512562850627, + "grad_norm": 1.2624221668033897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410830 + }, + { + "epoch": 1.9924997544778988, + "grad_norm": 6.131011254240093e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410840 + }, + { + "epoch": 1.9925482526707348, + "grad_norm": 8.746419588590015e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410850 + }, + { + "epoch": 1.992596750863571, + "grad_norm": 8.414056118510871e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410860 + }, + { + "epoch": 1.992645249056407, + "grad_norm": 1.10303934874878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410870 + }, + { + "epoch": 1.992693747249243, + "grad_norm": 1.239425806431882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410880 + }, + { + "epoch": 1.9927422454420793, + "grad_norm": 1.3605845339270672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410890 + }, + { + "epoch": 1.9927907436349153, + "grad_norm": 1.2225865653192614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410900 + }, + { + "epoch": 1.9928392418277514, + "grad_norm": 1.4232064415864443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410910 + }, + { + "epoch": 1.9928877400205876, + "grad_norm": 1.0589663368421043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410920 + }, + { + "epoch": 1.9929362382134235, + "grad_norm": 1.3980076651876061e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410930 + }, + { + "epoch": 1.9929847364062596, + "grad_norm": 1.4076689147657362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410940 + }, + { + "epoch": 1.9930332345990958, + "grad_norm": 9.360413777415033e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410950 + }, + { + "epoch": 1.9930817327919317, + "grad_norm": 9.271055922965843e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410960 + }, + { + "epoch": 1.993130230984768, + "grad_norm": 1.290555751154443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410970 + }, + { + "epoch": 1.993178729177604, + "grad_norm": 9.394410582785895e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410980 + }, + { + "epoch": 1.9932272273704401, + "grad_norm": 1.5854078938559724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 410990 + }, + { + "epoch": 1.9932757255632763, + "grad_norm": 8.62138715973515e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411000 + }, + { + "epoch": 1.9933242237561122, + "grad_norm": 8.024658271210683e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411010 + }, + { + "epoch": 1.9933727219489483, + "grad_norm": 7.157684667191688e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411020 + }, + { + "epoch": 1.9934212201417845, + "grad_norm": 1.4584721874655315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411030 + }, + { + "epoch": 1.9934697183346206, + "grad_norm": 1.060764898141997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411040 + }, + { + "epoch": 1.9935182165274568, + "grad_norm": 8.636962256503011e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411050 + }, + { + "epoch": 1.9935667147202927, + "grad_norm": 1.5714759271645562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411060 + }, + { + "epoch": 1.9936152129131288, + "grad_norm": 9.850334770078462e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411070 + }, + { + "epoch": 1.993663711105965, + "grad_norm": 9.631141217880668e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411080 + }, + { + "epoch": 1.9937122092988009, + "grad_norm": 1.1611756889351454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411090 + }, + { + "epoch": 1.9937607074916373, + "grad_norm": 9.646961451892366e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411100 + }, + { + "epoch": 1.9938092056844732, + "grad_norm": 6.887335146643636e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411110 + }, + { + "epoch": 1.9938577038773093, + "grad_norm": 1.3358453898604239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411120 + }, + { + "epoch": 1.9939062020701455, + "grad_norm": 1.6753933351765227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411130 + }, + { + "epoch": 1.9939547002629814, + "grad_norm": 1.154874240683057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411140 + }, + { + "epoch": 1.9940031984558175, + "grad_norm": 1.2581952368861948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411150 + }, + { + "epoch": 1.9940516966486537, + "grad_norm": 9.302663528387711e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411160 + }, + { + "epoch": 1.9941001948414896, + "grad_norm": 1.1078871153813452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411170 + }, + { + "epoch": 1.994148693034326, + "grad_norm": 1.2004977456570032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411180 + }, + { + "epoch": 1.9941971912271619, + "grad_norm": 1.2776892432952991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411190 + }, + { + "epoch": 1.994245689419998, + "grad_norm": 1.4433958916981737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411200 + }, + { + "epoch": 1.9942941876128342, + "grad_norm": 1.4895472411069477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411210 + }, + { + "epoch": 1.99434268580567, + "grad_norm": 1.2283141614943816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411220 + }, + { + "epoch": 1.9943911839985062, + "grad_norm": 1.0236281156039695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411230 + }, + { + "epoch": 1.9944396821913424, + "grad_norm": 7.70369279479155e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411240 + }, + { + "epoch": 1.9944881803841783, + "grad_norm": 9.286559965460128e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411250 + }, + { + "epoch": 1.9945366785770147, + "grad_norm": 8.207933888115804e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411260 + }, + { + "epoch": 1.9945851767698506, + "grad_norm": 1.0303986996973435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411270 + }, + { + "epoch": 1.9946336749626867, + "grad_norm": 1.2112293390487139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411280 + }, + { + "epoch": 1.9946821731555229, + "grad_norm": 1.870497534639526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411290 + }, + { + "epoch": 1.9947306713483588, + "grad_norm": 1.0239426195823853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411300 + }, + { + "epoch": 1.994779169541195, + "grad_norm": 1.1582380388119873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411310 + }, + { + "epoch": 1.994827667734031, + "grad_norm": 6.2462399696983084e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411320 + }, + { + "epoch": 1.994876165926867, + "grad_norm": 1.0289208596248045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411330 + }, + { + "epoch": 1.9949246641197034, + "grad_norm": 1.5193348801290085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411340 + }, + { + "epoch": 1.9949731623125393, + "grad_norm": 9.00349217403118e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411350 + }, + { + "epoch": 1.9950216605053754, + "grad_norm": 1.1280322453899316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411360 + }, + { + "epoch": 1.9950701586982116, + "grad_norm": 1.3676103804982631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411370 + }, + { + "epoch": 1.9951186568910475, + "grad_norm": 1.6775031141946783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411380 + }, + { + "epoch": 1.9951671550838836, + "grad_norm": 1.1904187857680881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411390 + }, + { + "epoch": 1.9952156532767198, + "grad_norm": 9.013165325200134e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411400 + }, + { + "epoch": 1.9952641514695557, + "grad_norm": 7.921206801597691e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411410 + }, + { + "epoch": 1.995312649662392, + "grad_norm": 1.432933771639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411420 + }, + { + "epoch": 1.995361147855228, + "grad_norm": 1.5443170298112818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411430 + }, + { + "epoch": 1.9954096460480641, + "grad_norm": 1.5104916428754223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411440 + }, + { + "epoch": 1.9954581442409003, + "grad_norm": 1.2534028925870189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411450 + }, + { + "epoch": 1.9955066424337362, + "grad_norm": 1.5263076136307063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411460 + }, + { + "epoch": 1.9955551406265724, + "grad_norm": 1.2611306665633037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411470 + }, + { + "epoch": 1.9956036388194085, + "grad_norm": 1.1677288469513769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411480 + }, + { + "epoch": 1.9956521370122444, + "grad_norm": 1.5504021178003313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411490 + }, + { + "epoch": 1.9957006352050808, + "grad_norm": 1.0196138156004508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411500 + }, + { + "epoch": 1.9957491333979167, + "grad_norm": 1.096540724887518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411510 + }, + { + "epoch": 1.9957976315907529, + "grad_norm": 8.17465473090806e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411520 + }, + { + "epoch": 1.995846129783589, + "grad_norm": 1.4010661963936855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411530 + }, + { + "epoch": 1.995894627976425, + "grad_norm": 1.2150803030408497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411540 + }, + { + "epoch": 1.9959431261692613, + "grad_norm": 9.03607411117946e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411550 + }, + { + "epoch": 1.9959916243620972, + "grad_norm": 7.899875420491753e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411560 + }, + { + "epoch": 1.9960401225549333, + "grad_norm": 8.166415987886921e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411570 + }, + { + "epoch": 1.9960886207477695, + "grad_norm": 1.2460775522527001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411580 + }, + { + "epoch": 1.9961371189406054, + "grad_norm": 1.3239557894451082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411590 + }, + { + "epoch": 1.9961856171334416, + "grad_norm": 1.4335703291123991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411600 + }, + { + "epoch": 1.9962341153262777, + "grad_norm": 1.1500724816926322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411610 + }, + { + "epoch": 1.9962826135191136, + "grad_norm": 1.0678164130695222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411620 + }, + { + "epoch": 1.99633111171195, + "grad_norm": 1.3303126600305859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411630 + }, + { + "epoch": 1.996379609904786, + "grad_norm": 1.0648957271541803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411640 + }, + { + "epoch": 1.996428108097622, + "grad_norm": 1.1807560262866446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411650 + }, + { + "epoch": 1.9964766062904582, + "grad_norm": 1.2372437296903627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411660 + }, + { + "epoch": 1.9965251044832941, + "grad_norm": 1.3222576811244835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411670 + }, + { + "epoch": 1.9965736026761303, + "grad_norm": 8.349952729247434e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411680 + }, + { + "epoch": 1.9966221008689664, + "grad_norm": 1.1470570271399083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411690 + }, + { + "epoch": 1.9966705990618023, + "grad_norm": 2.0828231583891466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411700 + }, + { + "epoch": 1.9967190972546387, + "grad_norm": 1.5211208292953415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411710 + }, + { + "epoch": 1.9967675954474746, + "grad_norm": 1.5023788435541974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411720 + }, + { + "epoch": 1.9968160936403108, + "grad_norm": 9.662606714755384e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411730 + }, + { + "epoch": 1.996864591833147, + "grad_norm": 1.188037757060556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411740 + }, + { + "epoch": 1.9969130900259828, + "grad_norm": 1.09931939107355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411750 + }, + { + "epoch": 1.996961588218819, + "grad_norm": 2.4646263696581627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411760 + }, + { + "epoch": 1.9970100864116551, + "grad_norm": 1.1311514391820765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411770 + }, + { + "epoch": 1.997058584604491, + "grad_norm": 1.0108013093201862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411780 + }, + { + "epoch": 1.9971070827973274, + "grad_norm": 1.0261452132453996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411790 + }, + { + "epoch": 1.9971555809901633, + "grad_norm": 1.1469086125259764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411800 + }, + { + "epoch": 1.9972040791829995, + "grad_norm": 1.1837991031882211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411810 + }, + { + "epoch": 1.9972525773758356, + "grad_norm": 1.1528991095133279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411820 + }, + { + "epoch": 1.9973010755686715, + "grad_norm": 1.1215641748663074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411830 + }, + { + "epoch": 1.9973495737615077, + "grad_norm": 1.4896075484216453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411840 + }, + { + "epoch": 1.9973980719543438, + "grad_norm": 1.439621932775026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411850 + }, + { + "epoch": 1.9974465701471797, + "grad_norm": 1.1474537764399884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411860 + }, + { + "epoch": 1.9974950683400161, + "grad_norm": 9.578910109553362e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411870 + }, + { + "epoch": 1.997543566532852, + "grad_norm": 1.0739298339501602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411880 + }, + { + "epoch": 1.9975920647256882, + "grad_norm": 6.409266450901896e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411890 + }, + { + "epoch": 1.9976405629185243, + "grad_norm": 1.1939227384516471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411900 + }, + { + "epoch": 1.9976890611113602, + "grad_norm": 1.4219786237390508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411910 + }, + { + "epoch": 1.9977375593041964, + "grad_norm": 1.3765453665826044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411920 + }, + { + "epoch": 1.9977860574970325, + "grad_norm": 1.0258709437493962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411930 + }, + { + "epoch": 1.9978345556898685, + "grad_norm": 2.0289780522375622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411940 + }, + { + "epoch": 1.9978830538827048, + "grad_norm": 6.993729595450304e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411950 + }, + { + "epoch": 1.9979315520755407, + "grad_norm": 1.5398391894905217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411960 + }, + { + "epoch": 1.9979800502683769, + "grad_norm": 9.293234626284175e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411970 + }, + { + "epoch": 1.998028548461213, + "grad_norm": 9.568037029339393e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411980 + }, + { + "epoch": 1.998077046654049, + "grad_norm": 9.494172559243452e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 411990 + }, + { + "epoch": 1.998125544846885, + "grad_norm": 1.7440203947671762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412000 + }, + { + "epoch": 1.9981740430397212, + "grad_norm": 1.1834358382145638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412010 + }, + { + "epoch": 1.9982225412325572, + "grad_norm": 2.0851047111136722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412020 + }, + { + "epoch": 1.9982710394253935, + "grad_norm": 1.3745015792210324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412030 + }, + { + "epoch": 1.9983195376182294, + "grad_norm": 1.026182605556869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412040 + }, + { + "epoch": 1.9983680358110656, + "grad_norm": 1.0866095578876411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412050 + }, + { + "epoch": 1.9984165340039017, + "grad_norm": 1.5225298355403538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412060 + }, + { + "epoch": 1.9984650321967377, + "grad_norm": 1.0681387330180314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412070 + }, + { + "epoch": 1.998513530389574, + "grad_norm": 1.6689078563558724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412080 + }, + { + "epoch": 1.99856202858241, + "grad_norm": 8.95515661625268e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412090 + }, + { + "epoch": 1.998610526775246, + "grad_norm": 9.092559594137128e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412100 + }, + { + "epoch": 1.9986590249680822, + "grad_norm": 1.036910557417059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412110 + }, + { + "epoch": 1.9987075231609182, + "grad_norm": 1.0216814949615127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412120 + }, + { + "epoch": 1.9987560213537543, + "grad_norm": 1.1530192800535133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412130 + }, + { + "epoch": 1.9988045195465904, + "grad_norm": 9.371963649584814e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412140 + }, + { + "epoch": 1.9988530177394264, + "grad_norm": 1.3584026348212319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412150 + }, + { + "epoch": 1.9989015159322627, + "grad_norm": 9.593318139877738e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412160 + }, + { + "epoch": 1.9989500141250987, + "grad_norm": 1.1460742577185101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412170 + }, + { + "epoch": 1.9989985123179348, + "grad_norm": 8.247122096349813e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412180 + }, + { + "epoch": 1.999047010510771, + "grad_norm": 1.5546140375022333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412190 + }, + { + "epoch": 1.9990955087036069, + "grad_norm": 1.0562848373751876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412200 + }, + { + "epoch": 1.999144006896443, + "grad_norm": 1.4462457897934655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412210 + }, + { + "epoch": 1.9991925050892791, + "grad_norm": 1.8576791660507297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412220 + }, + { + "epoch": 1.999241003282115, + "grad_norm": 1.300463114972672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412230 + }, + { + "epoch": 1.9992895014749514, + "grad_norm": 1.501106083878767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412240 + }, + { + "epoch": 1.9993379996677874, + "grad_norm": 9.361857955525466e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412250 + }, + { + "epoch": 1.9993864978606235, + "grad_norm": 1.165181462425835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412260 + }, + { + "epoch": 1.9994349960534596, + "grad_norm": 9.984532312046213e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412270 + }, + { + "epoch": 1.9994834942462956, + "grad_norm": 1.1039547054281229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412280 + }, + { + "epoch": 1.9995319924391317, + "grad_norm": 9.287252744627494e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412290 + }, + { + "epoch": 1.9995804906319679, + "grad_norm": 1.227338408682499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412300 + }, + { + "epoch": 1.9996289888248038, + "grad_norm": 1.1905984642623935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412310 + }, + { + "epoch": 1.9996774870176401, + "grad_norm": 1.0095334346260643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412320 + }, + { + "epoch": 1.999725985210476, + "grad_norm": 1.232376511950406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412330 + }, + { + "epoch": 1.9997744834033122, + "grad_norm": 9.440937809301886e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412340 + }, + { + "epoch": 1.9998229815961484, + "grad_norm": 1.2829937112712742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412350 + }, + { + "epoch": 1.9998714797889843, + "grad_norm": 1.2338150057189523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412360 + }, + { + "epoch": 1.9999199779818204, + "grad_norm": 1.0564238372978707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412370 + }, + { + "epoch": 1.9999684761746566, + "grad_norm": 1.5015782395266797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412380 + }, + { + "epoch": 2.0000169743674925, + "grad_norm": 1.4372349532720818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412390 + }, + { + "epoch": 2.000065472560329, + "grad_norm": 1.422839801534792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412400 + }, + { + "epoch": 2.0001139707531648, + "grad_norm": 1.4045629548320449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412410 + }, + { + "epoch": 2.0001624689460007, + "grad_norm": 1.5901200356438494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412420 + }, + { + "epoch": 2.000210967138837, + "grad_norm": 1.0135428496482746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412430 + }, + { + "epoch": 2.000259465331673, + "grad_norm": 8.189269706804225e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412440 + }, + { + "epoch": 2.0003079635245093, + "grad_norm": 1.3109634267038928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412450 + }, + { + "epoch": 2.0003564617173453, + "grad_norm": 9.528315914053564e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412460 + }, + { + "epoch": 2.000404959910181, + "grad_norm": 1.0459443089416709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412470 + }, + { + "epoch": 2.0004534581030176, + "grad_norm": 9.725434679808131e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412480 + }, + { + "epoch": 2.0005019562958535, + "grad_norm": 1.064979837650526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412490 + }, + { + "epoch": 2.0005504544886894, + "grad_norm": 1.0561240770812219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412500 + }, + { + "epoch": 2.0005989526815258, + "grad_norm": 1.5199564273871147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412510 + }, + { + "epoch": 2.0006474508743617, + "grad_norm": 1.1309117198265994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412520 + }, + { + "epoch": 2.000695949067198, + "grad_norm": 1.8123166967143334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412530 + }, + { + "epoch": 2.000744447260034, + "grad_norm": 1.0900946811887025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412540 + }, + { + "epoch": 2.00079294545287, + "grad_norm": 9.165717074210988e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412550 + }, + { + "epoch": 2.0008414436457063, + "grad_norm": 1.5978658396420542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412560 + }, + { + "epoch": 2.000889941838542, + "grad_norm": 1.157821394315306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412570 + }, + { + "epoch": 2.0009384400313786, + "grad_norm": 1.3066974169362311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412580 + }, + { + "epoch": 2.0009869382242145, + "grad_norm": 1.2270737315134284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412590 + }, + { + "epoch": 2.0010354364170504, + "grad_norm": 1.0258172089550044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412600 + }, + { + "epoch": 2.0010839346098868, + "grad_norm": 6.804702579188415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412610 + }, + { + "epoch": 2.0011324328027227, + "grad_norm": 7.230593723761558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412620 + }, + { + "epoch": 2.0011809309955586, + "grad_norm": 1.0615799794777558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412630 + }, + { + "epoch": 2.001229429188395, + "grad_norm": 1.7471972313387596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412640 + }, + { + "epoch": 2.001277927381231, + "grad_norm": 8.48958503496533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412650 + }, + { + "epoch": 2.0013264255740673, + "grad_norm": 9.935141598305108e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412660 + }, + { + "epoch": 2.001374923766903, + "grad_norm": 1.7370211935485713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412670 + }, + { + "epoch": 2.001423421959739, + "grad_norm": 7.601438589688314e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412680 + }, + { + "epoch": 2.0014719201525755, + "grad_norm": 1.3751419558616362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412690 + }, + { + "epoch": 2.0015204183454114, + "grad_norm": 9.08761066398256e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412700 + }, + { + "epoch": 2.0015689165382473, + "grad_norm": 1.6963793925128812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412710 + }, + { + "epoch": 2.0016174147310837, + "grad_norm": 1.4699963024611407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412720 + }, + { + "epoch": 2.0016659129239196, + "grad_norm": 1.1245814945937127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412730 + }, + { + "epoch": 2.001714411116756, + "grad_norm": 1.0753776535921133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412740 + }, + { + "epoch": 2.001762909309592, + "grad_norm": 1.6610043118703288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412750 + }, + { + "epoch": 2.001811407502428, + "grad_norm": 1.803426208368819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412760 + }, + { + "epoch": 2.001859905695264, + "grad_norm": 1.3217635874696043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412770 + }, + { + "epoch": 2.0019084038881, + "grad_norm": 1.8557775760541517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412780 + }, + { + "epoch": 2.001956902080936, + "grad_norm": 1.0634582991997377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412790 + }, + { + "epoch": 2.0020054002737724, + "grad_norm": 1.4072972120970917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412800 + }, + { + "epoch": 2.0020538984666083, + "grad_norm": 1.3262568820948673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412810 + }, + { + "epoch": 2.0021023966594447, + "grad_norm": 1.005769778572585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412820 + }, + { + "epoch": 2.0021508948522806, + "grad_norm": 1.5582944712377866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412830 + }, + { + "epoch": 2.0021993930451165, + "grad_norm": 1.4268080050783283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412840 + }, + { + "epoch": 2.002247891237953, + "grad_norm": 1.2310676034132939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412850 + }, + { + "epoch": 2.002296389430789, + "grad_norm": 1.4252365510003528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412860 + }, + { + "epoch": 2.0023448876236247, + "grad_norm": 9.827396674211286e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412870 + }, + { + "epoch": 2.002393385816461, + "grad_norm": 1.0977888820207227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412880 + }, + { + "epoch": 2.002441884009297, + "grad_norm": 1.068350030664078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412890 + }, + { + "epoch": 2.0024903822021334, + "grad_norm": 1.6789954315754585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412900 + }, + { + "epoch": 2.0025388803949693, + "grad_norm": 1.2836359530865593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412910 + }, + { + "epoch": 2.0025873785878052, + "grad_norm": 1.8691837411211054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412920 + }, + { + "epoch": 2.0026358767806416, + "grad_norm": 1.2451696562720826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412930 + }, + { + "epoch": 2.0026843749734775, + "grad_norm": 1.4331811293288865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412940 + }, + { + "epoch": 2.0027328731663134, + "grad_norm": 9.386496913066367e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412950 + }, + { + "epoch": 2.00278137135915, + "grad_norm": 1.1128227228596188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412960 + }, + { + "epoch": 2.0028298695519857, + "grad_norm": 1.382979419872754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412970 + }, + { + "epoch": 2.002878367744822, + "grad_norm": 1.889716472192049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412980 + }, + { + "epoch": 2.002926865937658, + "grad_norm": 1.8210156937925603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 412990 + }, + { + "epoch": 2.002975364130494, + "grad_norm": 9.832216818494999e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413000 + }, + { + "epoch": 2.0030238623233303, + "grad_norm": 8.248751903749962e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413010 + }, + { + "epoch": 2.003072360516166, + "grad_norm": 7.96107535450119e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413020 + }, + { + "epoch": 2.003120858709002, + "grad_norm": 1.2095262569289389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413030 + }, + { + "epoch": 2.0031693569018385, + "grad_norm": 9.788718280390185e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413040 + }, + { + "epoch": 2.0032178550946744, + "grad_norm": 1.790236936471956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413050 + }, + { + "epoch": 2.003266353287511, + "grad_norm": 1.685166850506903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413060 + }, + { + "epoch": 2.0033148514803467, + "grad_norm": 1.381368619490786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413070 + }, + { + "epoch": 2.0033633496731826, + "grad_norm": 8.980264531999183e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413080 + }, + { + "epoch": 2.003411847866019, + "grad_norm": 9.92169102431717e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413090 + }, + { + "epoch": 2.003460346058855, + "grad_norm": 1.0827879926011974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413100 + }, + { + "epoch": 2.0035088442516913, + "grad_norm": 8.592935252238476e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413110 + }, + { + "epoch": 2.003557342444527, + "grad_norm": 1.5471000480715702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413120 + }, + { + "epoch": 2.003605840637363, + "grad_norm": 1.062385290850898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413130 + }, + { + "epoch": 2.0036543388301995, + "grad_norm": 1.1089109186457335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413140 + }, + { + "epoch": 2.0037028370230354, + "grad_norm": 1.0113975434933309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413150 + }, + { + "epoch": 2.0037513352158713, + "grad_norm": 1.1474272199052393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413160 + }, + { + "epoch": 2.0037998334087077, + "grad_norm": 9.811704337892024e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413170 + }, + { + "epoch": 2.0038483316015436, + "grad_norm": 1.0837614361491887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413180 + }, + { + "epoch": 2.00389682979438, + "grad_norm": 1.56195731904063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413190 + }, + { + "epoch": 2.003945327987216, + "grad_norm": 1.621084599889855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413200 + }, + { + "epoch": 2.003993826180052, + "grad_norm": 1.0321493881804145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413210 + }, + { + "epoch": 2.004042324372888, + "grad_norm": 1.6080841547250202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413220 + }, + { + "epoch": 2.004090822565724, + "grad_norm": 1.371449354081733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413230 + }, + { + "epoch": 2.00413932075856, + "grad_norm": 1.8493249598350303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413240 + }, + { + "epoch": 2.0041878189513964, + "grad_norm": 1.4726702524114899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413250 + }, + { + "epoch": 2.0042363171442323, + "grad_norm": 1.0809332984251796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413260 + }, + { + "epoch": 2.0042848153370687, + "grad_norm": 1.0643708137081376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413270 + }, + { + "epoch": 2.0043333135299046, + "grad_norm": 1.177727604328993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413280 + }, + { + "epoch": 2.0043818117227405, + "grad_norm": 1.842306396326876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413290 + }, + { + "epoch": 2.004430309915577, + "grad_norm": 1.529581439285721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413300 + }, + { + "epoch": 2.004478808108413, + "grad_norm": 7.261307555239682e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413310 + }, + { + "epoch": 2.0045273063012488, + "grad_norm": 1.1362628171696088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413320 + }, + { + "epoch": 2.004575804494085, + "grad_norm": 1.0597678290480417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413330 + }, + { + "epoch": 2.004624302686921, + "grad_norm": 1.2356725420659131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413340 + }, + { + "epoch": 2.0046728008797574, + "grad_norm": 1.1472885752539241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413350 + }, + { + "epoch": 2.0047212990725933, + "grad_norm": 1.6421328297155924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413360 + }, + { + "epoch": 2.0047697972654293, + "grad_norm": 1.875772071002757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413370 + }, + { + "epoch": 2.0048182954582656, + "grad_norm": 1.591652321053516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413380 + }, + { + "epoch": 2.0048667936511015, + "grad_norm": 1.094728308004278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413390 + }, + { + "epoch": 2.0049152918439375, + "grad_norm": 1.633366686348836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413400 + }, + { + "epoch": 2.004963790036774, + "grad_norm": 2.0983547344144426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413410 + }, + { + "epoch": 2.0050122882296098, + "grad_norm": 1.518836079128505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413420 + }, + { + "epoch": 2.005060786422446, + "grad_norm": 1.39063871529288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413430 + }, + { + "epoch": 2.005109284615282, + "grad_norm": 1.2915624125753311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413440 + }, + { + "epoch": 2.005157782808118, + "grad_norm": 1.7704056887168917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413450 + }, + { + "epoch": 2.0052062810009543, + "grad_norm": 1.169855234905981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413460 + }, + { + "epoch": 2.0052547791937902, + "grad_norm": 1.021206852414025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413470 + }, + { + "epoch": 2.005303277386626, + "grad_norm": 1.4665675784897303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413480 + }, + { + "epoch": 2.0053517755794625, + "grad_norm": 1.1025272250719809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413490 + }, + { + "epoch": 2.0054002737722985, + "grad_norm": 1.0545276651896529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413500 + }, + { + "epoch": 2.005448771965135, + "grad_norm": 6.53051213106437e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413510 + }, + { + "epoch": 2.0054972701579707, + "grad_norm": 8.672287776789744e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413520 + }, + { + "epoch": 2.0055457683508067, + "grad_norm": 2.0073001039122573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413530 + }, + { + "epoch": 2.005594266543643, + "grad_norm": 1.2159691920032856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413540 + }, + { + "epoch": 2.005642764736479, + "grad_norm": 3.022228511895264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413550 + }, + { + "epoch": 2.005691262929315, + "grad_norm": 2.5553033466962916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413560 + }, + { + "epoch": 2.0057397611221512, + "grad_norm": 1.1304634561781768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413570 + }, + { + "epoch": 2.005788259314987, + "grad_norm": 9.394522493266777e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413580 + }, + { + "epoch": 2.0058367575078235, + "grad_norm": 1.0809775297104807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413590 + }, + { + "epoch": 2.0058852557006595, + "grad_norm": 1.051861531209397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413600 + }, + { + "epoch": 2.0059337538934954, + "grad_norm": 1.3253374397947937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413610 + }, + { + "epoch": 2.0059822520863317, + "grad_norm": 7.962652759374578e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413620 + }, + { + "epoch": 2.0060307502791677, + "grad_norm": 1.1526433141284542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413630 + }, + { + "epoch": 2.006079248472004, + "grad_norm": 1.4365880929290142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413640 + }, + { + "epoch": 2.00612774666484, + "grad_norm": 1.2205164878764663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413650 + }, + { + "epoch": 2.006176244857676, + "grad_norm": 1.0024038488154474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413660 + }, + { + "epoch": 2.0062247430505122, + "grad_norm": 1.0192215071924693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413670 + }, + { + "epoch": 2.006273241243348, + "grad_norm": 8.781477767172419e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413680 + }, + { + "epoch": 2.006321739436184, + "grad_norm": 1.181305808728439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413690 + }, + { + "epoch": 2.0063702376290204, + "grad_norm": 9.96626603466666e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413700 + }, + { + "epoch": 2.0064187358218564, + "grad_norm": 2.1763964852539175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413710 + }, + { + "epoch": 2.0064672340146927, + "grad_norm": 1.4144833748730434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413720 + }, + { + "epoch": 2.0065157322075287, + "grad_norm": 1.4682974835977802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413730 + }, + { + "epoch": 2.0065642304003646, + "grad_norm": 9.45653244599498e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413740 + }, + { + "epoch": 2.006612728593201, + "grad_norm": 1.1639126107354514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413750 + }, + { + "epoch": 2.006661226786037, + "grad_norm": 1.760192880340128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413760 + }, + { + "epoch": 2.006709724978873, + "grad_norm": 1.5827085420028197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413770 + }, + { + "epoch": 2.006758223171709, + "grad_norm": 1.4303814133143078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413780 + }, + { + "epoch": 2.006806721364545, + "grad_norm": 1.619565281885116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413790 + }, + { + "epoch": 2.0068552195573814, + "grad_norm": 8.677242036014832e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413800 + }, + { + "epoch": 2.0069037177502174, + "grad_norm": 9.490291219549363e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413810 + }, + { + "epoch": 2.0069522159430533, + "grad_norm": 1.043064923322845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413820 + }, + { + "epoch": 2.0070007141358897, + "grad_norm": 1.4083620492044702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413830 + }, + { + "epoch": 2.0070492123287256, + "grad_norm": 5.568399519262357e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413840 + }, + { + "epoch": 2.0070977105215615, + "grad_norm": 1.3591717973326922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413850 + }, + { + "epoch": 2.007146208714398, + "grad_norm": 1.195110677087996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413860 + }, + { + "epoch": 2.007194706907234, + "grad_norm": 1.3244367380593758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413870 + }, + { + "epoch": 2.00724320510007, + "grad_norm": 1.6101562749781806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413880 + }, + { + "epoch": 2.007291703292906, + "grad_norm": 8.929855965789102e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413890 + }, + { + "epoch": 2.007340201485742, + "grad_norm": 9.957886071276789e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413900 + }, + { + "epoch": 2.0073886996785784, + "grad_norm": 1.0008752049373015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413910 + }, + { + "epoch": 2.0074371978714143, + "grad_norm": 1.3760481643032563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413920 + }, + { + "epoch": 2.00748569606425, + "grad_norm": 9.023527702822776e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413930 + }, + { + "epoch": 2.0075341942570866, + "grad_norm": 1.508692015761426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413940 + }, + { + "epoch": 2.0075826924499225, + "grad_norm": 6.900120919084429e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413950 + }, + { + "epoch": 2.007631190642759, + "grad_norm": 7.579100014254436e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413960 + }, + { + "epoch": 2.0076796888355948, + "grad_norm": 1.0691547203123264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413970 + }, + { + "epoch": 2.0077281870284307, + "grad_norm": 8.632569326039174e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413980 + }, + { + "epoch": 2.007776685221267, + "grad_norm": 9.737483708249783e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 413990 + }, + { + "epoch": 2.007825183414103, + "grad_norm": 8.896476444419932e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414000 + }, + { + "epoch": 2.007873681606939, + "grad_norm": 9.98329507950757e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414010 + }, + { + "epoch": 2.0079221797997753, + "grad_norm": 1.2010273664486704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414020 + }, + { + "epoch": 2.007970677992611, + "grad_norm": 1.494057322304343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414030 + }, + { + "epoch": 2.0080191761854476, + "grad_norm": 1.2406321303615186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414040 + }, + { + "epoch": 2.0080676743782835, + "grad_norm": 1.451697162480059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414050 + }, + { + "epoch": 2.0081161725711194, + "grad_norm": 8.881271718053085e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414060 + }, + { + "epoch": 2.0081646707639558, + "grad_norm": 1.3199692006082842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414070 + }, + { + "epoch": 2.0082131689567917, + "grad_norm": 1.15136451483977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414080 + }, + { + "epoch": 2.0082616671496276, + "grad_norm": 6.027477184034069e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414090 + }, + { + "epoch": 2.008310165342464, + "grad_norm": 1.1225971263684187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414100 + }, + { + "epoch": 2.0083586635353, + "grad_norm": 1.6547447856396502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414110 + }, + { + "epoch": 2.0084071617281363, + "grad_norm": 1.5791785656915636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414120 + }, + { + "epoch": 2.008455659920972, + "grad_norm": 1.240317182293893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414130 + }, + { + "epoch": 2.008504158113808, + "grad_norm": 1.0075342338211613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414140 + }, + { + "epoch": 2.0085526563066445, + "grad_norm": 1.0664418681471943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414150 + }, + { + "epoch": 2.0086011544994804, + "grad_norm": 1.0399298311369876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414160 + }, + { + "epoch": 2.0086496526923168, + "grad_norm": 1.0551793216961869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414170 + }, + { + "epoch": 2.0086981508851527, + "grad_norm": 1.3288763867080888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414180 + }, + { + "epoch": 2.0087466490779886, + "grad_norm": 1.4926841984674866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414190 + }, + { + "epoch": 2.008795147270825, + "grad_norm": 1.1333928462420317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414200 + }, + { + "epoch": 2.008843645463661, + "grad_norm": 1.2628889933807841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414210 + }, + { + "epoch": 2.008892143656497, + "grad_norm": 1.1910932684600084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414220 + }, + { + "epoch": 2.008940641849333, + "grad_norm": 1.646162495205772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414230 + }, + { + "epoch": 2.008989140042169, + "grad_norm": 1.2753106126695002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414240 + }, + { + "epoch": 2.0090376382350055, + "grad_norm": 8.617461411120075e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414250 + }, + { + "epoch": 2.0090861364278414, + "grad_norm": 1.109033931356862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414260 + }, + { + "epoch": 2.0091346346206773, + "grad_norm": 1.0025317465078842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414270 + }, + { + "epoch": 2.0091831328135137, + "grad_norm": 9.120533661644004e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414280 + }, + { + "epoch": 2.0092316310063496, + "grad_norm": 6.705322075362119e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414290 + }, + { + "epoch": 2.0092801291991855, + "grad_norm": 1.111892000693615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414300 + }, + { + "epoch": 2.009328627392022, + "grad_norm": 1.2073225974518209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414310 + }, + { + "epoch": 2.009377125584858, + "grad_norm": 1.0057274124619653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414320 + }, + { + "epoch": 2.009425623777694, + "grad_norm": 7.66444685496026e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414330 + }, + { + "epoch": 2.00947412197053, + "grad_norm": 9.001377421213874e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414340 + }, + { + "epoch": 2.009522620163366, + "grad_norm": 1.2002194793581111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414350 + }, + { + "epoch": 2.0095711183562024, + "grad_norm": 1.446987774045283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414360 + }, + { + "epoch": 2.0096196165490383, + "grad_norm": 9.57577572791024e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414370 + }, + { + "epoch": 2.0096681147418742, + "grad_norm": 1.2153556383509567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414380 + }, + { + "epoch": 2.0097166129347106, + "grad_norm": 1.1071874084223055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414390 + }, + { + "epoch": 2.0097651111275465, + "grad_norm": 1.2364345103321739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414400 + }, + { + "epoch": 2.009813609320383, + "grad_norm": 7.3346266837859275e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414410 + }, + { + "epoch": 2.009862107513219, + "grad_norm": 1.3913892260575267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414420 + }, + { + "epoch": 2.0099106057060547, + "grad_norm": 1.568985652511401e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414430 + }, + { + "epoch": 2.009959103898891, + "grad_norm": 1.0679898743148897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414440 + }, + { + "epoch": 2.010007602091727, + "grad_norm": 1.3315764491039772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414450 + }, + { + "epoch": 2.010056100284563, + "grad_norm": 1.0088934132568284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414460 + }, + { + "epoch": 2.0101045984773993, + "grad_norm": 1.697149976109813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414470 + }, + { + "epoch": 2.0101530966702352, + "grad_norm": 1.24938317469514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414480 + }, + { + "epoch": 2.0102015948630716, + "grad_norm": 1.684681016911327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414490 + }, + { + "epoch": 2.0102500930559075, + "grad_norm": 1.0827751140141118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414500 + }, + { + "epoch": 2.0102985912487434, + "grad_norm": 2.0756862895154882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414510 + }, + { + "epoch": 2.01034708944158, + "grad_norm": 1.0123800464612032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414520 + }, + { + "epoch": 2.0103955876344157, + "grad_norm": 8.126074924064142e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414530 + }, + { + "epoch": 2.0104440858272516, + "grad_norm": 9.91603510414052e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414540 + }, + { + "epoch": 2.010492584020088, + "grad_norm": 1.2165718210610521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414550 + }, + { + "epoch": 2.010541082212924, + "grad_norm": 9.575966686270476e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414560 + }, + { + "epoch": 2.0105895804057603, + "grad_norm": 1.2612162869629628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414570 + }, + { + "epoch": 2.0106380785985962, + "grad_norm": 1.1287223600220386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414580 + }, + { + "epoch": 2.010686576791432, + "grad_norm": 1.943603678000727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414590 + }, + { + "epoch": 2.0107350749842685, + "grad_norm": 9.634522513124466e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414600 + }, + { + "epoch": 2.0107835731771044, + "grad_norm": 1.0298864872027025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414610 + }, + { + "epoch": 2.010832071369941, + "grad_norm": 1.7067712576590566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414620 + }, + { + "epoch": 2.0108805695627767, + "grad_norm": 1.4509682344510111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414630 + }, + { + "epoch": 2.0109290677556126, + "grad_norm": 8.719664990053388e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414640 + }, + { + "epoch": 2.010977565948449, + "grad_norm": 1.3053230496495871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414650 + }, + { + "epoch": 2.011026064141285, + "grad_norm": 9.5927799037554e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414660 + }, + { + "epoch": 2.011074562334121, + "grad_norm": 1.518649561660368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414670 + }, + { + "epoch": 2.011123060526957, + "grad_norm": 1.0958610907607635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414680 + }, + { + "epoch": 2.011171558719793, + "grad_norm": 1.3302341450582844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414690 + }, + { + "epoch": 2.0112200569126295, + "grad_norm": 1.3160535772271942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414700 + }, + { + "epoch": 2.0112685551054654, + "grad_norm": 7.622647402172333e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414710 + }, + { + "epoch": 2.0113170532983013, + "grad_norm": 1.2237898694422711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414720 + }, + { + "epoch": 2.0113655514911377, + "grad_norm": 7.454057815436954e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414730 + }, + { + "epoch": 2.0114140496839736, + "grad_norm": 1.1557588663890783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414740 + }, + { + "epoch": 2.0114625478768096, + "grad_norm": 1.2292851181427977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414750 + }, + { + "epoch": 2.011511046069646, + "grad_norm": 1.7560765286361857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414760 + }, + { + "epoch": 2.011559544262482, + "grad_norm": 1.1168466151900702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414770 + }, + { + "epoch": 2.011608042455318, + "grad_norm": 9.690267255280105e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414780 + }, + { + "epoch": 2.011656540648154, + "grad_norm": 9.579800952508322e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414790 + }, + { + "epoch": 2.01170503884099, + "grad_norm": 8.357861069896444e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414800 + }, + { + "epoch": 2.0117535370338264, + "grad_norm": 1.3102647855589566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414810 + }, + { + "epoch": 2.0118020352266623, + "grad_norm": 1.4522466784683274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414820 + }, + { + "epoch": 2.0118505334194983, + "grad_norm": 1.693809359437637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414830 + }, + { + "epoch": 2.0118990316123346, + "grad_norm": 9.263936284753527e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414840 + }, + { + "epoch": 2.0119475298051706, + "grad_norm": 1.4677286941378043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414850 + }, + { + "epoch": 2.011996027998007, + "grad_norm": 1.323541454212318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414860 + }, + { + "epoch": 2.012044526190843, + "grad_norm": 7.930116119325703e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414870 + }, + { + "epoch": 2.0120930243836788, + "grad_norm": 9.538418943577653e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414880 + }, + { + "epoch": 2.012141522576515, + "grad_norm": 1.179670672257771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414890 + }, + { + "epoch": 2.012190020769351, + "grad_norm": 1.073418154362571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414900 + }, + { + "epoch": 2.012238518962187, + "grad_norm": 1.3360033968012885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414910 + }, + { + "epoch": 2.0122870171550233, + "grad_norm": 1.07819531081077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414920 + }, + { + "epoch": 2.0123355153478593, + "grad_norm": 1.1803497734774737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414930 + }, + { + "epoch": 2.0123840135406956, + "grad_norm": 1.1532307553352439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414940 + }, + { + "epoch": 2.0124325117335315, + "grad_norm": 1.4654081503806538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414950 + }, + { + "epoch": 2.0124810099263675, + "grad_norm": 1.6828170856797442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414960 + }, + { + "epoch": 2.012529508119204, + "grad_norm": 9.289736091488976e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414970 + }, + { + "epoch": 2.0125780063120398, + "grad_norm": 8.905784554258389e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414980 + }, + { + "epoch": 2.0126265045048757, + "grad_norm": 1.2241413216429464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 414990 + }, + { + "epoch": 2.012675002697712, + "grad_norm": 8.581779731287043e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415000 + }, + { + "epoch": 2.012723500890548, + "grad_norm": 1.0246131942892589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415010 + }, + { + "epoch": 2.0127719990833843, + "grad_norm": 1.1244822850642322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415020 + }, + { + "epoch": 2.0128204972762203, + "grad_norm": 1.100844659873701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415030 + }, + { + "epoch": 2.012868995469056, + "grad_norm": 9.825852131939428e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415040 + }, + { + "epoch": 2.0129174936618925, + "grad_norm": 7.977622118460204e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415050 + }, + { + "epoch": 2.0129659918547285, + "grad_norm": 1.262281834613077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415060 + }, + { + "epoch": 2.0130144900475644, + "grad_norm": 5.7719526935784415e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415070 + }, + { + "epoch": 2.0130629882404008, + "grad_norm": 1.296873541889454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415080 + }, + { + "epoch": 2.0131114864332367, + "grad_norm": 1.4134014847400067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415090 + }, + { + "epoch": 2.013159984626073, + "grad_norm": 1.5267596964463337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415100 + }, + { + "epoch": 2.013208482818909, + "grad_norm": 9.652237231705385e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415110 + }, + { + "epoch": 2.013256981011745, + "grad_norm": 1.6455453888397642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415120 + }, + { + "epoch": 2.0133054792045812, + "grad_norm": 8.528678208108431e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415130 + }, + { + "epoch": 2.013353977397417, + "grad_norm": 1.316607978196771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415140 + }, + { + "epoch": 2.0134024755902535, + "grad_norm": 1.3996921843784094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415150 + }, + { + "epoch": 2.0134509737830895, + "grad_norm": 1.105796609834897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415160 + }, + { + "epoch": 2.0134994719759254, + "grad_norm": 1.216590117536498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415170 + }, + { + "epoch": 2.0135479701687617, + "grad_norm": 7.169455251698764e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415180 + }, + { + "epoch": 2.0135964683615977, + "grad_norm": 1.2548739825035682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415190 + }, + { + "epoch": 2.0136449665544336, + "grad_norm": 8.562791364852274e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415200 + }, + { + "epoch": 2.01369346474727, + "grad_norm": 1.9107293525166824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415210 + }, + { + "epoch": 2.013741962940106, + "grad_norm": 1.5579956880173995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415220 + }, + { + "epoch": 2.0137904611329422, + "grad_norm": 1.1789472509349253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415230 + }, + { + "epoch": 2.013838959325778, + "grad_norm": 9.691655478150096e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415240 + }, + { + "epoch": 2.013887457518614, + "grad_norm": 1.2429755891218974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415250 + }, + { + "epoch": 2.0139359557114505, + "grad_norm": 1.8533059531478102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415260 + }, + { + "epoch": 2.0139844539042864, + "grad_norm": 9.505987996760723e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415270 + }, + { + "epoch": 2.0140329520971223, + "grad_norm": 1.0478019341064737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415280 + }, + { + "epoch": 2.0140814502899587, + "grad_norm": 6.400864283051533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415290 + }, + { + "epoch": 2.0141299484827946, + "grad_norm": 2.1163447883054687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415300 + }, + { + "epoch": 2.014178446675631, + "grad_norm": 7.61253016179353e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415310 + }, + { + "epoch": 2.014226944868467, + "grad_norm": 1.1533423993626002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415320 + }, + { + "epoch": 2.014275443061303, + "grad_norm": 9.943764034403557e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415330 + }, + { + "epoch": 2.014323941254139, + "grad_norm": 9.7177688118677e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415340 + }, + { + "epoch": 2.014372439446975, + "grad_norm": 1.0399086924905987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415350 + }, + { + "epoch": 2.014420937639811, + "grad_norm": 1.0537466010873686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415360 + }, + { + "epoch": 2.0144694358326474, + "grad_norm": 7.16440373693672e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415370 + }, + { + "epoch": 2.0145179340254833, + "grad_norm": 1.8608316665336133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415380 + }, + { + "epoch": 2.0145664322183197, + "grad_norm": 9.761941477393066e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415390 + }, + { + "epoch": 2.0146149304111556, + "grad_norm": 1.1226105378625562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415400 + }, + { + "epoch": 2.0146634286039915, + "grad_norm": 1.0071543599110555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415410 + }, + { + "epoch": 2.014711926796828, + "grad_norm": 8.004277241013824e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415420 + }, + { + "epoch": 2.014760424989664, + "grad_norm": 7.803156343300088e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415430 + }, + { + "epoch": 2.0148089231824997, + "grad_norm": 1.0756493473706996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415440 + }, + { + "epoch": 2.014857421375336, + "grad_norm": 1.1816419842602954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415450 + }, + { + "epoch": 2.014905919568172, + "grad_norm": 7.354424624850253e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415460 + }, + { + "epoch": 2.0149544177610084, + "grad_norm": 1.1502276464625538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415470 + }, + { + "epoch": 2.0150029159538443, + "grad_norm": 1.1407843558686181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415480 + }, + { + "epoch": 2.01505141414668, + "grad_norm": 1.2829627138444266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415490 + }, + { + "epoch": 2.0150999123395166, + "grad_norm": 1.0610223810658681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415500 + }, + { + "epoch": 2.0151484105323525, + "grad_norm": 1.570108310033902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415510 + }, + { + "epoch": 2.0151969087251884, + "grad_norm": 1.1626914542262057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415520 + }, + { + "epoch": 2.015245406918025, + "grad_norm": 1.1549381895292754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415530 + }, + { + "epoch": 2.0152939051108607, + "grad_norm": 2.6433383482071804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415540 + }, + { + "epoch": 2.015342403303697, + "grad_norm": 1.4947664439546315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415550 + }, + { + "epoch": 2.015390901496533, + "grad_norm": 1.342897348877159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415560 + }, + { + "epoch": 2.015439399689369, + "grad_norm": 1.1300326008267803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415570 + }, + { + "epoch": 2.0154878978822053, + "grad_norm": 1.3016778765972958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415580 + }, + { + "epoch": 2.015536396075041, + "grad_norm": 9.547178159152736e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415590 + }, + { + "epoch": 2.015584894267877, + "grad_norm": 1.2230027657267328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415600 + }, + { + "epoch": 2.0156333924607135, + "grad_norm": 1.995474363525318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415610 + }, + { + "epoch": 2.0156818906535494, + "grad_norm": 1.580021269376175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415620 + }, + { + "epoch": 2.015730388846386, + "grad_norm": 1.0037300768317436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415630 + }, + { + "epoch": 2.0157788870392217, + "grad_norm": 1.085152590007965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415640 + }, + { + "epoch": 2.0158273852320576, + "grad_norm": 1.2095090262675967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415650 + }, + { + "epoch": 2.015875883424894, + "grad_norm": 1.2996765441641855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415660 + }, + { + "epoch": 2.01592438161773, + "grad_norm": 1.0954650520034193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415670 + }, + { + "epoch": 2.0159728798105663, + "grad_norm": 1.7968845966720437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415680 + }, + { + "epoch": 2.016021378003402, + "grad_norm": 1.095699708741904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415690 + }, + { + "epoch": 2.016069876196238, + "grad_norm": 1.5528444308188227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415700 + }, + { + "epoch": 2.0161183743890745, + "grad_norm": 1.4661224234657766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415710 + }, + { + "epoch": 2.0161668725819104, + "grad_norm": 1.4742546738943929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415720 + }, + { + "epoch": 2.0162153707747463, + "grad_norm": 1.2020855422179011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415730 + }, + { + "epoch": 2.0162638689675827, + "grad_norm": 1.0408442108200688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415740 + }, + { + "epoch": 2.0163123671604186, + "grad_norm": 1.3885343541630846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415750 + }, + { + "epoch": 2.016360865353255, + "grad_norm": 8.866695822007387e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415760 + }, + { + "epoch": 2.016409363546091, + "grad_norm": 1.3883775906720075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415770 + }, + { + "epoch": 2.016457861738927, + "grad_norm": 1.1842305802645114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415780 + }, + { + "epoch": 2.016506359931763, + "grad_norm": 1.2492427536869855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415790 + }, + { + "epoch": 2.016554858124599, + "grad_norm": 1.4054018393494516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415800 + }, + { + "epoch": 2.016603356317435, + "grad_norm": 1.2000210602991501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415810 + }, + { + "epoch": 2.0166518545102714, + "grad_norm": 1.3160900813602439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415820 + }, + { + "epoch": 2.0167003527031073, + "grad_norm": 8.687148778108167e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415830 + }, + { + "epoch": 2.0167488508959437, + "grad_norm": 1.1765669327701289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415840 + }, + { + "epoch": 2.0167973490887796, + "grad_norm": 1.2730194676180417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415850 + }, + { + "epoch": 2.0168458472816155, + "grad_norm": 9.477092888232619e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415860 + }, + { + "epoch": 2.016894345474452, + "grad_norm": 1.656736259292302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415870 + }, + { + "epoch": 2.016942843667288, + "grad_norm": 1.1620820750124494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415880 + }, + { + "epoch": 2.0169913418601237, + "grad_norm": 1.5648991436023607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415890 + }, + { + "epoch": 2.01703984005296, + "grad_norm": 2.1373690373138743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415900 + }, + { + "epoch": 2.017088338245796, + "grad_norm": 8.062302825351253e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415910 + }, + { + "epoch": 2.0171368364386324, + "grad_norm": 1.3347959182397062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415920 + }, + { + "epoch": 2.0171853346314683, + "grad_norm": 1.03614912205785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415930 + }, + { + "epoch": 2.0172338328243042, + "grad_norm": 2.228787288061085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415940 + }, + { + "epoch": 2.0172823310171406, + "grad_norm": 9.527986399859856e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415950 + }, + { + "epoch": 2.0173308292099765, + "grad_norm": 1.2840386531820513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415960 + }, + { + "epoch": 2.0173793274028125, + "grad_norm": 9.718326587915271e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415970 + }, + { + "epoch": 2.017427825595649, + "grad_norm": 1.488980672093021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415980 + }, + { + "epoch": 2.0174763237884847, + "grad_norm": 1.3740923066052346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 415990 + }, + { + "epoch": 2.017524821981321, + "grad_norm": 1.0051070198358047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416000 + }, + { + "epoch": 2.017573320174157, + "grad_norm": 8.851685606714454e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416010 + }, + { + "epoch": 2.017621818366993, + "grad_norm": 1.3030197365537788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416020 + }, + { + "epoch": 2.0176703165598293, + "grad_norm": 9.09338382371061e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416030 + }, + { + "epoch": 2.0177188147526652, + "grad_norm": 9.642620035776872e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416040 + }, + { + "epoch": 2.017767312945501, + "grad_norm": 1.1582851122682314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416050 + }, + { + "epoch": 2.0178158111383375, + "grad_norm": 1.40957077121584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416060 + }, + { + "epoch": 2.0178643093311734, + "grad_norm": 8.965033160279745e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416070 + }, + { + "epoch": 2.01791280752401, + "grad_norm": 1.3739970938786428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416080 + }, + { + "epoch": 2.0179613057168457, + "grad_norm": 1.171155261658896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416090 + }, + { + "epoch": 2.0180098039096817, + "grad_norm": 1.269790317337538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416100 + }, + { + "epoch": 2.018058302102518, + "grad_norm": 1.2974929575193528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416110 + }, + { + "epoch": 2.018106800295354, + "grad_norm": 7.756662867564046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416120 + }, + { + "epoch": 2.01815529848819, + "grad_norm": 1.709625330192921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416130 + }, + { + "epoch": 2.0182037966810262, + "grad_norm": 1.4465384445827567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416140 + }, + { + "epoch": 2.018252294873862, + "grad_norm": 1.4951105242744234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416150 + }, + { + "epoch": 2.0183007930666985, + "grad_norm": 1.550539785455385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416160 + }, + { + "epoch": 2.0183492912595344, + "grad_norm": 1.920507841646213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416170 + }, + { + "epoch": 2.0183977894523704, + "grad_norm": 1.3709839485898101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416180 + }, + { + "epoch": 2.0184462876452067, + "grad_norm": 6.503327654172608e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416190 + }, + { + "epoch": 2.0184947858380426, + "grad_norm": 1.550903938607462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416200 + }, + { + "epoch": 2.018543284030879, + "grad_norm": 1.3409212407111681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416210 + }, + { + "epoch": 2.018591782223715, + "grad_norm": 1.0549781492841248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416220 + }, + { + "epoch": 2.018640280416551, + "grad_norm": 1.0407211981089404e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416230 + }, + { + "epoch": 2.0186887786093872, + "grad_norm": 1.2338743360373883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416240 + }, + { + "epoch": 2.018737276802223, + "grad_norm": 1.2851566921767699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416250 + }, + { + "epoch": 2.018785774995059, + "grad_norm": 1.2635461565935202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416260 + }, + { + "epoch": 2.0188342731878954, + "grad_norm": 1.4025625105773543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416270 + }, + { + "epoch": 2.0188827713807314, + "grad_norm": 1.1620740814066721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416280 + }, + { + "epoch": 2.0189312695735677, + "grad_norm": 1.0469545230762378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416290 + }, + { + "epoch": 2.0189797677664036, + "grad_norm": 9.682003643263215e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416300 + }, + { + "epoch": 2.0190282659592396, + "grad_norm": 1.4121052771542963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416310 + }, + { + "epoch": 2.019076764152076, + "grad_norm": 1.0142416684288946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416320 + }, + { + "epoch": 2.019125262344912, + "grad_norm": 8.470131263038638e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416330 + }, + { + "epoch": 2.0191737605377478, + "grad_norm": 1.4476827736586984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416340 + }, + { + "epoch": 2.019222258730584, + "grad_norm": 1.561353357715234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416350 + }, + { + "epoch": 2.01927075692342, + "grad_norm": 1.3537411192032778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416360 + }, + { + "epoch": 2.0193192551162564, + "grad_norm": 8.075469182244888e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416370 + }, + { + "epoch": 2.0193677533090924, + "grad_norm": 7.5901667173639e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416380 + }, + { + "epoch": 2.0194162515019283, + "grad_norm": 1.8748695040926577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416390 + }, + { + "epoch": 2.0194647496947646, + "grad_norm": 8.028407272320237e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416400 + }, + { + "epoch": 2.0195132478876006, + "grad_norm": 1.0981543674404293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416410 + }, + { + "epoch": 2.0195617460804365, + "grad_norm": 1.236052060704651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416420 + }, + { + "epoch": 2.019610244273273, + "grad_norm": 1.2367056712037083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416430 + }, + { + "epoch": 2.0196587424661088, + "grad_norm": 1.3866621628721987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416440 + }, + { + "epoch": 2.019707240658945, + "grad_norm": 9.586695881580454e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416450 + }, + { + "epoch": 2.019755738851781, + "grad_norm": 1.185363096567471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416460 + }, + { + "epoch": 2.019804237044617, + "grad_norm": 1.1857383519497944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416470 + }, + { + "epoch": 2.0198527352374533, + "grad_norm": 1.3141128185623074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416480 + }, + { + "epoch": 2.0199012334302893, + "grad_norm": 2.163045920156037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416490 + }, + { + "epoch": 2.019949731623125, + "grad_norm": 1.2595147147465013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416500 + }, + { + "epoch": 2.0199982298159616, + "grad_norm": 1.6188735685318534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416510 + }, + { + "epoch": 2.0200467280087975, + "grad_norm": 1.1759807350131268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416520 + }, + { + "epoch": 2.020095226201634, + "grad_norm": 1.3743806981381113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416530 + }, + { + "epoch": 2.0201437243944698, + "grad_norm": 1.0361723923324462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416540 + }, + { + "epoch": 2.0201922225873057, + "grad_norm": 1.284054462757922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416550 + }, + { + "epoch": 2.020240720780142, + "grad_norm": 9.899232544796632e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416560 + }, + { + "epoch": 2.020289218972978, + "grad_norm": 7.86117482221016e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416570 + }, + { + "epoch": 2.020337717165814, + "grad_norm": 1.0494696667251446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416580 + }, + { + "epoch": 2.0203862153586503, + "grad_norm": 9.43233491312867e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416590 + }, + { + "epoch": 2.020434713551486, + "grad_norm": 1.3447104763031348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416600 + }, + { + "epoch": 2.0204832117443225, + "grad_norm": 1.4025570038711521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416610 + }, + { + "epoch": 2.0205317099371585, + "grad_norm": 1.226755141914282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416620 + }, + { + "epoch": 2.0205802081299944, + "grad_norm": 1.1225390394997703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416630 + }, + { + "epoch": 2.0206287063228308, + "grad_norm": 9.967277669886698e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416640 + }, + { + "epoch": 2.0206772045156667, + "grad_norm": 1.0906063607762917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416650 + }, + { + "epoch": 2.0207257027085026, + "grad_norm": 8.775743687294835e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416660 + }, + { + "epoch": 2.020774200901339, + "grad_norm": 6.796901708128189e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416670 + }, + { + "epoch": 2.020822699094175, + "grad_norm": 8.18493273158083e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416680 + }, + { + "epoch": 2.0208711972870113, + "grad_norm": 8.641182880353426e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416690 + }, + { + "epoch": 2.020919695479847, + "grad_norm": 9.321158955799547e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416700 + }, + { + "epoch": 2.020968193672683, + "grad_norm": 1.131401905496432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416710 + }, + { + "epoch": 2.0210166918655195, + "grad_norm": 1.4011932059077026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416720 + }, + { + "epoch": 2.0210651900583554, + "grad_norm": 1.1516216424922732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416730 + }, + { + "epoch": 2.0211136882511918, + "grad_norm": 9.418962498841665e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416740 + }, + { + "epoch": 2.0211621864440277, + "grad_norm": 7.516480771130318e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416750 + }, + { + "epoch": 2.0212106846368636, + "grad_norm": 1.3732949000200279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416760 + }, + { + "epoch": 2.0212591828297, + "grad_norm": 1.2890116529717943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416770 + }, + { + "epoch": 2.021307681022536, + "grad_norm": 1.2129418358597377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416780 + }, + { + "epoch": 2.021356179215372, + "grad_norm": 1.6979312178477812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416790 + }, + { + "epoch": 2.021404677408208, + "grad_norm": 1.2302504792671698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416800 + }, + { + "epoch": 2.021453175601044, + "grad_norm": 7.799305201672269e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416810 + }, + { + "epoch": 2.0215016737938805, + "grad_norm": 9.212328677676851e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416820 + }, + { + "epoch": 2.0215501719867164, + "grad_norm": 1.4645798351864414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416830 + }, + { + "epoch": 2.0215986701795523, + "grad_norm": 1.0508276915288661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416840 + }, + { + "epoch": 2.0216471683723887, + "grad_norm": 1.0897689861621984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416850 + }, + { + "epoch": 2.0216956665652246, + "grad_norm": 1.3968888268323099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416860 + }, + { + "epoch": 2.0217441647580605, + "grad_norm": 1.775931401937214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416870 + }, + { + "epoch": 2.021792662950897, + "grad_norm": 1.2354615996912344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416880 + }, + { + "epoch": 2.021841161143733, + "grad_norm": 1.5523335505918112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416890 + }, + { + "epoch": 2.021889659336569, + "grad_norm": 1.8583365957169917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416900 + }, + { + "epoch": 2.021938157529405, + "grad_norm": 7.913476984811041e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416910 + }, + { + "epoch": 2.021986655722241, + "grad_norm": 1.1504753594238082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416920 + }, + { + "epoch": 2.0220351539150774, + "grad_norm": 1.1047233350325314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416930 + }, + { + "epoch": 2.0220836521079133, + "grad_norm": 8.702449427744341e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416940 + }, + { + "epoch": 2.022132150300749, + "grad_norm": 8.707941923091767e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416950 + }, + { + "epoch": 2.0221806484935856, + "grad_norm": 1.3223691475161559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416960 + }, + { + "epoch": 2.0222291466864215, + "grad_norm": 1.0046959708631675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416970 + }, + { + "epoch": 2.022277644879258, + "grad_norm": 9.771894404764225e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416980 + }, + { + "epoch": 2.022326143072094, + "grad_norm": 6.239443628430763e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 416990 + }, + { + "epoch": 2.0223746412649297, + "grad_norm": 1.1630979734889024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417000 + }, + { + "epoch": 2.022423139457766, + "grad_norm": 1.5162614275254782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417010 + }, + { + "epoch": 2.022471637650602, + "grad_norm": 1.5415219323244855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417020 + }, + { + "epoch": 2.022520135843438, + "grad_norm": 9.205614937002338e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417030 + }, + { + "epoch": 2.0225686340362743, + "grad_norm": 1.8602408502488288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417040 + }, + { + "epoch": 2.02261713222911, + "grad_norm": 1.5442637391060998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417050 + }, + { + "epoch": 2.0226656304219466, + "grad_norm": 9.702001868561183e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417060 + }, + { + "epoch": 2.0227141286147825, + "grad_norm": 8.509082327634587e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417070 + }, + { + "epoch": 2.0227626268076184, + "grad_norm": 1.2041120101002889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417080 + }, + { + "epoch": 2.022811125000455, + "grad_norm": 1.3440349277971109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417090 + }, + { + "epoch": 2.0228596231932907, + "grad_norm": 1.0686179940933016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417100 + }, + { + "epoch": 2.0229081213861266, + "grad_norm": 1.4047276231110573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417110 + }, + { + "epoch": 2.022956619578963, + "grad_norm": 1.3445789370791772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417120 + }, + { + "epoch": 2.023005117771799, + "grad_norm": 1.0043504694579042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417130 + }, + { + "epoch": 2.0230536159646353, + "grad_norm": 9.694391067682773e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417140 + }, + { + "epoch": 2.023102114157471, + "grad_norm": 7.578565330845777e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417150 + }, + { + "epoch": 2.023150612350307, + "grad_norm": 1.1212905270951978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417160 + }, + { + "epoch": 2.0231991105431435, + "grad_norm": 9.794387523243131e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417170 + }, + { + "epoch": 2.0232476087359794, + "grad_norm": 1.1160245172447958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417180 + }, + { + "epoch": 2.023296106928816, + "grad_norm": 7.281764524691425e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417190 + }, + { + "epoch": 2.0233446051216517, + "grad_norm": 8.460093070539187e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417200 + }, + { + "epoch": 2.0233931033144876, + "grad_norm": 1.3755313332808328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417210 + }, + { + "epoch": 2.023441601507324, + "grad_norm": 1.8262834799998018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417220 + }, + { + "epoch": 2.02349009970016, + "grad_norm": 1.0161900654281908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417230 + }, + { + "epoch": 2.023538597892996, + "grad_norm": 1.0318975007805875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417240 + }, + { + "epoch": 2.023587096085832, + "grad_norm": 1.2423182482734774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417250 + }, + { + "epoch": 2.023635594278668, + "grad_norm": 1.7142744113129993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417260 + }, + { + "epoch": 2.0236840924715045, + "grad_norm": 2.135948307113722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417270 + }, + { + "epoch": 2.0237325906643404, + "grad_norm": 1.3363594675297463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417280 + }, + { + "epoch": 2.0237810888571763, + "grad_norm": 1.0783420378857045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417290 + }, + { + "epoch": 2.0238295870500127, + "grad_norm": 1.1133469257629258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417300 + }, + { + "epoch": 2.0238780852428486, + "grad_norm": 8.116908034594417e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417310 + }, + { + "epoch": 2.0239265834356845, + "grad_norm": 9.696367264666605e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417320 + }, + { + "epoch": 2.023975081628521, + "grad_norm": 1.0084821866485072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417330 + }, + { + "epoch": 2.024023579821357, + "grad_norm": 5.997839558347096e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417340 + }, + { + "epoch": 2.024072078014193, + "grad_norm": 1.2861289810928156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417350 + }, + { + "epoch": 2.024120576207029, + "grad_norm": 1.1473282768292847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417360 + }, + { + "epoch": 2.024169074399865, + "grad_norm": 1.1904777608151562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417370 + }, + { + "epoch": 2.0242175725927014, + "grad_norm": 7.697909865100883e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417380 + }, + { + "epoch": 2.0242660707855373, + "grad_norm": 9.474736550885154e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417390 + }, + { + "epoch": 2.0243145689783733, + "grad_norm": 9.473811957150247e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417400 + }, + { + "epoch": 2.0243630671712096, + "grad_norm": 1.1100375729711232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417410 + }, + { + "epoch": 2.0244115653640455, + "grad_norm": 1.206025057598481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417420 + }, + { + "epoch": 2.024460063556882, + "grad_norm": 1.2018613659847688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417430 + }, + { + "epoch": 2.024508561749718, + "grad_norm": 9.73778835344774e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417440 + }, + { + "epoch": 2.0245570599425537, + "grad_norm": 1.5717214196797613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417450 + }, + { + "epoch": 2.02460555813539, + "grad_norm": 1.154275786063863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417460 + }, + { + "epoch": 2.024654056328226, + "grad_norm": 1.1996318605156375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417470 + }, + { + "epoch": 2.024702554521062, + "grad_norm": 1.4383888746749562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417480 + }, + { + "epoch": 2.0247510527138983, + "grad_norm": 7.900104570524036e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417490 + }, + { + "epoch": 2.0247995509067342, + "grad_norm": 9.361024844167787e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417500 + }, + { + "epoch": 2.0248480490995706, + "grad_norm": 1.0572668074360081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417510 + }, + { + "epoch": 2.0248965472924065, + "grad_norm": 1.2921835157442274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417520 + }, + { + "epoch": 2.0249450454852425, + "grad_norm": 1.0832782670888719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417530 + }, + { + "epoch": 2.024993543678079, + "grad_norm": 1.1865076032790967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417540 + }, + { + "epoch": 2.0250420418709147, + "grad_norm": 1.0483422130391773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417550 + }, + { + "epoch": 2.0250905400637507, + "grad_norm": 1.319237963315345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417560 + }, + { + "epoch": 2.025139038256587, + "grad_norm": 1.4821425864397497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417570 + }, + { + "epoch": 2.025187536449423, + "grad_norm": 2.054047598676334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417580 + }, + { + "epoch": 2.0252360346422593, + "grad_norm": 1.3380252461558939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417590 + }, + { + "epoch": 2.0252845328350952, + "grad_norm": 2.2680243461081773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417600 + }, + { + "epoch": 2.025333031027931, + "grad_norm": 7.878241170544698e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417610 + }, + { + "epoch": 2.0253815292207675, + "grad_norm": 1.2267237892160665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417620 + }, + { + "epoch": 2.0254300274136035, + "grad_norm": 9.748320373148545e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417630 + }, + { + "epoch": 2.0254785256064394, + "grad_norm": 1.19427543410211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417640 + }, + { + "epoch": 2.0255270237992757, + "grad_norm": 1.2887586109400218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417650 + }, + { + "epoch": 2.0255755219921117, + "grad_norm": 7.238692756317278e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417660 + }, + { + "epoch": 2.025624020184948, + "grad_norm": 9.989800986431874e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417670 + }, + { + "epoch": 2.025672518377784, + "grad_norm": 1.0500103009292161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417680 + }, + { + "epoch": 2.02572101657062, + "grad_norm": 1.0051443233294322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417690 + }, + { + "epoch": 2.0257695147634562, + "grad_norm": 1.176142916392564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417700 + }, + { + "epoch": 2.025818012956292, + "grad_norm": 6.2450666860058845e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417710 + }, + { + "epoch": 2.0258665111491285, + "grad_norm": 1.3972153212193916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417720 + }, + { + "epoch": 2.0259150093419644, + "grad_norm": 1.433783936022337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417730 + }, + { + "epoch": 2.0259635075348004, + "grad_norm": 1.447452735447996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417740 + }, + { + "epoch": 2.0260120057276367, + "grad_norm": 1.6294476878897513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417750 + }, + { + "epoch": 2.0260605039204727, + "grad_norm": 1.2358646550580943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417760 + }, + { + "epoch": 2.0261090021133086, + "grad_norm": 1.276934202820712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417770 + }, + { + "epoch": 2.026157500306145, + "grad_norm": 1.4924115276926386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417780 + }, + { + "epoch": 2.026205998498981, + "grad_norm": 1.4138067605529159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417790 + }, + { + "epoch": 2.0262544966918172, + "grad_norm": 1.0424920482421385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417800 + }, + { + "epoch": 2.026302994884653, + "grad_norm": 1.1205955274817825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417810 + }, + { + "epoch": 2.026351493077489, + "grad_norm": 7.573031979291045e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417820 + }, + { + "epoch": 2.0263999912703254, + "grad_norm": 1.0925666593664118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417830 + }, + { + "epoch": 2.0264484894631614, + "grad_norm": 1.0274216144523507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417840 + }, + { + "epoch": 2.0264969876559973, + "grad_norm": 1.9055169886428303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417850 + }, + { + "epoch": 2.0265454858488336, + "grad_norm": 1.092041834738211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417860 + }, + { + "epoch": 2.0265939840416696, + "grad_norm": 1.3748918448186487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417870 + }, + { + "epoch": 2.026642482234506, + "grad_norm": 1.3729012593444168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417880 + }, + { + "epoch": 2.026690980427342, + "grad_norm": 1.0805637273847424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417890 + }, + { + "epoch": 2.026739478620178, + "grad_norm": 8.628091130447046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417900 + }, + { + "epoch": 2.026787976813014, + "grad_norm": 1.4894437683210526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417910 + }, + { + "epoch": 2.02683647500585, + "grad_norm": 1.120404569121547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417920 + }, + { + "epoch": 2.026884973198686, + "grad_norm": 1.017930806312961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417930 + }, + { + "epoch": 2.0269334713915224, + "grad_norm": 1.0378419013079565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417940 + }, + { + "epoch": 2.0269819695843583, + "grad_norm": 1.0556542306972005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417950 + }, + { + "epoch": 2.0270304677771946, + "grad_norm": 8.410764529287462e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417960 + }, + { + "epoch": 2.0270789659700306, + "grad_norm": 1.3943907362090613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417970 + }, + { + "epoch": 2.0271274641628665, + "grad_norm": 8.880182811310533e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417980 + }, + { + "epoch": 2.027175962355703, + "grad_norm": 8.255281791491598e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 417990 + }, + { + "epoch": 2.0272244605485388, + "grad_norm": 1.911826252865012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418000 + }, + { + "epoch": 2.0272729587413747, + "grad_norm": 8.542593299409873e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418010 + }, + { + "epoch": 2.027321456934211, + "grad_norm": 8.319024580316636e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418020 + }, + { + "epoch": 2.027369955127047, + "grad_norm": 1.590882092727952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418030 + }, + { + "epoch": 2.0274184533198834, + "grad_norm": 8.097068793233575e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418040 + }, + { + "epoch": 2.0274669515127193, + "grad_norm": 1.2715804409424436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418050 + }, + { + "epoch": 2.027515449705555, + "grad_norm": 1.013455896980986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418060 + }, + { + "epoch": 2.0275639478983916, + "grad_norm": 1.7186485123943385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418070 + }, + { + "epoch": 2.0276124460912275, + "grad_norm": 1.8087854769532896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418080 + }, + { + "epoch": 2.0276609442840634, + "grad_norm": 1.18218546063531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418090 + }, + { + "epoch": 2.0277094424768998, + "grad_norm": 1.0495798896670294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418100 + }, + { + "epoch": 2.0277579406697357, + "grad_norm": 1.432199248085908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418110 + }, + { + "epoch": 2.027806438862572, + "grad_norm": 1.0025402730207134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418120 + }, + { + "epoch": 2.027854937055408, + "grad_norm": 9.261959199591274e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418130 + }, + { + "epoch": 2.027903435248244, + "grad_norm": 2.345793603808488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418140 + }, + { + "epoch": 2.0279519334410803, + "grad_norm": 1.0891152868452991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418150 + }, + { + "epoch": 2.028000431633916, + "grad_norm": 1.1202067717874797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418160 + }, + { + "epoch": 2.028048929826752, + "grad_norm": 1.2366162316368445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418170 + }, + { + "epoch": 2.0280974280195885, + "grad_norm": 1.1221121809512624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418180 + }, + { + "epoch": 2.0281459262124244, + "grad_norm": 1.2457600284676573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418190 + }, + { + "epoch": 2.0281944244052608, + "grad_norm": 1.3053570668830616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418200 + }, + { + "epoch": 2.0282429225980967, + "grad_norm": 1.1925500587040005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418210 + }, + { + "epoch": 2.0282914207909326, + "grad_norm": 8.22041457126943e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418220 + }, + { + "epoch": 2.028339918983769, + "grad_norm": 1.3656266339978629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418230 + }, + { + "epoch": 2.028388417176605, + "grad_norm": 2.3629416645576384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418240 + }, + { + "epoch": 2.0284369153694413, + "grad_norm": 1.0371940639686272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418250 + }, + { + "epoch": 2.028485413562277, + "grad_norm": 1.7299925048064324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418260 + }, + { + "epoch": 2.028533911755113, + "grad_norm": 8.807342410932506e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418270 + }, + { + "epoch": 2.0285824099479495, + "grad_norm": 1.0670548000746294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418280 + }, + { + "epoch": 2.0286309081407854, + "grad_norm": 2.054995285050154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418290 + }, + { + "epoch": 2.0286794063336213, + "grad_norm": 8.593551648061748e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418300 + }, + { + "epoch": 2.0287279045264577, + "grad_norm": 1.4931456959743628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418310 + }, + { + "epoch": 2.0287764027192936, + "grad_norm": 1.2551763184376341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418320 + }, + { + "epoch": 2.02882490091213, + "grad_norm": 1.0745665690592432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418330 + }, + { + "epoch": 2.028873399104966, + "grad_norm": 2.4587009761489753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418340 + }, + { + "epoch": 2.028921897297802, + "grad_norm": 1.590568565745798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418350 + }, + { + "epoch": 2.028970395490638, + "grad_norm": 1.163546681226535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418360 + }, + { + "epoch": 2.029018893683474, + "grad_norm": 1.0524989768612159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418370 + }, + { + "epoch": 2.02906739187631, + "grad_norm": 1.081370371025514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418380 + }, + { + "epoch": 2.0291158900691464, + "grad_norm": 1.2111684988269644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418390 + }, + { + "epoch": 2.0291643882619823, + "grad_norm": 1.573348740180336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418400 + }, + { + "epoch": 2.0292128864548187, + "grad_norm": 1.8044403304884327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418410 + }, + { + "epoch": 2.0292613846476546, + "grad_norm": 8.335202750231474e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418420 + }, + { + "epoch": 2.0293098828404905, + "grad_norm": 1.3849070334970293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418430 + }, + { + "epoch": 2.029358381033327, + "grad_norm": 1.615435962776246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418440 + }, + { + "epoch": 2.029406879226163, + "grad_norm": 1.5033203126790795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418450 + }, + { + "epoch": 2.0294553774189987, + "grad_norm": 1.2778973434990348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418460 + }, + { + "epoch": 2.029503875611835, + "grad_norm": 9.07955044482378e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418470 + }, + { + "epoch": 2.029552373804671, + "grad_norm": 1.0631619140610837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418480 + }, + { + "epoch": 2.0296008719975074, + "grad_norm": 1.810271221813764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418490 + }, + { + "epoch": 2.0296493701903433, + "grad_norm": 8.484336788683322e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418500 + }, + { + "epoch": 2.0296978683831792, + "grad_norm": 8.594652989302176e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418510 + }, + { + "epoch": 2.0297463665760156, + "grad_norm": 1.136184835104359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418520 + }, + { + "epoch": 2.0297948647688515, + "grad_norm": 1.0864812161059945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418530 + }, + { + "epoch": 2.0298433629616874, + "grad_norm": 1.1566136493001977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418540 + }, + { + "epoch": 2.029891861154524, + "grad_norm": 9.271460044146806e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418550 + }, + { + "epoch": 2.0299403593473597, + "grad_norm": 8.973645826415577e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418560 + }, + { + "epoch": 2.029988857540196, + "grad_norm": 1.0017449092458719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418570 + }, + { + "epoch": 2.030037355733032, + "grad_norm": 8.028705700269256e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418580 + }, + { + "epoch": 2.030085853925868, + "grad_norm": 1.4758751554211358e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418590 + }, + { + "epoch": 2.0301343521187043, + "grad_norm": 1.3647641239344921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418600 + }, + { + "epoch": 2.03018285031154, + "grad_norm": 9.909690845688601e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418610 + }, + { + "epoch": 2.030231348504376, + "grad_norm": 2.3427071838000302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418620 + }, + { + "epoch": 2.0302798466972125, + "grad_norm": 9.246586607503104e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418630 + }, + { + "epoch": 2.0303283448900484, + "grad_norm": 1.3305593071777366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418640 + }, + { + "epoch": 2.030376843082885, + "grad_norm": 1.3364526374459729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418650 + }, + { + "epoch": 2.0304253412757207, + "grad_norm": 1.7169599075828046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418660 + }, + { + "epoch": 2.0304738394685566, + "grad_norm": 1.2186469611208395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418670 + }, + { + "epoch": 2.030522337661393, + "grad_norm": 1.1646047681779237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418680 + }, + { + "epoch": 2.030570835854229, + "grad_norm": 1.4952199478557304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418690 + }, + { + "epoch": 2.0306193340470653, + "grad_norm": 1.0275202022569374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418700 + }, + { + "epoch": 2.030667832239901, + "grad_norm": 1.8292491077431805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418710 + }, + { + "epoch": 2.030716330432737, + "grad_norm": 1.2862257925405629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418720 + }, + { + "epoch": 2.0307648286255735, + "grad_norm": 1.8359429532210925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418730 + }, + { + "epoch": 2.0308133268184094, + "grad_norm": 6.535177732303055e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418740 + }, + { + "epoch": 2.0308618250112453, + "grad_norm": 9.861992111837026e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418750 + }, + { + "epoch": 2.0309103232040817, + "grad_norm": 7.403432089603257e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418760 + }, + { + "epoch": 2.0309588213969176, + "grad_norm": 1.337228283659897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418770 + }, + { + "epoch": 2.031007319589754, + "grad_norm": 9.472177708857998e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418780 + }, + { + "epoch": 2.03105581778259, + "grad_norm": 1.2674924221300898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418790 + }, + { + "epoch": 2.031104315975426, + "grad_norm": 7.919711109138916e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418800 + }, + { + "epoch": 2.031152814168262, + "grad_norm": 1.4409669013559778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418810 + }, + { + "epoch": 2.031201312361098, + "grad_norm": 1.0593140586934169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418820 + }, + { + "epoch": 2.031249810553934, + "grad_norm": 1.80854744513681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418830 + }, + { + "epoch": 2.0312983087467704, + "grad_norm": 9.460544347916766e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418840 + }, + { + "epoch": 2.0313468069396063, + "grad_norm": 9.735704686875124e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418850 + }, + { + "epoch": 2.0313953051324427, + "grad_norm": 1.4061805053700027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418860 + }, + { + "epoch": 2.0314438033252786, + "grad_norm": 1.233502189279534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418870 + }, + { + "epoch": 2.0314923015181146, + "grad_norm": 1.0821264773142047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418880 + }, + { + "epoch": 2.031540799710951, + "grad_norm": 1.0417052109801261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418890 + }, + { + "epoch": 2.031589297903787, + "grad_norm": 1.2616845346258287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418900 + }, + { + "epoch": 2.0316377960966228, + "grad_norm": 1.1855850523545541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418910 + }, + { + "epoch": 2.031686294289459, + "grad_norm": 9.077377072230775e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418920 + }, + { + "epoch": 2.031734792482295, + "grad_norm": 2.0194791616745533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418930 + }, + { + "epoch": 2.0317832906751314, + "grad_norm": 7.72161090623058e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418940 + }, + { + "epoch": 2.0318317888679673, + "grad_norm": 8.471185530822822e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418950 + }, + { + "epoch": 2.0318802870608033, + "grad_norm": 9.774741904777784e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418960 + }, + { + "epoch": 2.0319287852536396, + "grad_norm": 9.689045121774598e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418970 + }, + { + "epoch": 2.0319772834464755, + "grad_norm": 9.656093702403723e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418980 + }, + { + "epoch": 2.0320257816393115, + "grad_norm": 1.708188612781214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 418990 + }, + { + "epoch": 2.032074279832148, + "grad_norm": 1.0089649116196142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419000 + }, + { + "epoch": 2.0321227780249838, + "grad_norm": 7.530764456475936e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419010 + }, + { + "epoch": 2.03217127621782, + "grad_norm": 1.3870693038597892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419020 + }, + { + "epoch": 2.032219774410656, + "grad_norm": 7.695880377411868e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419030 + }, + { + "epoch": 2.032268272603492, + "grad_norm": 7.423759385005724e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419040 + }, + { + "epoch": 2.0323167707963283, + "grad_norm": 9.132667955213947e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419050 + }, + { + "epoch": 2.0323652689891643, + "grad_norm": 9.794739241897332e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419060 + }, + { + "epoch": 2.032413767182, + "grad_norm": 1.2532060722492133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419070 + }, + { + "epoch": 2.0324622653748365, + "grad_norm": 1.049815345766092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419080 + }, + { + "epoch": 2.0325107635676725, + "grad_norm": 7.568789150980137e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419090 + }, + { + "epoch": 2.032559261760509, + "grad_norm": 1.2207193478275258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419100 + }, + { + "epoch": 2.0326077599533448, + "grad_norm": 9.85706272160769e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419110 + }, + { + "epoch": 2.0326562581461807, + "grad_norm": 1.1256837240125606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419120 + }, + { + "epoch": 2.032704756339017, + "grad_norm": 8.953308316961284e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419130 + }, + { + "epoch": 2.032753254531853, + "grad_norm": 1.4799228509332352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419140 + }, + { + "epoch": 2.032801752724689, + "grad_norm": 8.274482432568675e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419150 + }, + { + "epoch": 2.0328502509175252, + "grad_norm": 1.0671311834187236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419160 + }, + { + "epoch": 2.032898749110361, + "grad_norm": 1.2896378187576829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419170 + }, + { + "epoch": 2.0329472473031975, + "grad_norm": 1.6826199100705708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419180 + }, + { + "epoch": 2.0329957454960335, + "grad_norm": 1.1431325219746213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419190 + }, + { + "epoch": 2.0330442436888694, + "grad_norm": 1.7305303856574028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419200 + }, + { + "epoch": 2.0330927418817057, + "grad_norm": 1.3984623237206506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419210 + }, + { + "epoch": 2.0331412400745417, + "grad_norm": 1.2014085726264057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419220 + }, + { + "epoch": 2.033189738267378, + "grad_norm": 1.1471604999258034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419230 + }, + { + "epoch": 2.033238236460214, + "grad_norm": 1.6300946370506608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419240 + }, + { + "epoch": 2.03328673465305, + "grad_norm": 1.2100190183161885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419250 + }, + { + "epoch": 2.0333352328458862, + "grad_norm": 2.2830935364481775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419260 + }, + { + "epoch": 2.033383731038722, + "grad_norm": 8.876981816285934e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419270 + }, + { + "epoch": 2.033432229231558, + "grad_norm": 9.074920370721884e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419280 + }, + { + "epoch": 2.0334807274243945, + "grad_norm": 1.178980735261348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419290 + }, + { + "epoch": 2.0335292256172304, + "grad_norm": 1.519567938146338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419300 + }, + { + "epoch": 2.0335777238100667, + "grad_norm": 1.0501286062947202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419310 + }, + { + "epoch": 2.0336262220029027, + "grad_norm": 1.3489271033506611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419320 + }, + { + "epoch": 2.0336747201957386, + "grad_norm": 1.5761285609983133e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419330 + }, + { + "epoch": 2.033723218388575, + "grad_norm": 1.4430406203302937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419340 + }, + { + "epoch": 2.033771716581411, + "grad_norm": 1.7508945404642873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419350 + }, + { + "epoch": 2.033820214774247, + "grad_norm": 1.2906962609804395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419360 + }, + { + "epoch": 2.033868712967083, + "grad_norm": 7.527628298475975e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419370 + }, + { + "epoch": 2.033917211159919, + "grad_norm": 1.1576219094422413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419380 + }, + { + "epoch": 2.0339657093527554, + "grad_norm": 1.9261541694959305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419390 + }, + { + "epoch": 2.0340142075455914, + "grad_norm": 1.1355752782549189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419400 + }, + { + "epoch": 2.0340627057384273, + "grad_norm": 9.545439105806963e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419410 + }, + { + "epoch": 2.0341112039312637, + "grad_norm": 1.0784136250663323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419420 + }, + { + "epoch": 2.0341597021240996, + "grad_norm": 1.5129437258565304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419430 + }, + { + "epoch": 2.0342082003169355, + "grad_norm": 8.08277889063902e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419440 + }, + { + "epoch": 2.034256698509772, + "grad_norm": 9.100978637377466e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419450 + }, + { + "epoch": 2.034305196702608, + "grad_norm": 1.1585569836825016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419460 + }, + { + "epoch": 2.034353694895444, + "grad_norm": 1.368984925420591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419470 + }, + { + "epoch": 2.03440219308828, + "grad_norm": 1.3241598928459553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419480 + }, + { + "epoch": 2.034450691281116, + "grad_norm": 9.288614322144895e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419490 + }, + { + "epoch": 2.0344991894739524, + "grad_norm": 8.43372305325829e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419500 + }, + { + "epoch": 2.0345476876667883, + "grad_norm": 1.3670082843475484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419510 + }, + { + "epoch": 2.034596185859624, + "grad_norm": 1.6965982396754953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419520 + }, + { + "epoch": 2.0346446840524606, + "grad_norm": 2.2973125624048407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419530 + }, + { + "epoch": 2.0346931822452965, + "grad_norm": 9.270286760454383e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419540 + }, + { + "epoch": 2.034741680438133, + "grad_norm": 1.1215578687995276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419550 + }, + { + "epoch": 2.034790178630969, + "grad_norm": 2.370095764092639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419560 + }, + { + "epoch": 2.0348386768238047, + "grad_norm": 1.357255907663557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419570 + }, + { + "epoch": 2.034887175016641, + "grad_norm": 1.3308892654606552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419580 + }, + { + "epoch": 2.034935673209477, + "grad_norm": 1.0563973695809636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419590 + }, + { + "epoch": 2.034984171402313, + "grad_norm": 1.6814148295907216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419600 + }, + { + "epoch": 2.0350326695951493, + "grad_norm": 1.8941095802915697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419610 + }, + { + "epoch": 2.035081167787985, + "grad_norm": 1.3545556676319848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419620 + }, + { + "epoch": 2.0351296659808216, + "grad_norm": 1.3683787436491457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419630 + }, + { + "epoch": 2.0351781641736575, + "grad_norm": 1.18262954984516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419640 + }, + { + "epoch": 2.0352266623664934, + "grad_norm": 7.909483734636069e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419650 + }, + { + "epoch": 2.0352751605593298, + "grad_norm": 8.888890512537273e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419660 + }, + { + "epoch": 2.0353236587521657, + "grad_norm": 1.3548496546889055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419670 + }, + { + "epoch": 2.0353721569450016, + "grad_norm": 1.7378642525045507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419680 + }, + { + "epoch": 2.035420655137838, + "grad_norm": 1.4696674988101677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419690 + }, + { + "epoch": 2.035469153330674, + "grad_norm": 1.4734593989373934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419700 + }, + { + "epoch": 2.0355176515235103, + "grad_norm": 1.57165374048418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419710 + }, + { + "epoch": 2.035566149716346, + "grad_norm": 8.883582758301145e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419720 + }, + { + "epoch": 2.035614647909182, + "grad_norm": 7.858619532896682e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419730 + }, + { + "epoch": 2.0356631461020185, + "grad_norm": 2.0064874206582317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419740 + }, + { + "epoch": 2.0357116442948544, + "grad_norm": 1.5946515219411594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419750 + }, + { + "epoch": 2.0357601424876908, + "grad_norm": 1.330297383361767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419760 + }, + { + "epoch": 2.0358086406805267, + "grad_norm": 1.6969556426715826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419770 + }, + { + "epoch": 2.0358571388733626, + "grad_norm": 1.0790469850974205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419780 + }, + { + "epoch": 2.035905637066199, + "grad_norm": 9.997024541519295e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419790 + }, + { + "epoch": 2.035954135259035, + "grad_norm": 2.483628414040595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419800 + }, + { + "epoch": 2.036002633451871, + "grad_norm": 1.0382097848093963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419810 + }, + { + "epoch": 2.036051131644707, + "grad_norm": 1.1994996995667861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419820 + }, + { + "epoch": 2.036099629837543, + "grad_norm": 7.82838505131167e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419830 + }, + { + "epoch": 2.0361481280303795, + "grad_norm": 6.7810428383552335e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419840 + }, + { + "epoch": 2.0361966262232154, + "grad_norm": 1.2118369419056307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419850 + }, + { + "epoch": 2.0362451244160513, + "grad_norm": 1.267871230226092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419860 + }, + { + "epoch": 2.0362936226088877, + "grad_norm": 1.0888098422867643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419870 + }, + { + "epoch": 2.0363421208017236, + "grad_norm": 9.47498790537793e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419880 + }, + { + "epoch": 2.0363906189945595, + "grad_norm": 9.507906462147275e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419890 + }, + { + "epoch": 2.036439117187396, + "grad_norm": 1.1746992711891835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419900 + }, + { + "epoch": 2.036487615380232, + "grad_norm": 9.156369884522064e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419910 + }, + { + "epoch": 2.036536113573068, + "grad_norm": 1.0678924411422486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419920 + }, + { + "epoch": 2.036584611765904, + "grad_norm": 6.518876549677088e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419930 + }, + { + "epoch": 2.03663310995874, + "grad_norm": 1.634039037412549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419940 + }, + { + "epoch": 2.0366816081515764, + "grad_norm": 1.1990091586255858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419950 + }, + { + "epoch": 2.0367301063444123, + "grad_norm": 1.4201455122986317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419960 + }, + { + "epoch": 2.0367786045372482, + "grad_norm": 1.4532919756504725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419970 + }, + { + "epoch": 2.0368271027300846, + "grad_norm": 1.3974503332292443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419980 + }, + { + "epoch": 2.0368756009229205, + "grad_norm": 1.4282209193083872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 419990 + }, + { + "epoch": 2.036924099115757, + "grad_norm": 9.029136549543182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420000 + }, + { + "epoch": 2.036972597308593, + "grad_norm": 1.142751315796886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420010 + }, + { + "epoch": 2.0370210955014287, + "grad_norm": 1.5763950145242234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420020 + }, + { + "epoch": 2.037069593694265, + "grad_norm": 1.1621208884093903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420030 + }, + { + "epoch": 2.037118091887101, + "grad_norm": 9.133374945236028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420040 + }, + { + "epoch": 2.037166590079937, + "grad_norm": 1.0962795116142843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420050 + }, + { + "epoch": 2.0372150882727733, + "grad_norm": 1.978799168966816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420060 + }, + { + "epoch": 2.0372635864656092, + "grad_norm": 1.945526584279378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420070 + }, + { + "epoch": 2.0373120846584456, + "grad_norm": 1.1022788015679907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420080 + }, + { + "epoch": 2.0373605828512815, + "grad_norm": 1.4581408969149834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420090 + }, + { + "epoch": 2.0374090810441174, + "grad_norm": 1.0817421625120005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420100 + }, + { + "epoch": 2.037457579236954, + "grad_norm": 1.2466504273334067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420110 + }, + { + "epoch": 2.0375060774297897, + "grad_norm": 8.289327446675543e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420120 + }, + { + "epoch": 2.0375545756226257, + "grad_norm": 1.0338703226864254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420130 + }, + { + "epoch": 2.037603073815462, + "grad_norm": 1.4079303944924959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420140 + }, + { + "epoch": 2.037651572008298, + "grad_norm": 1.0183436316424377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420150 + }, + { + "epoch": 2.0377000702011343, + "grad_norm": 1.666681193057684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420160 + }, + { + "epoch": 2.0377485683939702, + "grad_norm": 9.997012107021419e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420170 + }, + { + "epoch": 2.037797066586806, + "grad_norm": 1.0499895175541951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420180 + }, + { + "epoch": 2.0378455647796425, + "grad_norm": 1.7000031604652577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420190 + }, + { + "epoch": 2.0378940629724784, + "grad_norm": 9.285258784075268e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420200 + }, + { + "epoch": 2.0379425611653144, + "grad_norm": 1.1227933249813304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420210 + }, + { + "epoch": 2.0379910593581507, + "grad_norm": 1.231156332437422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420220 + }, + { + "epoch": 2.0380395575509866, + "grad_norm": 1.2545234184813125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420230 + }, + { + "epoch": 2.038088055743823, + "grad_norm": 1.183278008909383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420240 + }, + { + "epoch": 2.038136553936659, + "grad_norm": 1.729460663568716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420250 + }, + { + "epoch": 2.038185052129495, + "grad_norm": 1.785128134201841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420260 + }, + { + "epoch": 2.0382335503223312, + "grad_norm": 1.3752205596517797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420270 + }, + { + "epoch": 2.038282048515167, + "grad_norm": 9.78870140500021e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420280 + }, + { + "epoch": 2.0383305467080035, + "grad_norm": 7.196995888136826e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420290 + }, + { + "epoch": 2.0383790449008394, + "grad_norm": 6.507643313113931e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420300 + }, + { + "epoch": 2.0384275430936754, + "grad_norm": 1.103051161521762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420310 + }, + { + "epoch": 2.0384760412865117, + "grad_norm": 1.4982260765350475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420320 + }, + { + "epoch": 2.0385245394793476, + "grad_norm": 9.217745677858602e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420330 + }, + { + "epoch": 2.0385730376721836, + "grad_norm": 1.3722689651274322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420340 + }, + { + "epoch": 2.03862153586502, + "grad_norm": 1.0235036818073695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420350 + }, + { + "epoch": 2.038670034057856, + "grad_norm": 1.4047898844182782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420360 + }, + { + "epoch": 2.038718532250692, + "grad_norm": 1.6361404675535596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420370 + }, + { + "epoch": 2.038767030443528, + "grad_norm": 1.8366060672292406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420380 + }, + { + "epoch": 2.038815528636364, + "grad_norm": 9.226180708310494e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420390 + }, + { + "epoch": 2.0388640268292004, + "grad_norm": 2.0082531193565956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420400 + }, + { + "epoch": 2.0389125250220363, + "grad_norm": 1.2868292209589072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420410 + }, + { + "epoch": 2.0389610232148723, + "grad_norm": 1.1243958653039954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420420 + }, + { + "epoch": 2.0390095214077086, + "grad_norm": 1.9700722830862105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420430 + }, + { + "epoch": 2.0390580196005446, + "grad_norm": 1.037144503612808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420440 + }, + { + "epoch": 2.039106517793381, + "grad_norm": 8.374025917134986e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420450 + }, + { + "epoch": 2.039155015986217, + "grad_norm": 1.004522776071326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420460 + }, + { + "epoch": 2.0392035141790528, + "grad_norm": 1.1459748705533457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420470 + }, + { + "epoch": 2.039252012371889, + "grad_norm": 1.7255946005434453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420480 + }, + { + "epoch": 2.039300510564725, + "grad_norm": 1.7999616019892528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420490 + }, + { + "epoch": 2.039349008757561, + "grad_norm": 1.502927560181888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420500 + }, + { + "epoch": 2.0393975069503973, + "grad_norm": 1.2390835912867715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420510 + }, + { + "epoch": 2.0394460051432333, + "grad_norm": 1.499326174325688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420520 + }, + { + "epoch": 2.0394945033360696, + "grad_norm": 9.097466779905972e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420530 + }, + { + "epoch": 2.0395430015289056, + "grad_norm": 2.1093324420462523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420540 + }, + { + "epoch": 2.0395914997217415, + "grad_norm": 1.011343808698939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420550 + }, + { + "epoch": 2.039639997914578, + "grad_norm": 1.506408509044377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420560 + }, + { + "epoch": 2.0396884961074138, + "grad_norm": 1.2549460137734059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420570 + }, + { + "epoch": 2.0397369943002497, + "grad_norm": 1.3301574952606643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420580 + }, + { + "epoch": 2.039785492493086, + "grad_norm": 8.368160386851287e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420590 + }, + { + "epoch": 2.039833990685922, + "grad_norm": 1.0275489792377357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420600 + }, + { + "epoch": 2.0398824888787583, + "grad_norm": 1.190727427768934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420610 + }, + { + "epoch": 2.0399309870715943, + "grad_norm": 1.1187355042352465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420620 + }, + { + "epoch": 2.03997948526443, + "grad_norm": 1.1572735658660349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420630 + }, + { + "epoch": 2.0400279834572665, + "grad_norm": 1.7692542542135925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420640 + }, + { + "epoch": 2.0400764816501025, + "grad_norm": 7.818498737321988e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420650 + }, + { + "epoch": 2.0401249798429384, + "grad_norm": 9.741496498349989e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420660 + }, + { + "epoch": 2.0401734780357748, + "grad_norm": 1.501633484224385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420670 + }, + { + "epoch": 2.0402219762286107, + "grad_norm": 9.722007199286509e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420680 + }, + { + "epoch": 2.040270474421447, + "grad_norm": 9.957975777297179e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420690 + }, + { + "epoch": 2.040318972614283, + "grad_norm": 7.510438493341098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420700 + }, + { + "epoch": 2.040367470807119, + "grad_norm": 1.7726389245353857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420710 + }, + { + "epoch": 2.0404159689999553, + "grad_norm": 1.2140343841338108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420720 + }, + { + "epoch": 2.040464467192791, + "grad_norm": 9.40902555868206e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420730 + }, + { + "epoch": 2.040512965385627, + "grad_norm": 1.0983995046842665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420740 + }, + { + "epoch": 2.0405614635784635, + "grad_norm": 1.229367097010936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420750 + }, + { + "epoch": 2.0406099617712994, + "grad_norm": 1.360081736123675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420760 + }, + { + "epoch": 2.0406584599641358, + "grad_norm": 1.5121253582606187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420770 + }, + { + "epoch": 2.0407069581569717, + "grad_norm": 1.8896310294280738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420780 + }, + { + "epoch": 2.0407554563498076, + "grad_norm": 1.571644858699983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420790 + }, + { + "epoch": 2.040803954542644, + "grad_norm": 9.829217439971671e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420800 + }, + { + "epoch": 2.04085245273548, + "grad_norm": 1.0211469891885372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420810 + }, + { + "epoch": 2.0409009509283162, + "grad_norm": 6.990815926144478e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420820 + }, + { + "epoch": 2.040949449121152, + "grad_norm": 1.280603978415229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420830 + }, + { + "epoch": 2.040997947313988, + "grad_norm": 1.2715380748318239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420840 + }, + { + "epoch": 2.0410464455068245, + "grad_norm": 1.1214947193138869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420850 + }, + { + "epoch": 2.0410949436996604, + "grad_norm": 1.2976983931878294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420860 + }, + { + "epoch": 2.0411434418924963, + "grad_norm": 1.0888187240709613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420870 + }, + { + "epoch": 2.0411919400853327, + "grad_norm": 1.3336745041669928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420880 + }, + { + "epoch": 2.0412404382781686, + "grad_norm": 1.255816428624712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420890 + }, + { + "epoch": 2.041288936471005, + "grad_norm": 1.4471616793798603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420900 + }, + { + "epoch": 2.041337434663841, + "grad_norm": 1.5761310478978885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420910 + }, + { + "epoch": 2.041385932856677, + "grad_norm": 1.3739188453598672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420920 + }, + { + "epoch": 2.041434431049513, + "grad_norm": 1.3692034173118373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420930 + }, + { + "epoch": 2.041482929242349, + "grad_norm": 1.912356850652941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420940 + }, + { + "epoch": 2.041531427435185, + "grad_norm": 9.31481292099079e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420950 + }, + { + "epoch": 2.0415799256280214, + "grad_norm": 1.1220639528630727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420960 + }, + { + "epoch": 2.0416284238208573, + "grad_norm": 1.5994265467611513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420970 + }, + { + "epoch": 2.0416769220136937, + "grad_norm": 7.011619285179904e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420980 + }, + { + "epoch": 2.0417254202065296, + "grad_norm": 7.371089072449877e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 420990 + }, + { + "epoch": 2.0417739183993655, + "grad_norm": 1.485316936111758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421000 + }, + { + "epoch": 2.041822416592202, + "grad_norm": 9.267017375691466e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421010 + }, + { + "epoch": 2.041870914785038, + "grad_norm": 8.152596819854807e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421020 + }, + { + "epoch": 2.0419194129778737, + "grad_norm": 1.1117394116411106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421030 + }, + { + "epoch": 2.04196791117071, + "grad_norm": 7.296840021098205e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421040 + }, + { + "epoch": 2.042016409363546, + "grad_norm": 8.024204412038216e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421050 + }, + { + "epoch": 2.0420649075563824, + "grad_norm": 1.0228791147426364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421060 + }, + { + "epoch": 2.0421134057492183, + "grad_norm": 1.2128674065081668e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421070 + }, + { + "epoch": 2.042161903942054, + "grad_norm": 1.5264898678424288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421080 + }, + { + "epoch": 2.0422104021348906, + "grad_norm": 1.7228147797254678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421090 + }, + { + "epoch": 2.0422589003277265, + "grad_norm": 1.2511972791173775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421100 + }, + { + "epoch": 2.0423073985205624, + "grad_norm": 1.2747891631192942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421110 + }, + { + "epoch": 2.042355896713399, + "grad_norm": 1.4263562775340688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421120 + }, + { + "epoch": 2.0424043949062347, + "grad_norm": 9.087042229793951e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421130 + }, + { + "epoch": 2.042452893099071, + "grad_norm": 7.962088766078068e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421140 + }, + { + "epoch": 2.042501391291907, + "grad_norm": 1.1435221658473438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421150 + }, + { + "epoch": 2.042549889484743, + "grad_norm": 1.4377277146593315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421160 + }, + { + "epoch": 2.0425983876775793, + "grad_norm": 1.0800035532554375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421170 + }, + { + "epoch": 2.042646885870415, + "grad_norm": 1.0966108021648324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421180 + }, + { + "epoch": 2.042695384063251, + "grad_norm": 9.302040915315501e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421190 + }, + { + "epoch": 2.0427438822560875, + "grad_norm": 1.4006748649819656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421200 + }, + { + "epoch": 2.0427923804489234, + "grad_norm": 1.248651937402201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421210 + }, + { + "epoch": 2.04284087864176, + "grad_norm": 7.45226191867232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421220 + }, + { + "epoch": 2.0428893768345957, + "grad_norm": 6.473944491602879e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421230 + }, + { + "epoch": 2.0429378750274316, + "grad_norm": 1.849041986190514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421240 + }, + { + "epoch": 2.042986373220268, + "grad_norm": 2.0857461535683797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421250 + }, + { + "epoch": 2.043034871413104, + "grad_norm": 9.501809117296034e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421260 + }, + { + "epoch": 2.04308336960594, + "grad_norm": 1.3044541447015945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421270 + }, + { + "epoch": 2.043131867798776, + "grad_norm": 1.4821988969515587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421280 + }, + { + "epoch": 2.043180365991612, + "grad_norm": 1.0878677514369883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421290 + }, + { + "epoch": 2.0432288641844485, + "grad_norm": 9.464493189170753e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421300 + }, + { + "epoch": 2.0432773623772844, + "grad_norm": 8.897968584165028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421310 + }, + { + "epoch": 2.0433258605701203, + "grad_norm": 1.4369811118797315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421320 + }, + { + "epoch": 2.0433743587629567, + "grad_norm": 7.429096449129702e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421330 + }, + { + "epoch": 2.0434228569557926, + "grad_norm": 1.1526692489383095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421340 + }, + { + "epoch": 2.043471355148629, + "grad_norm": 1.8976193061348567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421350 + }, + { + "epoch": 2.043519853341465, + "grad_norm": 1.0974247288686456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421360 + }, + { + "epoch": 2.043568351534301, + "grad_norm": 1.2797379156381794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421370 + }, + { + "epoch": 2.043616849727137, + "grad_norm": 1.591169684900251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421380 + }, + { + "epoch": 2.043665347919973, + "grad_norm": 1.3600902626365041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421390 + }, + { + "epoch": 2.043713846112809, + "grad_norm": 7.770891485847642e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421400 + }, + { + "epoch": 2.0437623443056454, + "grad_norm": 1.0462827937374186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421410 + }, + { + "epoch": 2.0438108424984813, + "grad_norm": 9.140422640996348e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421420 + }, + { + "epoch": 2.0438593406913177, + "grad_norm": 1.6626914955963912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421430 + }, + { + "epoch": 2.0439078388841536, + "grad_norm": 1.046601738607933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421440 + }, + { + "epoch": 2.0439563370769895, + "grad_norm": 1.101827873384309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421450 + }, + { + "epoch": 2.044004835269826, + "grad_norm": 8.688033403814188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421460 + }, + { + "epoch": 2.044053333462662, + "grad_norm": 6.546460706857715e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421470 + }, + { + "epoch": 2.0441018316554977, + "grad_norm": 1.3121430164630965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421480 + }, + { + "epoch": 2.044150329848334, + "grad_norm": 8.80848372020182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421490 + }, + { + "epoch": 2.04419882804117, + "grad_norm": 7.477516383858074e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421500 + }, + { + "epoch": 2.0442473262340064, + "grad_norm": 1.3672599052938494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421510 + }, + { + "epoch": 2.0442958244268423, + "grad_norm": 1.2467959109585536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421520 + }, + { + "epoch": 2.0443443226196782, + "grad_norm": 1.1340223871059152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421530 + }, + { + "epoch": 2.0443928208125146, + "grad_norm": 7.520568168217778e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421540 + }, + { + "epoch": 2.0444413190053505, + "grad_norm": 7.49938422472951e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421550 + }, + { + "epoch": 2.0444898171981865, + "grad_norm": 1.98119831651411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421560 + }, + { + "epoch": 2.044538315391023, + "grad_norm": 1.3624211092633232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421570 + }, + { + "epoch": 2.0445868135838587, + "grad_norm": 1.0115122961451561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421580 + }, + { + "epoch": 2.044635311776695, + "grad_norm": 1.6284984027947758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421590 + }, + { + "epoch": 2.044683809969531, + "grad_norm": 1.31388029345203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421600 + }, + { + "epoch": 2.044732308162367, + "grad_norm": 1.2615172906293992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421610 + }, + { + "epoch": 2.0447808063552033, + "grad_norm": 7.2242927195986795e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421620 + }, + { + "epoch": 2.0448293045480392, + "grad_norm": 1.2174294461431145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421630 + }, + { + "epoch": 2.044877802740875, + "grad_norm": 1.118825920798372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421640 + }, + { + "epoch": 2.0449263009337115, + "grad_norm": 1.10722728763335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421650 + }, + { + "epoch": 2.0449747991265474, + "grad_norm": 1.7372927985093156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421660 + }, + { + "epoch": 2.045023297319384, + "grad_norm": 1.596506571388545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421670 + }, + { + "epoch": 2.0450717955122197, + "grad_norm": 9.589567362411344e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421680 + }, + { + "epoch": 2.0451202937050557, + "grad_norm": 1.0500590619244576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421690 + }, + { + "epoch": 2.045168791897892, + "grad_norm": 1.2213202005284529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421700 + }, + { + "epoch": 2.045217290090728, + "grad_norm": 1.108166003405131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421710 + }, + { + "epoch": 2.045265788283564, + "grad_norm": 1.1077231576450686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421720 + }, + { + "epoch": 2.0453142864764002, + "grad_norm": 6.913521310991655e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421730 + }, + { + "epoch": 2.045362784669236, + "grad_norm": 1.1772475438931451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421740 + }, + { + "epoch": 2.0454112828620725, + "grad_norm": 1.0396802530010518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421750 + }, + { + "epoch": 2.0454597810549084, + "grad_norm": 1.0214151302534447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421760 + }, + { + "epoch": 2.0455082792477444, + "grad_norm": 1.0370658998226645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421770 + }, + { + "epoch": 2.0455567774405807, + "grad_norm": 1.1216557460613785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421780 + }, + { + "epoch": 2.0456052756334167, + "grad_norm": 9.345874296684542e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421790 + }, + { + "epoch": 2.0456537738262526, + "grad_norm": 1.418497941330088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421800 + }, + { + "epoch": 2.045702272019089, + "grad_norm": 9.77294511983473e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421810 + }, + { + "epoch": 2.045750770211925, + "grad_norm": 1.0220722046483388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421820 + }, + { + "epoch": 2.0457992684047612, + "grad_norm": 1.3619916749973981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421830 + }, + { + "epoch": 2.045847766597597, + "grad_norm": 1.631993029604928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421840 + }, + { + "epoch": 2.045896264790433, + "grad_norm": 1.2540875005129237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421850 + }, + { + "epoch": 2.0459447629832694, + "grad_norm": 7.227407117227358e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421860 + }, + { + "epoch": 2.0459932611761054, + "grad_norm": 1.945623573362809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421870 + }, + { + "epoch": 2.0460417593689417, + "grad_norm": 1.1584703862865808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421880 + }, + { + "epoch": 2.0460902575617776, + "grad_norm": 9.43414235621276e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421890 + }, + { + "epoch": 2.0461387557546136, + "grad_norm": 1.4099757805752233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421900 + }, + { + "epoch": 2.04618725394745, + "grad_norm": 6.237266259034868e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421910 + }, + { + "epoch": 2.046235752140286, + "grad_norm": 1.4574366602460032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421920 + }, + { + "epoch": 2.046284250333122, + "grad_norm": 1.2436911056568078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421930 + }, + { + "epoch": 2.046332748525958, + "grad_norm": 5.862839991266355e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421940 + }, + { + "epoch": 2.046381246718794, + "grad_norm": 9.335242801000732e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421950 + }, + { + "epoch": 2.0464297449116304, + "grad_norm": 9.380222820709605e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421960 + }, + { + "epoch": 2.0464782431044664, + "grad_norm": 1.6692547788466072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421970 + }, + { + "epoch": 2.0465267412973023, + "grad_norm": 1.566083440707189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421980 + }, + { + "epoch": 2.0465752394901386, + "grad_norm": 1.2325316767203276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 421990 + }, + { + "epoch": 2.0466237376829746, + "grad_norm": 8.480099289442933e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422000 + }, + { + "epoch": 2.0466722358758105, + "grad_norm": 1.1213211692506775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422010 + }, + { + "epoch": 2.046720734068647, + "grad_norm": 1.1371260377757153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422020 + }, + { + "epoch": 2.0467692322614828, + "grad_norm": 1.1345192341138954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422030 + }, + { + "epoch": 2.046817730454319, + "grad_norm": 1.1914043085425874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422040 + }, + { + "epoch": 2.046866228647155, + "grad_norm": 1.1477280459359918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422050 + }, + { + "epoch": 2.046914726839991, + "grad_norm": 9.629721020587567e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422060 + }, + { + "epoch": 2.0469632250328273, + "grad_norm": 1.2766446566558898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422070 + }, + { + "epoch": 2.0470117232256633, + "grad_norm": 1.4242001356024048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422080 + }, + { + "epoch": 2.047060221418499, + "grad_norm": 9.764383079868821e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422090 + }, + { + "epoch": 2.0471087196113356, + "grad_norm": 1.2280042760437482e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422100 + }, + { + "epoch": 2.0471572178041715, + "grad_norm": 7.889586761677947e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422110 + }, + { + "epoch": 2.047205715997008, + "grad_norm": 1.4628891875645422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422120 + }, + { + "epoch": 2.0472542141898438, + "grad_norm": 1.1205067096398125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422130 + }, + { + "epoch": 2.0473027123826797, + "grad_norm": 1.4221524402557861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422140 + }, + { + "epoch": 2.047351210575516, + "grad_norm": 1.120087400607872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422150 + }, + { + "epoch": 2.047399708768352, + "grad_norm": 8.166674447807054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422160 + }, + { + "epoch": 2.047448206961188, + "grad_norm": 1.1777049557792907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422170 + }, + { + "epoch": 2.0474967051540243, + "grad_norm": 9.146927659742232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422180 + }, + { + "epoch": 2.04754520334686, + "grad_norm": 1.2771132595901236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422190 + }, + { + "epoch": 2.0475937015396966, + "grad_norm": 1.3174248358893692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422200 + }, + { + "epoch": 2.0476421997325325, + "grad_norm": 1.0475748268845564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422210 + }, + { + "epoch": 2.0476906979253684, + "grad_norm": 1.101851587748115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422220 + }, + { + "epoch": 2.0477391961182048, + "grad_norm": 1.34658444395086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422230 + }, + { + "epoch": 2.0477876943110407, + "grad_norm": 1.0186755439178796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422240 + }, + { + "epoch": 2.0478361925038766, + "grad_norm": 1.2725827502890752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422250 + }, + { + "epoch": 2.047884690696713, + "grad_norm": 1.3216978622665465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422260 + }, + { + "epoch": 2.047933188889549, + "grad_norm": 6.355791448697801e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422270 + }, + { + "epoch": 2.0479816870823853, + "grad_norm": 1.4679713444820663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422280 + }, + { + "epoch": 2.048030185275221, + "grad_norm": 1.0858467014429607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422290 + }, + { + "epoch": 2.048078683468057, + "grad_norm": 1.3856556790869945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422300 + }, + { + "epoch": 2.0481271816608935, + "grad_norm": 1.0111120829492393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422310 + }, + { + "epoch": 2.0481756798537294, + "grad_norm": 2.017362099593356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422320 + }, + { + "epoch": 2.0482241780465658, + "grad_norm": 1.512239400369708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422330 + }, + { + "epoch": 2.0482726762394017, + "grad_norm": 9.498218211945186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422340 + }, + { + "epoch": 2.0483211744322376, + "grad_norm": 1.2029338414265567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422350 + }, + { + "epoch": 2.048369672625074, + "grad_norm": 1.0780656367614938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422360 + }, + { + "epoch": 2.04841817081791, + "grad_norm": 1.4668376735471611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422370 + }, + { + "epoch": 2.048466669010746, + "grad_norm": 1.13838591886406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422380 + }, + { + "epoch": 2.048515167203582, + "grad_norm": 1.3501234796819972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422390 + }, + { + "epoch": 2.048563665396418, + "grad_norm": 1.7763419180027995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422400 + }, + { + "epoch": 2.0486121635892545, + "grad_norm": 8.482126112596688e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422410 + }, + { + "epoch": 2.0486606617820904, + "grad_norm": 1.0612525080944124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422420 + }, + { + "epoch": 2.0487091599749263, + "grad_norm": 1.219835965571292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422430 + }, + { + "epoch": 2.0487576581677627, + "grad_norm": 1.8193832218571515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422440 + }, + { + "epoch": 2.0488061563605986, + "grad_norm": 1.0567594799226754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422450 + }, + { + "epoch": 2.0488546545534345, + "grad_norm": 1.647201486321137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422460 + }, + { + "epoch": 2.048903152746271, + "grad_norm": 1.84081923038093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422470 + }, + { + "epoch": 2.048951650939107, + "grad_norm": 1.6393395085856355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422480 + }, + { + "epoch": 2.049000149131943, + "grad_norm": 1.5351950821695937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422490 + }, + { + "epoch": 2.049048647324779, + "grad_norm": 1.2089588885544345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422500 + }, + { + "epoch": 2.049097145517615, + "grad_norm": 1.1842903546721573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422510 + }, + { + "epoch": 2.0491456437104514, + "grad_norm": 1.3222872574658595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422520 + }, + { + "epoch": 2.0491941419032873, + "grad_norm": 1.6378901790403688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422530 + }, + { + "epoch": 2.0492426400961232, + "grad_norm": 7.45910710975295e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422540 + }, + { + "epoch": 2.0492911382889596, + "grad_norm": 9.980518633767588e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422550 + }, + { + "epoch": 2.0493396364817955, + "grad_norm": 1.3327666081863754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422560 + }, + { + "epoch": 2.049388134674632, + "grad_norm": 6.7963288330474825e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422570 + }, + { + "epoch": 2.049436632867468, + "grad_norm": 1.62975961615075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422580 + }, + { + "epoch": 2.0494851310603037, + "grad_norm": 1.6022818627448032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422590 + }, + { + "epoch": 2.04953362925314, + "grad_norm": 1.002837457519945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422600 + }, + { + "epoch": 2.049582127445976, + "grad_norm": 6.932213914012664e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422610 + }, + { + "epoch": 2.049630625638812, + "grad_norm": 1.7138001240368794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422620 + }, + { + "epoch": 2.0496791238316483, + "grad_norm": 8.991322353324449e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422630 + }, + { + "epoch": 2.049727622024484, + "grad_norm": 1.18904468493497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422640 + }, + { + "epoch": 2.0497761202173206, + "grad_norm": 9.942639600524217e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422650 + }, + { + "epoch": 2.0498246184101565, + "grad_norm": 9.456246452543837e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422660 + }, + { + "epoch": 2.0498731166029924, + "grad_norm": 1.0418820473034884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422670 + }, + { + "epoch": 2.049921614795829, + "grad_norm": 8.741862345118534e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422680 + }, + { + "epoch": 2.0499701129886647, + "grad_norm": 1.639048186063974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422690 + }, + { + "epoch": 2.0500186111815006, + "grad_norm": 1.2445115160630849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422700 + }, + { + "epoch": 2.050067109374337, + "grad_norm": 1.4976610174244342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422710 + }, + { + "epoch": 2.050115607567173, + "grad_norm": 1.5269854714006215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422720 + }, + { + "epoch": 2.0501641057600093, + "grad_norm": 1.260963866656084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422730 + }, + { + "epoch": 2.050212603952845, + "grad_norm": 1.1295512081233028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422740 + }, + { + "epoch": 2.050261102145681, + "grad_norm": 1.1212645922853426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422750 + }, + { + "epoch": 2.0503096003385175, + "grad_norm": 1.269218774524461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422760 + }, + { + "epoch": 2.0503580985313534, + "grad_norm": 1.036626251504913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422770 + }, + { + "epoch": 2.05040659672419, + "grad_norm": 1.0252457549597693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422780 + }, + { + "epoch": 2.0504550949170257, + "grad_norm": 1.0323512711352123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422790 + }, + { + "epoch": 2.0505035931098616, + "grad_norm": 9.33379507017662e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422800 + }, + { + "epoch": 2.050552091302698, + "grad_norm": 1.1079809958403075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422810 + }, + { + "epoch": 2.050600589495534, + "grad_norm": 6.375866501429073e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422820 + }, + { + "epoch": 2.05064908768837, + "grad_norm": 1.65830513765286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422830 + }, + { + "epoch": 2.050697585881206, + "grad_norm": 9.414986124056668e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422840 + }, + { + "epoch": 2.050746084074042, + "grad_norm": 6.48691367288734e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422850 + }, + { + "epoch": 2.0507945822668785, + "grad_norm": 1.2698255780208001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422860 + }, + { + "epoch": 2.0508430804597144, + "grad_norm": 1.1999428117803745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422870 + }, + { + "epoch": 2.0508915786525503, + "grad_norm": 9.800449340957584e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422880 + }, + { + "epoch": 2.0509400768453867, + "grad_norm": 2.481259464559571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422890 + }, + { + "epoch": 2.0509885750382226, + "grad_norm": 1.4537190118346643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422900 + }, + { + "epoch": 2.0510370732310585, + "grad_norm": 1.668349547401249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422910 + }, + { + "epoch": 2.051085571423895, + "grad_norm": 1.4339730292078912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422920 + }, + { + "epoch": 2.051134069616731, + "grad_norm": 8.039097387779748e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422930 + }, + { + "epoch": 2.051182567809567, + "grad_norm": 1.4840503048674236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422940 + }, + { + "epoch": 2.051231066002403, + "grad_norm": 1.2149363293190163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422950 + }, + { + "epoch": 2.051279564195239, + "grad_norm": 8.399939410708157e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422960 + }, + { + "epoch": 2.0513280623880754, + "grad_norm": 1.5280487986046865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422970 + }, + { + "epoch": 2.0513765605809113, + "grad_norm": 1.109892799888712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422980 + }, + { + "epoch": 2.0514250587737473, + "grad_norm": 1.1032182278825076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 422990 + }, + { + "epoch": 2.0514735569665836, + "grad_norm": 9.42753874966229e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423000 + }, + { + "epoch": 2.0515220551594195, + "grad_norm": 1.0257794613721671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423010 + }, + { + "epoch": 2.051570553352256, + "grad_norm": 1.5133105435438665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423020 + }, + { + "epoch": 2.051619051545092, + "grad_norm": 7.002423974000749e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423030 + }, + { + "epoch": 2.0516675497379278, + "grad_norm": 7.659614276178672e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423040 + }, + { + "epoch": 2.051716047930764, + "grad_norm": 8.294200881664437e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423050 + }, + { + "epoch": 2.0517645461236, + "grad_norm": 1.2387943115754751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423060 + }, + { + "epoch": 2.051813044316436, + "grad_norm": 9.578853266134502e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423070 + }, + { + "epoch": 2.0518615425092723, + "grad_norm": 1.1661546395203004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423080 + }, + { + "epoch": 2.0519100407021083, + "grad_norm": 9.936097278284706e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423090 + }, + { + "epoch": 2.0519585388949446, + "grad_norm": 1.1188972415254739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423100 + }, + { + "epoch": 2.0520070370877805, + "grad_norm": 1.2261252457790306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423110 + }, + { + "epoch": 2.0520555352806165, + "grad_norm": 1.0701010744185169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423120 + }, + { + "epoch": 2.052104033473453, + "grad_norm": 1.3540212506768512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423130 + }, + { + "epoch": 2.0521525316662887, + "grad_norm": 9.529355970983033e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423140 + }, + { + "epoch": 2.0522010298591247, + "grad_norm": 1.3625820471929728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423150 + }, + { + "epoch": 2.052249528051961, + "grad_norm": 9.938996292646607e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423160 + }, + { + "epoch": 2.052298026244797, + "grad_norm": 9.80158798569164e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423170 + }, + { + "epoch": 2.0523465244376333, + "grad_norm": 6.904711913335859e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423180 + }, + { + "epoch": 2.0523950226304692, + "grad_norm": 2.3036850649305052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423190 + }, + { + "epoch": 2.052443520823305, + "grad_norm": 1.1246051201396767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423200 + }, + { + "epoch": 2.0524920190161415, + "grad_norm": 1.7871848001504986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423210 + }, + { + "epoch": 2.0525405172089775, + "grad_norm": 1.3380588193001586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423220 + }, + { + "epoch": 2.0525890154018134, + "grad_norm": 1.6173510530848034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423230 + }, + { + "epoch": 2.0526375135946497, + "grad_norm": 1.3539216858760028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423240 + }, + { + "epoch": 2.0526860117874857, + "grad_norm": 1.0908147274335533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423250 + }, + { + "epoch": 2.052734509980322, + "grad_norm": 2.195472248445185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423260 + }, + { + "epoch": 2.052783008173158, + "grad_norm": 1.6656134249615206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423270 + }, + { + "epoch": 2.052831506365994, + "grad_norm": 9.088324759431998e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423280 + }, + { + "epoch": 2.0528800045588302, + "grad_norm": 9.393880340269334e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423290 + }, + { + "epoch": 2.052928502751666, + "grad_norm": 1.1738559457796782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423300 + }, + { + "epoch": 2.0529770009445025, + "grad_norm": 1.0404600736535485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423310 + }, + { + "epoch": 2.0530254991373384, + "grad_norm": 7.868715456993414e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423320 + }, + { + "epoch": 2.0530739973301744, + "grad_norm": 1.0560389007707727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423330 + }, + { + "epoch": 2.0531224955230107, + "grad_norm": 1.3215797345367264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423340 + }, + { + "epoch": 2.0531709937158467, + "grad_norm": 1.135477312175226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423350 + }, + { + "epoch": 2.0532194919086826, + "grad_norm": 1.0273311978892252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423360 + }, + { + "epoch": 2.053267990101519, + "grad_norm": 1.1795376231305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423370 + }, + { + "epoch": 2.053316488294355, + "grad_norm": 1.2372082913714166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423380 + }, + { + "epoch": 2.0533649864871912, + "grad_norm": 8.204527723876254e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423390 + }, + { + "epoch": 2.053413484680027, + "grad_norm": 1.5451261603516286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423400 + }, + { + "epoch": 2.053461982872863, + "grad_norm": 1.7181321254611248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423410 + }, + { + "epoch": 2.0535104810656994, + "grad_norm": 2.227282536182429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423420 + }, + { + "epoch": 2.0535589792585354, + "grad_norm": 1.2692074058406888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423430 + }, + { + "epoch": 2.0536074774513713, + "grad_norm": 1.117131454009268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423440 + }, + { + "epoch": 2.0536559756442077, + "grad_norm": 8.884889268756524e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423450 + }, + { + "epoch": 2.0537044738370436, + "grad_norm": 1.4392886882319544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423460 + }, + { + "epoch": 2.05375297202988, + "grad_norm": 8.816384955423473e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423470 + }, + { + "epoch": 2.053801470222716, + "grad_norm": 1.781834768621593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423480 + }, + { + "epoch": 2.053849968415552, + "grad_norm": 7.435156934576526e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423490 + }, + { + "epoch": 2.053898466608388, + "grad_norm": 7.705742710584218e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423500 + }, + { + "epoch": 2.053946964801224, + "grad_norm": 8.100094817109493e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423510 + }, + { + "epoch": 2.05399546299406, + "grad_norm": 1.600528953815683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423520 + }, + { + "epoch": 2.0540439611868964, + "grad_norm": 6.629773618982426e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423530 + }, + { + "epoch": 2.0540924593797323, + "grad_norm": 9.759689056920706e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423540 + }, + { + "epoch": 2.0541409575725686, + "grad_norm": 1.001395055766352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423550 + }, + { + "epoch": 2.0541894557654046, + "grad_norm": 9.98134197516265e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423560 + }, + { + "epoch": 2.0542379539582405, + "grad_norm": 1.0710270892388962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423570 + }, + { + "epoch": 2.054286452151077, + "grad_norm": 8.222166059113079e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423580 + }, + { + "epoch": 2.054334950343913, + "grad_norm": 1.3209370486322314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423590 + }, + { + "epoch": 2.0543834485367487, + "grad_norm": 7.749094699249781e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423600 + }, + { + "epoch": 2.054431946729585, + "grad_norm": 7.399119539286403e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423610 + }, + { + "epoch": 2.054480444922421, + "grad_norm": 1.2453413411606107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423620 + }, + { + "epoch": 2.0545289431152574, + "grad_norm": 1.2643359248443176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423630 + }, + { + "epoch": 2.0545774413080933, + "grad_norm": 1.4214186272454299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423640 + }, + { + "epoch": 2.054625939500929, + "grad_norm": 1.5427694677327963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423650 + }, + { + "epoch": 2.0546744376937656, + "grad_norm": 1.1785536990771561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423660 + }, + { + "epoch": 2.0547229358866015, + "grad_norm": 1.714927222451479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423670 + }, + { + "epoch": 2.0547714340794374, + "grad_norm": 1.4463997999314415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423680 + }, + { + "epoch": 2.0548199322722738, + "grad_norm": 1.4207275356170612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423690 + }, + { + "epoch": 2.0548684304651097, + "grad_norm": 1.2708022190111024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423700 + }, + { + "epoch": 2.054916928657946, + "grad_norm": 9.312929094562605e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423710 + }, + { + "epoch": 2.054965426850782, + "grad_norm": 1.3515180086187684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423720 + }, + { + "epoch": 2.055013925043618, + "grad_norm": 2.0061055039377607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423730 + }, + { + "epoch": 2.0550624232364543, + "grad_norm": 7.02315228195971e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423740 + }, + { + "epoch": 2.05511092142929, + "grad_norm": 1.3037983137564879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423750 + }, + { + "epoch": 2.055159419622126, + "grad_norm": 1.3716870306268447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423760 + }, + { + "epoch": 2.0552079178149625, + "grad_norm": 1.283761097425895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423770 + }, + { + "epoch": 2.0552564160077984, + "grad_norm": 1.081092193544464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423780 + }, + { + "epoch": 2.0553049142006348, + "grad_norm": 8.486361835480238e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423790 + }, + { + "epoch": 2.0553534123934707, + "grad_norm": 1.3702003087701087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423800 + }, + { + "epoch": 2.0554019105863066, + "grad_norm": 1.4893500654977743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423810 + }, + { + "epoch": 2.055450408779143, + "grad_norm": 1.2730437148888996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423820 + }, + { + "epoch": 2.055498906971979, + "grad_norm": 1.4917107549194952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423830 + }, + { + "epoch": 2.0555474051648153, + "grad_norm": 9.74167146949867e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423840 + }, + { + "epoch": 2.055595903357651, + "grad_norm": 1.2943276672672255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423850 + }, + { + "epoch": 2.055644401550487, + "grad_norm": 1.7198951596242296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423860 + }, + { + "epoch": 2.0556928997433235, + "grad_norm": 1.204170896329515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423870 + }, + { + "epoch": 2.0557413979361594, + "grad_norm": 1.7694443243954083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423880 + }, + { + "epoch": 2.0557898961289953, + "grad_norm": 1.0975250042122298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423890 + }, + { + "epoch": 2.0558383943218317, + "grad_norm": 1.366677526704052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423900 + }, + { + "epoch": 2.0558868925146676, + "grad_norm": 1.2319700815055512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423910 + }, + { + "epoch": 2.055935390707504, + "grad_norm": 9.969530978537477e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423920 + }, + { + "epoch": 2.05598388890034, + "grad_norm": 1.1232680563466602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423930 + }, + { + "epoch": 2.056032387093176, + "grad_norm": 1.1915171072018893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423940 + }, + { + "epoch": 2.056080885286012, + "grad_norm": 1.1758967133346232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423950 + }, + { + "epoch": 2.056129383478848, + "grad_norm": 1.1114602571637988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423960 + }, + { + "epoch": 2.056177881671684, + "grad_norm": 1.0950524931274686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423970 + }, + { + "epoch": 2.0562263798645204, + "grad_norm": 1.0619539914102916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423980 + }, + { + "epoch": 2.0562748780573563, + "grad_norm": 1.4956878402472285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 423990 + }, + { + "epoch": 2.0563233762501927, + "grad_norm": 1.0183062393309683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424000 + }, + { + "epoch": 2.0563718744430286, + "grad_norm": 1.17511254060787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424010 + }, + { + "epoch": 2.0564203726358645, + "grad_norm": 1.61855737701444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424020 + }, + { + "epoch": 2.056468870828701, + "grad_norm": 1.526933246509543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424030 + }, + { + "epoch": 2.056517369021537, + "grad_norm": 1.1563900947919592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424040 + }, + { + "epoch": 2.0565658672143727, + "grad_norm": 1.586847986345674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424050 + }, + { + "epoch": 2.056614365407209, + "grad_norm": 1.669933702430626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424060 + }, + { + "epoch": 2.056662863600045, + "grad_norm": 1.0596360233705582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424070 + }, + { + "epoch": 2.0567113617928814, + "grad_norm": 8.938696005600377e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424080 + }, + { + "epoch": 2.0567598599857173, + "grad_norm": 1.245073910638439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424090 + }, + { + "epoch": 2.0568083581785532, + "grad_norm": 1.5053275959076018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424100 + }, + { + "epoch": 2.0568568563713896, + "grad_norm": 1.1607523831003164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424110 + }, + { + "epoch": 2.0569053545642255, + "grad_norm": 9.698315928119428e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424120 + }, + { + "epoch": 2.0569538527570614, + "grad_norm": 1.5320408053298706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424130 + }, + { + "epoch": 2.057002350949898, + "grad_norm": 6.832849841487132e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424140 + }, + { + "epoch": 2.0570508491427337, + "grad_norm": 1.4774664158778705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424150 + }, + { + "epoch": 2.05709934733557, + "grad_norm": 1.4288830563202737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424160 + }, + { + "epoch": 2.057147845528406, + "grad_norm": 1.0372073866449227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424170 + }, + { + "epoch": 2.057196343721242, + "grad_norm": 1.0741816325321452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424180 + }, + { + "epoch": 2.0572448419140783, + "grad_norm": 1.6724285956115637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424190 + }, + { + "epoch": 2.0572933401069142, + "grad_norm": 1.192545795447586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424200 + }, + { + "epoch": 2.05734183829975, + "grad_norm": 8.814298624315597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424210 + }, + { + "epoch": 2.0573903364925865, + "grad_norm": 1.602851540383199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424220 + }, + { + "epoch": 2.0574388346854224, + "grad_norm": 8.258600026067597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424230 + }, + { + "epoch": 2.057487332878259, + "grad_norm": 9.994684191383385e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424240 + }, + { + "epoch": 2.0575358310710947, + "grad_norm": 9.18318843190491e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424250 + }, + { + "epoch": 2.0575843292639306, + "grad_norm": 1.5456043556127952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424260 + }, + { + "epoch": 2.057632827456767, + "grad_norm": 1.1734455185319348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424270 + }, + { + "epoch": 2.057681325649603, + "grad_norm": 1.5108547302133957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424280 + }, + { + "epoch": 2.057729823842439, + "grad_norm": 1.1930366028423123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424290 + }, + { + "epoch": 2.057778322035275, + "grad_norm": 1.2723033293582375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424300 + }, + { + "epoch": 2.057826820228111, + "grad_norm": 1.2491115697343957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424310 + }, + { + "epoch": 2.0578753184209475, + "grad_norm": 1.1107385233799505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424320 + }, + { + "epoch": 2.0579238166137834, + "grad_norm": 1.1398822330477287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424330 + }, + { + "epoch": 2.0579723148066194, + "grad_norm": 2.2252333309324968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424340 + }, + { + "epoch": 2.0580208129994557, + "grad_norm": 1.573372543361984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424350 + }, + { + "epoch": 2.0580693111922916, + "grad_norm": 9.947748402794332e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424360 + }, + { + "epoch": 2.058117809385128, + "grad_norm": 1.686724004912321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424370 + }, + { + "epoch": 2.058166307577964, + "grad_norm": 9.117210986175905e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424380 + }, + { + "epoch": 2.0582148057708, + "grad_norm": 1.1155121271144708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424390 + }, + { + "epoch": 2.058263303963636, + "grad_norm": 7.138911239934487e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424400 + }, + { + "epoch": 2.058311802156472, + "grad_norm": 1.0580539999693883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424410 + }, + { + "epoch": 2.058360300349308, + "grad_norm": 1.1703050084577171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424420 + }, + { + "epoch": 2.0584087985421444, + "grad_norm": 1.6142521985784697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424430 + }, + { + "epoch": 2.0584572967349803, + "grad_norm": 1.6022536186710568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424440 + }, + { + "epoch": 2.0585057949278167, + "grad_norm": 1.0815677242703714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424450 + }, + { + "epoch": 2.0585542931206526, + "grad_norm": 1.2647450198244314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424460 + }, + { + "epoch": 2.0586027913134886, + "grad_norm": 9.467568062859755e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424470 + }, + { + "epoch": 2.058651289506325, + "grad_norm": 1.3226313377856513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424480 + }, + { + "epoch": 2.058699787699161, + "grad_norm": 1.0389193505488947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424490 + }, + { + "epoch": 2.0587482858919968, + "grad_norm": 1.1811468247913126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424500 + }, + { + "epoch": 2.058796784084833, + "grad_norm": 1.4674943926706874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424510 + }, + { + "epoch": 2.058845282277669, + "grad_norm": 1.5366431682650727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424520 + }, + { + "epoch": 2.0588937804705054, + "grad_norm": 1.9582373056437063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424530 + }, + { + "epoch": 2.0589422786633413, + "grad_norm": 1.2751554478995786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424540 + }, + { + "epoch": 2.0589907768561773, + "grad_norm": 8.306527909951456e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424550 + }, + { + "epoch": 2.0590392750490136, + "grad_norm": 9.827604507961496e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424560 + }, + { + "epoch": 2.0590877732418496, + "grad_norm": 1.0570309072477357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424570 + }, + { + "epoch": 2.0591362714346855, + "grad_norm": 2.4367709627881595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424580 + }, + { + "epoch": 2.059184769627522, + "grad_norm": 1.2788164305277405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424590 + }, + { + "epoch": 2.0592332678203578, + "grad_norm": 1.2218444922496019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424600 + }, + { + "epoch": 2.059281766013194, + "grad_norm": 1.661599213775844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424610 + }, + { + "epoch": 2.05933026420603, + "grad_norm": 1.8422607439561034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424620 + }, + { + "epoch": 2.059378762398866, + "grad_norm": 9.858081462255086e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424630 + }, + { + "epoch": 2.0594272605917023, + "grad_norm": 1.4491895683477196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424640 + }, + { + "epoch": 2.0594757587845383, + "grad_norm": 1.1447295555910841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424650 + }, + { + "epoch": 2.059524256977374, + "grad_norm": 8.865582046269083e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424660 + }, + { + "epoch": 2.0595727551702105, + "grad_norm": 1.0576068021350693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424670 + }, + { + "epoch": 2.0596212533630465, + "grad_norm": 1.3060273751364093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424680 + }, + { + "epoch": 2.059669751555883, + "grad_norm": 8.404687612539874e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424690 + }, + { + "epoch": 2.0597182497487188, + "grad_norm": 1.6112737810658473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424700 + }, + { + "epoch": 2.0597667479415547, + "grad_norm": 1.758320777867084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424710 + }, + { + "epoch": 2.059815246134391, + "grad_norm": 1.2485412703711063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424720 + }, + { + "epoch": 2.059863744327227, + "grad_norm": 1.6855906892487837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424730 + }, + { + "epoch": 2.059912242520063, + "grad_norm": 8.666305006954644e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424740 + }, + { + "epoch": 2.0599607407128993, + "grad_norm": 1.52148640353289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424750 + }, + { + "epoch": 2.060009238905735, + "grad_norm": 1.0847750253617505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424760 + }, + { + "epoch": 2.0600577370985715, + "grad_norm": 1.4341431153752637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424770 + }, + { + "epoch": 2.0601062352914075, + "grad_norm": 1.0333653932548259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424780 + }, + { + "epoch": 2.0601547334842434, + "grad_norm": 1.4577989482233988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424790 + }, + { + "epoch": 2.0602032316770797, + "grad_norm": 1.5120013685532285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424800 + }, + { + "epoch": 2.0602517298699157, + "grad_norm": 1.3434201306949944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424810 + }, + { + "epoch": 2.0603002280627516, + "grad_norm": 9.4766141600644e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424820 + }, + { + "epoch": 2.060348726255588, + "grad_norm": 9.137947287740644e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424830 + }, + { + "epoch": 2.060397224448424, + "grad_norm": 1.288114948039265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424840 + }, + { + "epoch": 2.0604457226412602, + "grad_norm": 1.069718802426678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424850 + }, + { + "epoch": 2.060494220834096, + "grad_norm": 1.0738332001380968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424860 + }, + { + "epoch": 2.060542719026932, + "grad_norm": 7.0638530580424685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424870 + }, + { + "epoch": 2.0605912172197685, + "grad_norm": 1.8479692442952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424880 + }, + { + "epoch": 2.0606397154126044, + "grad_norm": 7.543019542310958e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424890 + }, + { + "epoch": 2.0606882136054407, + "grad_norm": 1.0373258696461107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424900 + }, + { + "epoch": 2.0607367117982767, + "grad_norm": 6.487437254065753e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424910 + }, + { + "epoch": 2.0607852099911126, + "grad_norm": 8.336630941130352e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424920 + }, + { + "epoch": 2.060833708183949, + "grad_norm": 8.429840825385781e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424930 + }, + { + "epoch": 2.060882206376785, + "grad_norm": 1.605718225050623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424940 + }, + { + "epoch": 2.060930704569621, + "grad_norm": 7.467924056925312e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424950 + }, + { + "epoch": 2.060979202762457, + "grad_norm": 1.5434675759706806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424960 + }, + { + "epoch": 2.061027700955293, + "grad_norm": 8.752503610764961e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424970 + }, + { + "epoch": 2.0610761991481295, + "grad_norm": 1.3279656485565283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424980 + }, + { + "epoch": 2.0611246973409654, + "grad_norm": 1.185807807502215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 424990 + }, + { + "epoch": 2.0611731955338013, + "grad_norm": 1.2495971368764458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425000 + }, + { + "epoch": 2.0612216937266377, + "grad_norm": 1.1026697777083427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425010 + }, + { + "epoch": 2.0612701919194736, + "grad_norm": 7.944175095531136e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425020 + }, + { + "epoch": 2.0613186901123095, + "grad_norm": 9.82714709607535e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425030 + }, + { + "epoch": 2.061367188305146, + "grad_norm": 2.3539787008530766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425040 + }, + { + "epoch": 2.061415686497982, + "grad_norm": 1.1636186236785306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425050 + }, + { + "epoch": 2.061464184690818, + "grad_norm": 7.046471850458147e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425060 + }, + { + "epoch": 2.061512682883654, + "grad_norm": 2.138684607189134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425070 + }, + { + "epoch": 2.06156118107649, + "grad_norm": 1.776139413323108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425080 + }, + { + "epoch": 2.0616096792693264, + "grad_norm": 1.1680286959858677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425090 + }, + { + "epoch": 2.0616581774621623, + "grad_norm": 7.73097230677422e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425100 + }, + { + "epoch": 2.061706675654998, + "grad_norm": 9.802219480548047e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425110 + }, + { + "epoch": 2.0617551738478346, + "grad_norm": 9.679595791567408e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425120 + }, + { + "epoch": 2.0618036720406705, + "grad_norm": 1.1947490996533361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425130 + }, + { + "epoch": 2.061852170233507, + "grad_norm": 7.3325154836823e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425140 + }, + { + "epoch": 2.061900668426343, + "grad_norm": 9.385559884833583e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425150 + }, + { + "epoch": 2.0619491666191787, + "grad_norm": 1.500019131128738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425160 + }, + { + "epoch": 2.061997664812015, + "grad_norm": 1.2836721019482411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425170 + }, + { + "epoch": 2.062046163004851, + "grad_norm": 9.229811581690228e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425180 + }, + { + "epoch": 2.062094661197687, + "grad_norm": 1.672297145205448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425190 + }, + { + "epoch": 2.0621431593905233, + "grad_norm": 1.1065632854467822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425200 + }, + { + "epoch": 2.062191657583359, + "grad_norm": 9.187119509590502e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425210 + }, + { + "epoch": 2.0622401557761956, + "grad_norm": 7.851736150144006e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425220 + }, + { + "epoch": 2.0622886539690315, + "grad_norm": 1.020700413079112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425230 + }, + { + "epoch": 2.0623371521618674, + "grad_norm": 1.2864791010258614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425240 + }, + { + "epoch": 2.062385650354704, + "grad_norm": 9.63300195166994e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425250 + }, + { + "epoch": 2.0624341485475397, + "grad_norm": 1.111803449305171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425260 + }, + { + "epoch": 2.0624826467403756, + "grad_norm": 1.3851436442280374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425270 + }, + { + "epoch": 2.062531144933212, + "grad_norm": 1.2192836962299225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425280 + }, + { + "epoch": 2.062579643126048, + "grad_norm": 1.78003709550012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425290 + }, + { + "epoch": 2.0626281413188843, + "grad_norm": 1.8717935645895523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425300 + }, + { + "epoch": 2.06267663951172, + "grad_norm": 7.95088705984881e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425310 + }, + { + "epoch": 2.062725137704556, + "grad_norm": 8.475878665592518e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425320 + }, + { + "epoch": 2.0627736358973925, + "grad_norm": 8.02525779164398e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425330 + }, + { + "epoch": 2.0628221340902284, + "grad_norm": 1.631516255429233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425340 + }, + { + "epoch": 2.0628706322830643, + "grad_norm": 7.472482188575214e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425350 + }, + { + "epoch": 2.0629191304759007, + "grad_norm": 9.259420785667771e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425360 + }, + { + "epoch": 2.0629676286687366, + "grad_norm": 1.2222250767024434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425370 + }, + { + "epoch": 2.063016126861573, + "grad_norm": 8.038135490551213e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425380 + }, + { + "epoch": 2.063064625054409, + "grad_norm": 1.5220212645772335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425390 + }, + { + "epoch": 2.063113123247245, + "grad_norm": 1.3023309541893013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425400 + }, + { + "epoch": 2.063161621440081, + "grad_norm": 1.7871430557647727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425410 + }, + { + "epoch": 2.063210119632917, + "grad_norm": 1.3208297566791316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425420 + }, + { + "epoch": 2.0632586178257535, + "grad_norm": 1.7188439116466725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425430 + }, + { + "epoch": 2.0633071160185894, + "grad_norm": 9.813835077920885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425440 + }, + { + "epoch": 2.0633556142114253, + "grad_norm": 1.534415261517097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425450 + }, + { + "epoch": 2.0634041124042617, + "grad_norm": 1.6220305099068355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425460 + }, + { + "epoch": 2.0634526105970976, + "grad_norm": 1.6137500224999712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425470 + }, + { + "epoch": 2.0635011087899335, + "grad_norm": 6.014249542829475e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425480 + }, + { + "epoch": 2.06354960698277, + "grad_norm": 7.951173053299954e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425490 + }, + { + "epoch": 2.063598105175606, + "grad_norm": 1.5326749647215365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425500 + }, + { + "epoch": 2.063646603368442, + "grad_norm": 3.7705163435930444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425510 + }, + { + "epoch": 2.063695101561278, + "grad_norm": 1.4461055464209949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425520 + }, + { + "epoch": 2.063743599754114, + "grad_norm": 1.1593120241570887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425530 + }, + { + "epoch": 2.0637920979469504, + "grad_norm": 7.739699547926193e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425540 + }, + { + "epoch": 2.0638405961397863, + "grad_norm": 9.132373079978606e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425550 + }, + { + "epoch": 2.0638890943326222, + "grad_norm": 1.1366326546635719e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425560 + }, + { + "epoch": 2.0639375925254586, + "grad_norm": 9.823398094965796e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425570 + }, + { + "epoch": 2.0639860907182945, + "grad_norm": 1.7449577782713277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425580 + }, + { + "epoch": 2.064034588911131, + "grad_norm": 1.4395661551702688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425590 + }, + { + "epoch": 2.064083087103967, + "grad_norm": 1.3974875479050297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425600 + }, + { + "epoch": 2.0641315852968027, + "grad_norm": 1.40130760328816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425610 + }, + { + "epoch": 2.064180083489639, + "grad_norm": 1.3872014648086406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425620 + }, + { + "epoch": 2.064228581682475, + "grad_norm": 9.331681205537734e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425630 + }, + { + "epoch": 2.064277079875311, + "grad_norm": 1.0269627814807336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425640 + }, + { + "epoch": 2.0643255780681473, + "grad_norm": 6.891277770648685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425650 + }, + { + "epoch": 2.0643740762609832, + "grad_norm": 1.077902300750111e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425660 + }, + { + "epoch": 2.0644225744538196, + "grad_norm": 9.859518534938161e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425670 + }, + { + "epoch": 2.0644710726466555, + "grad_norm": 1.1298227242662051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425680 + }, + { + "epoch": 2.0645195708394914, + "grad_norm": 6.985327871689151e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425690 + }, + { + "epoch": 2.064568069032328, + "grad_norm": 1.417976669415566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425700 + }, + { + "epoch": 2.0646165672251637, + "grad_norm": 9.45916855954465e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425710 + }, + { + "epoch": 2.0646650654179997, + "grad_norm": 8.996314804221583e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425720 + }, + { + "epoch": 2.064713563610836, + "grad_norm": 1.1391536602900487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425730 + }, + { + "epoch": 2.064762061803672, + "grad_norm": 1.0921296755839194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425740 + }, + { + "epoch": 2.0648105599965083, + "grad_norm": 1.37246498610466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425750 + }, + { + "epoch": 2.0648590581893442, + "grad_norm": 1.561148721407335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425760 + }, + { + "epoch": 2.06490755638218, + "grad_norm": 1.4470718845416286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425770 + }, + { + "epoch": 2.0649560545750165, + "grad_norm": 2.1835168340089695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425780 + }, + { + "epoch": 2.0650045527678524, + "grad_norm": 1.1346838135750659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425790 + }, + { + "epoch": 2.0650530509606884, + "grad_norm": 1.0020368534924273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425800 + }, + { + "epoch": 2.0651015491535247, + "grad_norm": 1.3697477818652715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425810 + }, + { + "epoch": 2.0651500473463607, + "grad_norm": 9.813025947380538e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425820 + }, + { + "epoch": 2.065198545539197, + "grad_norm": 1.181788178428178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425830 + }, + { + "epoch": 2.065247043732033, + "grad_norm": 1.5303124101251342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425840 + }, + { + "epoch": 2.065295541924869, + "grad_norm": 1.1269653654721878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425850 + }, + { + "epoch": 2.0653440401177052, + "grad_norm": 1.438478403059662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425860 + }, + { + "epoch": 2.065392538310541, + "grad_norm": 9.262491218464675e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425870 + }, + { + "epoch": 2.065441036503377, + "grad_norm": 1.475302990883165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425880 + }, + { + "epoch": 2.0654895346962134, + "grad_norm": 1.550873562905508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425890 + }, + { + "epoch": 2.0655380328890494, + "grad_norm": 8.026096232072177e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425900 + }, + { + "epoch": 2.0655865310818857, + "grad_norm": 7.925316403145644e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425910 + }, + { + "epoch": 2.0656350292747216, + "grad_norm": 1.5320328117240933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425920 + }, + { + "epoch": 2.0656835274675576, + "grad_norm": 8.645223203984642e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425930 + }, + { + "epoch": 2.065732025660394, + "grad_norm": 1.1493936469264554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425940 + }, + { + "epoch": 2.06578052385323, + "grad_norm": 1.1761376761398878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425950 + }, + { + "epoch": 2.065829022046066, + "grad_norm": 1.3651781927137563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425960 + }, + { + "epoch": 2.065877520238902, + "grad_norm": 1.6452888829121548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425970 + }, + { + "epoch": 2.065926018431738, + "grad_norm": 8.039402032977705e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425980 + }, + { + "epoch": 2.0659745166245744, + "grad_norm": 1.1311135139635553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 425990 + }, + { + "epoch": 2.0660230148174104, + "grad_norm": 7.452536365804008e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426000 + }, + { + "epoch": 2.0660715130102463, + "grad_norm": 7.478863750520759e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426010 + }, + { + "epoch": 2.0661200112030826, + "grad_norm": 8.502508030971967e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426020 + }, + { + "epoch": 2.0661685093959186, + "grad_norm": 1.694809981245271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426030 + }, + { + "epoch": 2.066217007588755, + "grad_norm": 1.026511231572158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426040 + }, + { + "epoch": 2.066265505781591, + "grad_norm": 1.148346484569629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426050 + }, + { + "epoch": 2.0663140039744268, + "grad_norm": 2.493591999552791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426060 + }, + { + "epoch": 2.066362502167263, + "grad_norm": 8.980205024045063e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426070 + }, + { + "epoch": 2.066411000360099, + "grad_norm": 1.0507409164972614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426080 + }, + { + "epoch": 2.066459498552935, + "grad_norm": 1.3370762275144443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426090 + }, + { + "epoch": 2.0665079967457713, + "grad_norm": 1.0101298464348929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426100 + }, + { + "epoch": 2.0665564949386073, + "grad_norm": 1.3600851112016699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426110 + }, + { + "epoch": 2.0666049931314436, + "grad_norm": 8.547901053646001e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426120 + }, + { + "epoch": 2.0666534913242796, + "grad_norm": 1.1445822067912559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426130 + }, + { + "epoch": 2.0667019895171155, + "grad_norm": 1.6855317142017157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426140 + }, + { + "epoch": 2.066750487709952, + "grad_norm": 1.1187653470301484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426150 + }, + { + "epoch": 2.0667989859027878, + "grad_norm": 1.4418170657393148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426160 + }, + { + "epoch": 2.0668474840956237, + "grad_norm": 1.5654720186830673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426170 + }, + { + "epoch": 2.06689598228846, + "grad_norm": 8.520308014681177e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426180 + }, + { + "epoch": 2.066944480481296, + "grad_norm": 1.2272669991375551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426190 + }, + { + "epoch": 2.0669929786741323, + "grad_norm": 1.598841059546885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426200 + }, + { + "epoch": 2.0670414768669683, + "grad_norm": 8.37265190511971e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426210 + }, + { + "epoch": 2.067089975059804, + "grad_norm": 1.1548620726387071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426220 + }, + { + "epoch": 2.0671384732526406, + "grad_norm": 1.2821214312452867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426230 + }, + { + "epoch": 2.0671869714454765, + "grad_norm": 2.0121012411777883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426240 + }, + { + "epoch": 2.0672354696383124, + "grad_norm": 1.4130851155869095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426250 + }, + { + "epoch": 2.0672839678311488, + "grad_norm": 9.175617599055386e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426260 + }, + { + "epoch": 2.0673324660239847, + "grad_norm": 1.445012109968502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426270 + }, + { + "epoch": 2.067380964216821, + "grad_norm": 6.978524869083458e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426280 + }, + { + "epoch": 2.067429462409657, + "grad_norm": 7.827543058169795e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426290 + }, + { + "epoch": 2.067477960602493, + "grad_norm": 1.3901330753185448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426300 + }, + { + "epoch": 2.0675264587953293, + "grad_norm": 1.3555416344956939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426310 + }, + { + "epoch": 2.067574956988165, + "grad_norm": 6.989325118667011e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426320 + }, + { + "epoch": 2.0676234551810015, + "grad_norm": 8.82640360799769e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426330 + }, + { + "epoch": 2.0676719533738375, + "grad_norm": 7.285561931524853e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426340 + }, + { + "epoch": 2.0677204515666734, + "grad_norm": 1.403389582321779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426350 + }, + { + "epoch": 2.0677689497595098, + "grad_norm": 1.1646100084306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426360 + }, + { + "epoch": 2.0678174479523457, + "grad_norm": 9.230055830755646e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426370 + }, + { + "epoch": 2.0678659461451816, + "grad_norm": 2.6754007009799352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426380 + }, + { + "epoch": 2.067914444338018, + "grad_norm": 9.69585034482634e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426390 + }, + { + "epoch": 2.067962942530854, + "grad_norm": 1.2433934770683663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426400 + }, + { + "epoch": 2.06801144072369, + "grad_norm": 1.624333734184802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426410 + }, + { + "epoch": 2.068059938916526, + "grad_norm": 1.2413616801154603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426420 + }, + { + "epoch": 2.068108437109362, + "grad_norm": 8.918703109372927e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426430 + }, + { + "epoch": 2.0681569353021985, + "grad_norm": 1.216385747682125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426440 + }, + { + "epoch": 2.0682054334950344, + "grad_norm": 1.382716163789155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426450 + }, + { + "epoch": 2.0682539316878703, + "grad_norm": 8.776035897994916e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426460 + }, + { + "epoch": 2.0683024298807067, + "grad_norm": 1.5521038676524768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426470 + }, + { + "epoch": 2.0683509280735426, + "grad_norm": 9.582238114091979e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426480 + }, + { + "epoch": 2.068399426266379, + "grad_norm": 7.957941861036488e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426490 + }, + { + "epoch": 2.068447924459215, + "grad_norm": 9.867304306965252e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426500 + }, + { + "epoch": 2.068496422652051, + "grad_norm": 1.3006501653478608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426510 + }, + { + "epoch": 2.068544920844887, + "grad_norm": 1.3321748149053292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426520 + }, + { + "epoch": 2.068593419037723, + "grad_norm": 1.474657285172043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426530 + }, + { + "epoch": 2.068641917230559, + "grad_norm": 1.3132335219268043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426540 + }, + { + "epoch": 2.0686904154233954, + "grad_norm": 1.8985240046731633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426550 + }, + { + "epoch": 2.0687389136162313, + "grad_norm": 8.754323488346927e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426560 + }, + { + "epoch": 2.0687874118090677, + "grad_norm": 2.090096273832387e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426570 + }, + { + "epoch": 2.0688359100019036, + "grad_norm": 1.1269511546174726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426580 + }, + { + "epoch": 2.0688844081947395, + "grad_norm": 6.579422340280416e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426590 + }, + { + "epoch": 2.068932906387576, + "grad_norm": 1.4086265487378569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426600 + }, + { + "epoch": 2.068981404580412, + "grad_norm": 1.3734451798086411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426610 + }, + { + "epoch": 2.0690299027732477, + "grad_norm": 1.1467372829088163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426620 + }, + { + "epoch": 2.069078400966084, + "grad_norm": 9.487703067634357e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426630 + }, + { + "epoch": 2.06912689915892, + "grad_norm": 1.867466892235825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426640 + }, + { + "epoch": 2.0691753973517564, + "grad_norm": 1.0789755755524766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426650 + }, + { + "epoch": 2.0692238955445923, + "grad_norm": 1.2121684100918628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426660 + }, + { + "epoch": 2.069272393737428, + "grad_norm": 9.76699165988748e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426670 + }, + { + "epoch": 2.0693208919302646, + "grad_norm": 1.4382329993622989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426680 + }, + { + "epoch": 2.0693693901231005, + "grad_norm": 1.6378340461642438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426690 + }, + { + "epoch": 2.0694178883159364, + "grad_norm": 1.4817850946258204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426700 + }, + { + "epoch": 2.069466386508773, + "grad_norm": 1.1785668441177677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426710 + }, + { + "epoch": 2.0695148847016087, + "grad_norm": 9.665289901761298e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426720 + }, + { + "epoch": 2.069563382894445, + "grad_norm": 1.4914073531713257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426730 + }, + { + "epoch": 2.069611881087281, + "grad_norm": 8.458592049009894e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426740 + }, + { + "epoch": 2.069660379280117, + "grad_norm": 7.168567073279064e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426750 + }, + { + "epoch": 2.0697088774729533, + "grad_norm": 1.0397990024557657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426760 + }, + { + "epoch": 2.069757375665789, + "grad_norm": 1.1620810980161878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426770 + }, + { + "epoch": 2.069805873858625, + "grad_norm": 8.168571596911534e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426780 + }, + { + "epoch": 2.0698543720514615, + "grad_norm": 1.0341995704266083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426790 + }, + { + "epoch": 2.0699028702442974, + "grad_norm": 7.214485009399141e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426800 + }, + { + "epoch": 2.069951368437134, + "grad_norm": 1.6140727865376903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426810 + }, + { + "epoch": 2.0699998666299697, + "grad_norm": 1.0977301734271805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426820 + }, + { + "epoch": 2.0700483648228056, + "grad_norm": 1.169600682970895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426830 + }, + { + "epoch": 2.070096863015642, + "grad_norm": 1.3470088155997928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426840 + }, + { + "epoch": 2.070145361208478, + "grad_norm": 8.866305023502719e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426850 + }, + { + "epoch": 2.0701938594013143, + "grad_norm": 1.4014491789282602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426860 + }, + { + "epoch": 2.07024235759415, + "grad_norm": 9.791573774009521e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426870 + }, + { + "epoch": 2.070290855786986, + "grad_norm": 1.699543084043853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426880 + }, + { + "epoch": 2.0703393539798225, + "grad_norm": 9.849515869575498e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426890 + }, + { + "epoch": 2.0703878521726584, + "grad_norm": 9.927325628211747e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426900 + }, + { + "epoch": 2.0704363503654943, + "grad_norm": 1.5101599970535062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426910 + }, + { + "epoch": 2.0704848485583307, + "grad_norm": 9.982072946002063e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426920 + }, + { + "epoch": 2.0705333467511666, + "grad_norm": 1.1569871283256816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426930 + }, + { + "epoch": 2.0705818449440025, + "grad_norm": 1.446869113408411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426940 + }, + { + "epoch": 2.070630343136839, + "grad_norm": 1.1431249724580539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426950 + }, + { + "epoch": 2.070678841329675, + "grad_norm": 9.78731140577338e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426960 + }, + { + "epoch": 2.070727339522511, + "grad_norm": 1.2003096294677107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426970 + }, + { + "epoch": 2.070775837715347, + "grad_norm": 5.651127121808486e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426980 + }, + { + "epoch": 2.070824335908183, + "grad_norm": 9.93060389475886e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 426990 + }, + { + "epoch": 2.0708728341010194, + "grad_norm": 1.889008949262916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427000 + }, + { + "epoch": 2.0709213322938553, + "grad_norm": 1.3117721131550297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427010 + }, + { + "epoch": 2.0709698304866917, + "grad_norm": 1.4928385638768304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427020 + }, + { + "epoch": 2.0710183286795276, + "grad_norm": 1.2228527523916455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427030 + }, + { + "epoch": 2.0710668268723635, + "grad_norm": 9.76874137137429e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427040 + }, + { + "epoch": 2.0711153250652, + "grad_norm": 1.925041992478782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427050 + }, + { + "epoch": 2.071163823258036, + "grad_norm": 8.346092705835417e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427060 + }, + { + "epoch": 2.0712123214508718, + "grad_norm": 1.3753933991722533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427070 + }, + { + "epoch": 2.071260819643708, + "grad_norm": 5.833337812788386e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427080 + }, + { + "epoch": 2.071309317836544, + "grad_norm": 1.2658320613923024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427090 + }, + { + "epoch": 2.0713578160293804, + "grad_norm": 2.1270755823366017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427100 + }, + { + "epoch": 2.0714063142222163, + "grad_norm": 1.3776374707674677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427110 + }, + { + "epoch": 2.0714548124150522, + "grad_norm": 7.650039712814305e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427120 + }, + { + "epoch": 2.0715033106078886, + "grad_norm": 9.006997814253737e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427130 + }, + { + "epoch": 2.0715518088007245, + "grad_norm": 1.0653711690622458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427140 + }, + { + "epoch": 2.0716003069935605, + "grad_norm": 9.642943332721643e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427150 + }, + { + "epoch": 2.071648805186397, + "grad_norm": 8.843310972395102e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427160 + }, + { + "epoch": 2.0716973033792327, + "grad_norm": 1.371176594489043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427170 + }, + { + "epoch": 2.071745801572069, + "grad_norm": 1.1160154578249148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427180 + }, + { + "epoch": 2.071794299764905, + "grad_norm": 1.231147095381857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427190 + }, + { + "epoch": 2.071842797957741, + "grad_norm": 9.801130573805494e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427200 + }, + { + "epoch": 2.0718912961505773, + "grad_norm": 1.150203399191696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427210 + }, + { + "epoch": 2.0719397943434132, + "grad_norm": 9.182992144474156e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427220 + }, + { + "epoch": 2.071988292536249, + "grad_norm": 1.1771267516280659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427230 + }, + { + "epoch": 2.0720367907290855, + "grad_norm": 1.8892055919650375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427240 + }, + { + "epoch": 2.0720852889219215, + "grad_norm": 1.5046358825543393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427250 + }, + { + "epoch": 2.072133787114758, + "grad_norm": 1.2989173292510259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427260 + }, + { + "epoch": 2.0721822853075937, + "grad_norm": 7.62529150932778e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427270 + }, + { + "epoch": 2.0722307835004297, + "grad_norm": 1.0195794430956084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427280 + }, + { + "epoch": 2.072279281693266, + "grad_norm": 8.932531159189239e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427290 + }, + { + "epoch": 2.072327779886102, + "grad_norm": 1.575788566299252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427300 + }, + { + "epoch": 2.072376278078938, + "grad_norm": 1.1773646058088616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427310 + }, + { + "epoch": 2.0724247762717742, + "grad_norm": 7.434509008419354e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427320 + }, + { + "epoch": 2.07247327446461, + "grad_norm": 9.09507136270804e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427330 + }, + { + "epoch": 2.0725217726574465, + "grad_norm": 9.11784869828125e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427340 + }, + { + "epoch": 2.0725702708502824, + "grad_norm": 1.1209948524992797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427350 + }, + { + "epoch": 2.0726187690431184, + "grad_norm": 1.0803151262450683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427360 + }, + { + "epoch": 2.0726672672359547, + "grad_norm": 1.3055883485435515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427370 + }, + { + "epoch": 2.0727157654287907, + "grad_norm": 7.893387277135844e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427380 + }, + { + "epoch": 2.072764263621627, + "grad_norm": 8.837766962699334e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427390 + }, + { + "epoch": 2.072812761814463, + "grad_norm": 1.2437206819981839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427400 + }, + { + "epoch": 2.072861260007299, + "grad_norm": 1.6019306769976538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427410 + }, + { + "epoch": 2.0729097582001352, + "grad_norm": 1.2582382247217083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427420 + }, + { + "epoch": 2.072958256392971, + "grad_norm": 1.507326530258979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427430 + }, + { + "epoch": 2.073006754585807, + "grad_norm": 1.2352352918298948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427440 + }, + { + "epoch": 2.0730552527786434, + "grad_norm": 7.291720560687054e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427450 + }, + { + "epoch": 2.0731037509714794, + "grad_norm": 1.2277301841834287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427460 + }, + { + "epoch": 2.0731522491643153, + "grad_norm": 1.2585338104997845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427470 + }, + { + "epoch": 2.0732007473571517, + "grad_norm": 9.077015583613957e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427480 + }, + { + "epoch": 2.0732492455499876, + "grad_norm": 1.462744769753499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427490 + }, + { + "epoch": 2.073297743742824, + "grad_norm": 9.484918628288597e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427500 + }, + { + "epoch": 2.07334624193566, + "grad_norm": 1.1619603945689505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427510 + }, + { + "epoch": 2.073394740128496, + "grad_norm": 1.2286993644750055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427520 + }, + { + "epoch": 2.073443238321332, + "grad_norm": 1.1232367924662867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427530 + }, + { + "epoch": 2.073491736514168, + "grad_norm": 8.918927818513112e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427540 + }, + { + "epoch": 2.0735402347070044, + "grad_norm": 1.2628302847872419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427550 + }, + { + "epoch": 2.0735887328998404, + "grad_norm": 1.330087062711982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427560 + }, + { + "epoch": 2.0736372310926763, + "grad_norm": 1.0450361465075275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427570 + }, + { + "epoch": 2.0736857292855126, + "grad_norm": 1.222762158192836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427580 + }, + { + "epoch": 2.0737342274783486, + "grad_norm": 1.1879554229210498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427590 + }, + { + "epoch": 2.0737827256711845, + "grad_norm": 1.2678507133045969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427600 + }, + { + "epoch": 2.073831223864021, + "grad_norm": 1.9667135475742725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427610 + }, + { + "epoch": 2.073879722056857, + "grad_norm": 1.4320588270777534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427620 + }, + { + "epoch": 2.073928220249693, + "grad_norm": 6.757449710903529e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427630 + }, + { + "epoch": 2.073976718442529, + "grad_norm": 1.4681278415196175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427640 + }, + { + "epoch": 2.074025216635365, + "grad_norm": 1.2746937727570185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427650 + }, + { + "epoch": 2.0740737148282014, + "grad_norm": 2.0758458063596663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427660 + }, + { + "epoch": 2.0741222130210373, + "grad_norm": 7.464503681831047e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427670 + }, + { + "epoch": 2.074170711213873, + "grad_norm": 1.0855006671306455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427680 + }, + { + "epoch": 2.0742192094067096, + "grad_norm": 1.464489240987632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427690 + }, + { + "epoch": 2.0742677075995455, + "grad_norm": 1.866143328754788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427700 + }, + { + "epoch": 2.074316205792382, + "grad_norm": 1.1563376922651969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427710 + }, + { + "epoch": 2.0743647039852178, + "grad_norm": 9.660185540383281e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427720 + }, + { + "epoch": 2.0744132021780537, + "grad_norm": 1.1779619946139519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427730 + }, + { + "epoch": 2.07446170037089, + "grad_norm": 1.1852971937287293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427740 + }, + { + "epoch": 2.074510198563726, + "grad_norm": 9.046211779661917e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427750 + }, + { + "epoch": 2.074558696756562, + "grad_norm": 1.1177346159740864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427760 + }, + { + "epoch": 2.0746071949493983, + "grad_norm": 1.4316126062396961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427770 + }, + { + "epoch": 2.074655693142234, + "grad_norm": 1.129248694553553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427780 + }, + { + "epoch": 2.0747041913350706, + "grad_norm": 1.5619383120224484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427790 + }, + { + "epoch": 2.0747526895279065, + "grad_norm": 1.819293338201078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427800 + }, + { + "epoch": 2.0748011877207424, + "grad_norm": 1.508244373837897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427810 + }, + { + "epoch": 2.0748496859135788, + "grad_norm": 1.0282713347464778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427820 + }, + { + "epoch": 2.0748981841064147, + "grad_norm": 1.2527267223561012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427830 + }, + { + "epoch": 2.0749466822992506, + "grad_norm": 1.0095556390865568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427840 + }, + { + "epoch": 2.074995180492087, + "grad_norm": 1.4149954985498425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427850 + }, + { + "epoch": 2.075043678684923, + "grad_norm": 7.389930445356185e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427860 + }, + { + "epoch": 2.0750921768777593, + "grad_norm": 1.2098916535308035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427870 + }, + { + "epoch": 2.075140675070595, + "grad_norm": 7.257860090703616e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427880 + }, + { + "epoch": 2.075189173263431, + "grad_norm": 1.1218384443623108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427890 + }, + { + "epoch": 2.0752376714562675, + "grad_norm": 1.2590165354708915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427900 + }, + { + "epoch": 2.0752861696491034, + "grad_norm": 1.1001618283046355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427910 + }, + { + "epoch": 2.0753346678419398, + "grad_norm": 8.403830520364863e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427920 + }, + { + "epoch": 2.0753831660347757, + "grad_norm": 9.540459977586124e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427930 + }, + { + "epoch": 2.0754316642276116, + "grad_norm": 1.7007359076615103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427940 + }, + { + "epoch": 2.075480162420448, + "grad_norm": 1.3667717624343823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427950 + }, + { + "epoch": 2.075528660613284, + "grad_norm": 1.9147149643572448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427960 + }, + { + "epoch": 2.07557715880612, + "grad_norm": 1.2434457907772867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427970 + }, + { + "epoch": 2.075625656998956, + "grad_norm": 9.77706182681004e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427980 + }, + { + "epoch": 2.075674155191792, + "grad_norm": 1.2871431032124292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 427990 + }, + { + "epoch": 2.0757226533846285, + "grad_norm": 1.1313946224333904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428000 + }, + { + "epoch": 2.0757711515774644, + "grad_norm": 7.356619757814542e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428010 + }, + { + "epoch": 2.0758196497703003, + "grad_norm": 9.934410627465695e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428020 + }, + { + "epoch": 2.0758681479631367, + "grad_norm": 1.1289911228118399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428030 + }, + { + "epoch": 2.0759166461559726, + "grad_norm": 4.973977674183061e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428040 + }, + { + "epoch": 2.0759651443488085, + "grad_norm": 1.0636181713152837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428050 + }, + { + "epoch": 2.076013642541645, + "grad_norm": 1.0197480193596675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428060 + }, + { + "epoch": 2.076062140734481, + "grad_norm": 1.2483048372757821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428070 + }, + { + "epoch": 2.076110638927317, + "grad_norm": 1.3883812322035283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428080 + }, + { + "epoch": 2.076159137120153, + "grad_norm": 9.562627134585e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428090 + }, + { + "epoch": 2.076207635312989, + "grad_norm": 1.016050443780614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428100 + }, + { + "epoch": 2.0762561335058254, + "grad_norm": 1.3449828806244568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428110 + }, + { + "epoch": 2.0763046316986613, + "grad_norm": 1.2246854197428547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428120 + }, + { + "epoch": 2.0763531298914972, + "grad_norm": 1.4929234737337538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428130 + }, + { + "epoch": 2.0764016280843336, + "grad_norm": 1.2987906750083766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428140 + }, + { + "epoch": 2.0764501262771695, + "grad_norm": 1.086989165344221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428150 + }, + { + "epoch": 2.076498624470006, + "grad_norm": 1.3119789699089779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428160 + }, + { + "epoch": 2.076547122662842, + "grad_norm": 9.92592763537914e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428170 + }, + { + "epoch": 2.0765956208556777, + "grad_norm": 1.0962503793621181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428180 + }, + { + "epoch": 2.076644119048514, + "grad_norm": 1.0922691195958123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428190 + }, + { + "epoch": 2.07669261724135, + "grad_norm": 1.0033518904606353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428200 + }, + { + "epoch": 2.076741115434186, + "grad_norm": 9.533370537440078e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428210 + }, + { + "epoch": 2.0767896136270223, + "grad_norm": 6.5909673274688885e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428220 + }, + { + "epoch": 2.0768381118198582, + "grad_norm": 9.844616677412432e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428230 + }, + { + "epoch": 2.0768866100126946, + "grad_norm": 1.4918402513330875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428240 + }, + { + "epoch": 2.0769351082055305, + "grad_norm": 1.3339178650539907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428250 + }, + { + "epoch": 2.0769836063983664, + "grad_norm": 1.2153031470063524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428260 + }, + { + "epoch": 2.077032104591203, + "grad_norm": 1.5779352935396673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428270 + }, + { + "epoch": 2.0770806027840387, + "grad_norm": 2.0030459069175777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428280 + }, + { + "epoch": 2.0771291009768746, + "grad_norm": 1.1914768727194769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428290 + }, + { + "epoch": 2.077177599169711, + "grad_norm": 1.3396521225672586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428300 + }, + { + "epoch": 2.077226097362547, + "grad_norm": 9.870703365777445e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428310 + }, + { + "epoch": 2.0772745955553833, + "grad_norm": 1.506499991421606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428320 + }, + { + "epoch": 2.077323093748219, + "grad_norm": 1.0192119148655365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428330 + }, + { + "epoch": 2.077371591941055, + "grad_norm": 1.049320275114951e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428340 + }, + { + "epoch": 2.0774200901338915, + "grad_norm": 1.526702320120421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428350 + }, + { + "epoch": 2.0774685883267274, + "grad_norm": 1.0474392020398682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428360 + }, + { + "epoch": 2.0775170865195633, + "grad_norm": 1.1270303801325099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428370 + }, + { + "epoch": 2.0775655847123997, + "grad_norm": 1.461343934749948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428380 + }, + { + "epoch": 2.0776140829052356, + "grad_norm": 1.6547389236620802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428390 + }, + { + "epoch": 2.077662581098072, + "grad_norm": 9.093693797979086e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428400 + }, + { + "epoch": 2.077711079290908, + "grad_norm": 1.7349618630646546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428410 + }, + { + "epoch": 2.077759577483744, + "grad_norm": 1.5736439706870442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428420 + }, + { + "epoch": 2.07780807567658, + "grad_norm": 1.8766561638017265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428430 + }, + { + "epoch": 2.077856573869416, + "grad_norm": 1.326461784856292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428440 + }, + { + "epoch": 2.0779050720622525, + "grad_norm": 9.460909389247263e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428450 + }, + { + "epoch": 2.0779535702550884, + "grad_norm": 5.815632420080874e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428460 + }, + { + "epoch": 2.0780020684479243, + "grad_norm": 1.463017085256979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428470 + }, + { + "epoch": 2.0780505666407607, + "grad_norm": 1.349756839630345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428480 + }, + { + "epoch": 2.0780990648335966, + "grad_norm": 1.6265113700342226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428490 + }, + { + "epoch": 2.0781475630264326, + "grad_norm": 1.1361765750450559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428500 + }, + { + "epoch": 2.078196061219269, + "grad_norm": 9.83351711170144e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428510 + }, + { + "epoch": 2.078244559412105, + "grad_norm": 2.2008585176536144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428520 + }, + { + "epoch": 2.078293057604941, + "grad_norm": 1.0526385985087927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428530 + }, + { + "epoch": 2.078341555797777, + "grad_norm": 1.7279861097563298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428540 + }, + { + "epoch": 2.078390053990613, + "grad_norm": 1.0801538330440508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428550 + }, + { + "epoch": 2.0784385521834494, + "grad_norm": 7.028676751730245e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428560 + }, + { + "epoch": 2.0784870503762853, + "grad_norm": 7.208182495332949e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428570 + }, + { + "epoch": 2.0785355485691213, + "grad_norm": 1.6836517957585784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428580 + }, + { + "epoch": 2.0785840467619576, + "grad_norm": 1.1679306410883328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428590 + }, + { + "epoch": 2.0786325449547935, + "grad_norm": 1.3672774912265595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428600 + }, + { + "epoch": 2.07868104314763, + "grad_norm": 1.3873449056234222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428610 + }, + { + "epoch": 2.078729541340466, + "grad_norm": 1.0946537010170232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428620 + }, + { + "epoch": 2.0787780395333018, + "grad_norm": 1.871229038385991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428630 + }, + { + "epoch": 2.078826537726138, + "grad_norm": 1.3705508727923643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428640 + }, + { + "epoch": 2.078875035918974, + "grad_norm": 1.0459947574759099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428650 + }, + { + "epoch": 2.07892353411181, + "grad_norm": 7.283087022358359e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428660 + }, + { + "epoch": 2.0789720323046463, + "grad_norm": 7.877295260527717e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428670 + }, + { + "epoch": 2.0790205304974823, + "grad_norm": 7.895983422656627e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428680 + }, + { + "epoch": 2.0790690286903186, + "grad_norm": 1.013103290148365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428690 + }, + { + "epoch": 2.0791175268831545, + "grad_norm": 1.065071408845597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428700 + }, + { + "epoch": 2.0791660250759905, + "grad_norm": 1.2722510156493172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428710 + }, + { + "epoch": 2.079214523268827, + "grad_norm": 1.3563934864180283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428720 + }, + { + "epoch": 2.0792630214616628, + "grad_norm": 1.3649417596184321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428730 + }, + { + "epoch": 2.0793115196544987, + "grad_norm": 1.0015821061415409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428740 + }, + { + "epoch": 2.079360017847335, + "grad_norm": 1.590654541416825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428750 + }, + { + "epoch": 2.079408516040171, + "grad_norm": 1.2389119952160854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428760 + }, + { + "epoch": 2.0794570142330073, + "grad_norm": 1.540095873053815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428770 + }, + { + "epoch": 2.0795055124258432, + "grad_norm": 1.0651624471336163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428780 + }, + { + "epoch": 2.079554010618679, + "grad_norm": 1.1117250231507114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428790 + }, + { + "epoch": 2.0796025088115155, + "grad_norm": 6.17760953502966e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428800 + }, + { + "epoch": 2.0796510070043515, + "grad_norm": 1.1325002269302331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428810 + }, + { + "epoch": 2.0796995051971874, + "grad_norm": 1.749660327732272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428820 + }, + { + "epoch": 2.0797480033900237, + "grad_norm": 9.738607253950704e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428830 + }, + { + "epoch": 2.0797965015828597, + "grad_norm": 1.1880488592908023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428840 + }, + { + "epoch": 2.079844999775696, + "grad_norm": 1.084510081739154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428850 + }, + { + "epoch": 2.079893497968532, + "grad_norm": 6.210257197380997e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428860 + }, + { + "epoch": 2.079941996161368, + "grad_norm": 2.0612530349239933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428870 + }, + { + "epoch": 2.0799904943542042, + "grad_norm": 1.2774890478794987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428880 + }, + { + "epoch": 2.08003899254704, + "grad_norm": 1.4632698608352257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428890 + }, + { + "epoch": 2.080087490739876, + "grad_norm": 1.2599440601945844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428900 + }, + { + "epoch": 2.0801359889327125, + "grad_norm": 7.976511007257159e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428910 + }, + { + "epoch": 2.0801844871255484, + "grad_norm": 1.900198398629982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428920 + }, + { + "epoch": 2.0802329853183847, + "grad_norm": 1.1831050805710674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428930 + }, + { + "epoch": 2.0802814835112207, + "grad_norm": 1.4780184187657142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428940 + }, + { + "epoch": 2.0803299817040566, + "grad_norm": 1.4731890374264367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428950 + }, + { + "epoch": 2.080378479896893, + "grad_norm": 1.1216874540309618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428960 + }, + { + "epoch": 2.080426978089729, + "grad_norm": 1.0286862028863197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428970 + }, + { + "epoch": 2.0804754762825652, + "grad_norm": 1.2872399146601765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428980 + }, + { + "epoch": 2.080523974475401, + "grad_norm": 6.937769914117098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 428990 + }, + { + "epoch": 2.080572472668237, + "grad_norm": 1.0878379086420864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429000 + }, + { + "epoch": 2.0806209708610734, + "grad_norm": 1.3160612155616036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429010 + }, + { + "epoch": 2.0806694690539094, + "grad_norm": 6.449737632863162e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429020 + }, + { + "epoch": 2.0807179672467453, + "grad_norm": 1.2711399044462723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429030 + }, + { + "epoch": 2.0807664654395817, + "grad_norm": 8.588822986155265e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429040 + }, + { + "epoch": 2.0808149636324176, + "grad_norm": 7.165180893053957e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429050 + }, + { + "epoch": 2.080863461825254, + "grad_norm": 1.05786561732657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429060 + }, + { + "epoch": 2.08091196001809, + "grad_norm": 1.369722291144626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429070 + }, + { + "epoch": 2.080960458210926, + "grad_norm": 6.730089374684667e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429080 + }, + { + "epoch": 2.081008956403762, + "grad_norm": 1.4839531381483084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429090 + }, + { + "epoch": 2.081057454596598, + "grad_norm": 1.3450093483413639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429100 + }, + { + "epoch": 2.081105952789434, + "grad_norm": 1.4701555528517929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429110 + }, + { + "epoch": 2.0811544509822704, + "grad_norm": 7.926885814413254e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429120 + }, + { + "epoch": 2.0812029491751063, + "grad_norm": 7.401440793586289e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429130 + }, + { + "epoch": 2.0812514473679427, + "grad_norm": 7.824726644400926e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429140 + }, + { + "epoch": 2.0812999455607786, + "grad_norm": 1.788753145604005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429150 + }, + { + "epoch": 2.0813484437536145, + "grad_norm": 1.3333997905817796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429160 + }, + { + "epoch": 2.081396941946451, + "grad_norm": 1.3083280236969586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429170 + }, + { + "epoch": 2.081445440139287, + "grad_norm": 1.2918674130446561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429180 + }, + { + "epoch": 2.0814939383321227, + "grad_norm": 1.078583267144495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429190 + }, + { + "epoch": 2.081542436524959, + "grad_norm": 2.2413750855321268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429200 + }, + { + "epoch": 2.081590934717795, + "grad_norm": 1.2780160929537487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429210 + }, + { + "epoch": 2.0816394329106314, + "grad_norm": 1.7883607483781816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429220 + }, + { + "epoch": 2.0816879311034673, + "grad_norm": 1.4686365901184217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429230 + }, + { + "epoch": 2.081736429296303, + "grad_norm": 1.2225537027177324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429240 + }, + { + "epoch": 2.0817849274891396, + "grad_norm": 1.1843669156519354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429250 + }, + { + "epoch": 2.0818334256819755, + "grad_norm": 1.4190884023435046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429260 + }, + { + "epoch": 2.0818819238748114, + "grad_norm": 8.610235191497395e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429270 + }, + { + "epoch": 2.081930422067648, + "grad_norm": 1.2432014528940272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429280 + }, + { + "epoch": 2.0819789202604837, + "grad_norm": 1.2597629606148075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429290 + }, + { + "epoch": 2.08202741845332, + "grad_norm": 1.0462622768159235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429300 + }, + { + "epoch": 2.082075916646156, + "grad_norm": 1.966940033071296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429310 + }, + { + "epoch": 2.082124414838992, + "grad_norm": 1.326224818853916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429320 + }, + { + "epoch": 2.0821729130318283, + "grad_norm": 1.1545448153071902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429330 + }, + { + "epoch": 2.082221411224664, + "grad_norm": 8.018275821086718e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429340 + }, + { + "epoch": 2.0822699094175, + "grad_norm": 9.572584502848258e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429350 + }, + { + "epoch": 2.0823184076103365, + "grad_norm": 1.3342583038422617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429360 + }, + { + "epoch": 2.0823669058031724, + "grad_norm": 1.3659750663919112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429370 + }, + { + "epoch": 2.0824154039960088, + "grad_norm": 7.453247796718188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429380 + }, + { + "epoch": 2.0824639021888447, + "grad_norm": 1.240186975337565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429390 + }, + { + "epoch": 2.0825124003816806, + "grad_norm": 9.588879024136077e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429400 + }, + { + "epoch": 2.082560898574517, + "grad_norm": 1.696024298780685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429410 + }, + { + "epoch": 2.082609396767353, + "grad_norm": 9.337663087194414e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429420 + }, + { + "epoch": 2.082657894960189, + "grad_norm": 1.024343543321038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429430 + }, + { + "epoch": 2.082706393153025, + "grad_norm": 1.6173389738582955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429440 + }, + { + "epoch": 2.082754891345861, + "grad_norm": 7.039748339821017e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429450 + }, + { + "epoch": 2.0828033895386975, + "grad_norm": 9.799981270930402e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429460 + }, + { + "epoch": 2.0828518877315334, + "grad_norm": 1.4543792836718694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429470 + }, + { + "epoch": 2.0829003859243693, + "grad_norm": 9.380453747098727e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429480 + }, + { + "epoch": 2.0829488841172057, + "grad_norm": 1.8710409221966984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429490 + }, + { + "epoch": 2.0829973823100416, + "grad_norm": 6.504007554752889e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429500 + }, + { + "epoch": 2.083045880502878, + "grad_norm": 1.3177885449522364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429510 + }, + { + "epoch": 2.083094378695714, + "grad_norm": 1.6052258189347413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429520 + }, + { + "epoch": 2.08314287688855, + "grad_norm": 1.476410727008215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429530 + }, + { + "epoch": 2.083191375081386, + "grad_norm": 9.139563772464498e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429540 + }, + { + "epoch": 2.083239873274222, + "grad_norm": 1.6468320040985418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429550 + }, + { + "epoch": 2.083288371467058, + "grad_norm": 9.831289560224832e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429560 + }, + { + "epoch": 2.0833368696598944, + "grad_norm": 1.6428167270987615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429570 + }, + { + "epoch": 2.0833853678527303, + "grad_norm": 1.0382174231438057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429580 + }, + { + "epoch": 2.0834338660455667, + "grad_norm": 9.323159133600711e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429590 + }, + { + "epoch": 2.0834823642384026, + "grad_norm": 6.8306595935041514e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429600 + }, + { + "epoch": 2.0835308624312385, + "grad_norm": 1.6489213550130444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429610 + }, + { + "epoch": 2.083579360624075, + "grad_norm": 1.827899609452288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429620 + }, + { + "epoch": 2.083627858816911, + "grad_norm": 1.4939471881803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429630 + }, + { + "epoch": 2.0836763570097467, + "grad_norm": 1.2869460164210977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429640 + }, + { + "epoch": 2.083724855202583, + "grad_norm": 9.856266025565219e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429650 + }, + { + "epoch": 2.083773353395419, + "grad_norm": 1.1037409208825011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429660 + }, + { + "epoch": 2.0838218515882554, + "grad_norm": 8.71329319807046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429670 + }, + { + "epoch": 2.0838703497810913, + "grad_norm": 7.833913961974304e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429680 + }, + { + "epoch": 2.0839188479739272, + "grad_norm": 1.09574145312763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429690 + }, + { + "epoch": 2.0839673461667636, + "grad_norm": 1.0200432498663758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429700 + }, + { + "epoch": 2.0840158443595995, + "grad_norm": 1.2879562305556647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429710 + }, + { + "epoch": 2.0840643425524354, + "grad_norm": 8.913226601237056e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429720 + }, + { + "epoch": 2.084112840745272, + "grad_norm": 1.3003494281349504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429730 + }, + { + "epoch": 2.0841613389381077, + "grad_norm": 6.287485643241553e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429740 + }, + { + "epoch": 2.084209837130944, + "grad_norm": 9.847815007901772e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429750 + }, + { + "epoch": 2.08425833532378, + "grad_norm": 1.17747003258728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429760 + }, + { + "epoch": 2.084306833516616, + "grad_norm": 8.496846781724798e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429770 + }, + { + "epoch": 2.0843553317094523, + "grad_norm": 1.2286498929370282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429780 + }, + { + "epoch": 2.0844038299022882, + "grad_norm": 9.56544088381861e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429790 + }, + { + "epoch": 2.084452328095124, + "grad_norm": 1.6161399329917003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429800 + }, + { + "epoch": 2.0845008262879605, + "grad_norm": 1.0470387223904254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429810 + }, + { + "epoch": 2.0845493244807964, + "grad_norm": 1.6023102844542336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429820 + }, + { + "epoch": 2.084597822673633, + "grad_norm": 1.1734326399448491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429830 + }, + { + "epoch": 2.0846463208664687, + "grad_norm": 1.2258214887594931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429840 + }, + { + "epoch": 2.0846948190593046, + "grad_norm": 1.0243318193658979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429850 + }, + { + "epoch": 2.084743317252141, + "grad_norm": 7.0904238036462175e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429860 + }, + { + "epoch": 2.084791815444977, + "grad_norm": 9.398814171390768e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429870 + }, + { + "epoch": 2.084840313637813, + "grad_norm": 1.0295006624971847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429880 + }, + { + "epoch": 2.0848888118306492, + "grad_norm": 8.226523462440127e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429890 + }, + { + "epoch": 2.084937310023485, + "grad_norm": 1.0346723477994146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429900 + }, + { + "epoch": 2.0849858082163215, + "grad_norm": 1.3808430843198494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429910 + }, + { + "epoch": 2.0850343064091574, + "grad_norm": 8.234545489926859e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429920 + }, + { + "epoch": 2.0850828046019934, + "grad_norm": 1.302734897734581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429930 + }, + { + "epoch": 2.0851313027948297, + "grad_norm": 1.3253873554219808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429940 + }, + { + "epoch": 2.0851798009876656, + "grad_norm": 1.4734938602600778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429950 + }, + { + "epoch": 2.0852282991805016, + "grad_norm": 1.1422574885955328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429960 + }, + { + "epoch": 2.085276797373338, + "grad_norm": 1.0255900129152451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429970 + }, + { + "epoch": 2.085325295566174, + "grad_norm": 1.1371237285118241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429980 + }, + { + "epoch": 2.08537379375901, + "grad_norm": 2.430609136183648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 429990 + }, + { + "epoch": 2.085422291951846, + "grad_norm": 8.940068241258814e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430000 + }, + { + "epoch": 2.085470790144682, + "grad_norm": 1.1346988237903588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430010 + }, + { + "epoch": 2.0855192883375184, + "grad_norm": 1.241614278058023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430020 + }, + { + "epoch": 2.0855677865303544, + "grad_norm": 2.2669395249863555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430030 + }, + { + "epoch": 2.0856162847231907, + "grad_norm": 1.1790951326418053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430040 + }, + { + "epoch": 2.0856647829160266, + "grad_norm": 1.4588329655396137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430050 + }, + { + "epoch": 2.0857132811088626, + "grad_norm": 6.601196034239365e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430060 + }, + { + "epoch": 2.085761779301699, + "grad_norm": 9.11885500443077e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430070 + }, + { + "epoch": 2.085810277494535, + "grad_norm": 1.74153278464928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430080 + }, + { + "epoch": 2.0858587756873708, + "grad_norm": 6.773038130347686e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430090 + }, + { + "epoch": 2.085907273880207, + "grad_norm": 1.1243009190309294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430100 + }, + { + "epoch": 2.085955772073043, + "grad_norm": 7.706876026247755e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430110 + }, + { + "epoch": 2.0860042702658794, + "grad_norm": 1.3967535572589895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430120 + }, + { + "epoch": 2.0860527684587153, + "grad_norm": 9.854346672000247e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430130 + }, + { + "epoch": 2.0861012666515513, + "grad_norm": 7.051137007607622e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430140 + }, + { + "epoch": 2.0861497648443876, + "grad_norm": 1.1475837169427905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430150 + }, + { + "epoch": 2.0861982630372236, + "grad_norm": 1.1044713588148625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430160 + }, + { + "epoch": 2.0862467612300595, + "grad_norm": 1.117422065988194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430170 + }, + { + "epoch": 2.086295259422896, + "grad_norm": 8.145020657934765e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430180 + }, + { + "epoch": 2.0863437576157318, + "grad_norm": 1.197728849433588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430190 + }, + { + "epoch": 2.086392255808568, + "grad_norm": 1.0266971273154013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430200 + }, + { + "epoch": 2.086440754001404, + "grad_norm": 1.6320415241466435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430210 + }, + { + "epoch": 2.08648925219424, + "grad_norm": 2.3884778599381207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430220 + }, + { + "epoch": 2.0865377503870763, + "grad_norm": 9.616643481535903e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430230 + }, + { + "epoch": 2.0865862485799123, + "grad_norm": 9.290178404341987e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430240 + }, + { + "epoch": 2.086634746772748, + "grad_norm": 2.1379818804234674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430250 + }, + { + "epoch": 2.0866832449655845, + "grad_norm": 2.5055451047251154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430260 + }, + { + "epoch": 2.0867317431584205, + "grad_norm": 1.0841210595913253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430270 + }, + { + "epoch": 2.086780241351257, + "grad_norm": 1.2940578386633206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430280 + }, + { + "epoch": 2.0868287395440928, + "grad_norm": 7.431667281565524e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430290 + }, + { + "epoch": 2.0868772377369287, + "grad_norm": 1.1636362096112407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430300 + }, + { + "epoch": 2.086925735929765, + "grad_norm": 8.857897526581837e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430310 + }, + { + "epoch": 2.086974234122601, + "grad_norm": 1.1969510715914566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430320 + }, + { + "epoch": 2.087022732315437, + "grad_norm": 7.678372604402739e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430330 + }, + { + "epoch": 2.0870712305082733, + "grad_norm": 1.548925432359738e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430340 + }, + { + "epoch": 2.087119728701109, + "grad_norm": 1.0161556929233484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430350 + }, + { + "epoch": 2.0871682268939455, + "grad_norm": 1.0847101883371124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430360 + }, + { + "epoch": 2.0872167250867815, + "grad_norm": 1.5123955421358914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430370 + }, + { + "epoch": 2.0872652232796174, + "grad_norm": 7.208463159713574e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430380 + }, + { + "epoch": 2.0873137214724538, + "grad_norm": 1.3956599431708128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430390 + }, + { + "epoch": 2.0873622196652897, + "grad_norm": 1.1521121834334735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430400 + }, + { + "epoch": 2.0874107178581256, + "grad_norm": 1.3719354541308348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430410 + }, + { + "epoch": 2.087459216050962, + "grad_norm": 1.0474302314378292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430420 + }, + { + "epoch": 2.087507714243798, + "grad_norm": 1.2863541343222096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430430 + }, + { + "epoch": 2.0875562124366343, + "grad_norm": 8.318239430593621e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430440 + }, + { + "epoch": 2.08760471062947, + "grad_norm": 1.2382225023088722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430450 + }, + { + "epoch": 2.087653208822306, + "grad_norm": 8.098131054623536e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430460 + }, + { + "epoch": 2.0877017070151425, + "grad_norm": 1.4947554305422273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430470 + }, + { + "epoch": 2.0877502052079784, + "grad_norm": 1.0956540563711314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430480 + }, + { + "epoch": 2.0877987034008143, + "grad_norm": 6.273590980043764e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430490 + }, + { + "epoch": 2.0878472015936507, + "grad_norm": 1.0169829423034571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430500 + }, + { + "epoch": 2.0878956997864866, + "grad_norm": 7.75350983417411e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430510 + }, + { + "epoch": 2.087944197979323, + "grad_norm": 1.8427670056553325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430520 + }, + { + "epoch": 2.087992696172159, + "grad_norm": 1.2382961322998653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430530 + }, + { + "epoch": 2.088041194364995, + "grad_norm": 9.738212014553937e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430540 + }, + { + "epoch": 2.088089692557831, + "grad_norm": 1.4822941096781506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430550 + }, + { + "epoch": 2.088138190750667, + "grad_norm": 9.90306237014238e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430560 + }, + { + "epoch": 2.0881866889435035, + "grad_norm": 9.356168284568867e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430570 + }, + { + "epoch": 2.0882351871363394, + "grad_norm": 1.160679463652059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430580 + }, + { + "epoch": 2.0882836853291753, + "grad_norm": 1.022785323101516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430590 + }, + { + "epoch": 2.0883321835220117, + "grad_norm": 1.3490641492808209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430600 + }, + { + "epoch": 2.0883806817148476, + "grad_norm": 1.4595234354430886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430610 + }, + { + "epoch": 2.0884291799076835, + "grad_norm": 1.493666701435359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430620 + }, + { + "epoch": 2.08847767810052, + "grad_norm": 1.232098156833672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430630 + }, + { + "epoch": 2.088526176293356, + "grad_norm": 7.617685149341469e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430640 + }, + { + "epoch": 2.088574674486192, + "grad_norm": 1.434376173392593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430650 + }, + { + "epoch": 2.088623172679028, + "grad_norm": 1.7203257485221002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430660 + }, + { + "epoch": 2.088671670871864, + "grad_norm": 1.0534892069813395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430670 + }, + { + "epoch": 2.0887201690647004, + "grad_norm": 1.7856033096563806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430680 + }, + { + "epoch": 2.0887686672575363, + "grad_norm": 8.319587685434726e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430690 + }, + { + "epoch": 2.088817165450372, + "grad_norm": 7.3419514912131945e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430700 + }, + { + "epoch": 2.0888656636432086, + "grad_norm": 2.0067339789875405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430710 + }, + { + "epoch": 2.0889141618360445, + "grad_norm": 1.4318387364653518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430720 + }, + { + "epoch": 2.088962660028881, + "grad_norm": 1.1860989523881926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430730 + }, + { + "epoch": 2.089011158221717, + "grad_norm": 1.4729771180554962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430740 + }, + { + "epoch": 2.0890596564145527, + "grad_norm": 1.0983366216521517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430750 + }, + { + "epoch": 2.089108154607389, + "grad_norm": 1.0615473833297528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430760 + }, + { + "epoch": 2.089156652800225, + "grad_norm": 1.4088445077220513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430770 + }, + { + "epoch": 2.089205150993061, + "grad_norm": 1.2829076467824052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430780 + }, + { + "epoch": 2.0892536491858973, + "grad_norm": 8.018428587774906e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430790 + }, + { + "epoch": 2.089302147378733, + "grad_norm": 1.941545768602282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430800 + }, + { + "epoch": 2.0893506455715696, + "grad_norm": 1.21544641018545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430810 + }, + { + "epoch": 2.0893991437644055, + "grad_norm": 1.1275131051036169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430820 + }, + { + "epoch": 2.0894476419572414, + "grad_norm": 1.5161038646738234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430830 + }, + { + "epoch": 2.089496140150078, + "grad_norm": 8.13982214964426e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430840 + }, + { + "epoch": 2.0895446383429137, + "grad_norm": 1.1923705578453792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430850 + }, + { + "epoch": 2.0895931365357496, + "grad_norm": 1.5117491258820337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430860 + }, + { + "epoch": 2.089641634728586, + "grad_norm": 9.903306619207797e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430870 + }, + { + "epoch": 2.089690132921422, + "grad_norm": 7.706897342529828e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430880 + }, + { + "epoch": 2.0897386311142583, + "grad_norm": 1.938272831125687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430890 + }, + { + "epoch": 2.089787129307094, + "grad_norm": 9.4878718215341e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430900 + }, + { + "epoch": 2.08983562749993, + "grad_norm": 5.6562190486886266e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430910 + }, + { + "epoch": 2.0898841256927665, + "grad_norm": 1.1281460210454952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430920 + }, + { + "epoch": 2.0899326238856024, + "grad_norm": 8.308599142026196e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430930 + }, + { + "epoch": 2.089981122078439, + "grad_norm": 1.0305786446451748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430940 + }, + { + "epoch": 2.0900296202712747, + "grad_norm": 1.2077838285051712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430950 + }, + { + "epoch": 2.0900781184641106, + "grad_norm": 9.324689465017855e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430960 + }, + { + "epoch": 2.090126616656947, + "grad_norm": 1.5420571486401968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430970 + }, + { + "epoch": 2.090175114849783, + "grad_norm": 1.0591291399464353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430980 + }, + { + "epoch": 2.090223613042619, + "grad_norm": 7.927892120562774e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 430990 + }, + { + "epoch": 2.090272111235455, + "grad_norm": 7.2221184588272536e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431000 + }, + { + "epoch": 2.090320609428291, + "grad_norm": 1.402465699129607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431010 + }, + { + "epoch": 2.090369107621127, + "grad_norm": 1.6587092588338237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431020 + }, + { + "epoch": 2.0904176058139634, + "grad_norm": 1.2076323940846123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431030 + }, + { + "epoch": 2.0904661040067993, + "grad_norm": 9.431515124447287e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431040 + }, + { + "epoch": 2.0905146021996357, + "grad_norm": 1.165510088441124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431050 + }, + { + "epoch": 2.0905631003924716, + "grad_norm": 1.1564217139437005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431060 + }, + { + "epoch": 2.0906115985853075, + "grad_norm": 1.0330787780787887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431070 + }, + { + "epoch": 2.090660096778144, + "grad_norm": 1.7721385248137267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431080 + }, + { + "epoch": 2.09070859497098, + "grad_norm": 9.859970617753788e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431090 + }, + { + "epoch": 2.090757093163816, + "grad_norm": 1.1551842149515323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431100 + }, + { + "epoch": 2.090805591356652, + "grad_norm": 1.1447290226840323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431110 + }, + { + "epoch": 2.090854089549488, + "grad_norm": 9.900525732575716e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431120 + }, + { + "epoch": 2.0909025877423244, + "grad_norm": 2.418320654840045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431130 + }, + { + "epoch": 2.0909510859351603, + "grad_norm": 1.1006331845919703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431140 + }, + { + "epoch": 2.0909995841279962, + "grad_norm": 1.6250545797902305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431150 + }, + { + "epoch": 2.0910480823208326, + "grad_norm": 8.790799199687171e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431160 + }, + { + "epoch": 2.0910965805136685, + "grad_norm": 8.959870179126028e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431170 + }, + { + "epoch": 2.091145078706505, + "grad_norm": 1.3065545978463433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431180 + }, + { + "epoch": 2.091193576899341, + "grad_norm": 1.653498316045443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431190 + }, + { + "epoch": 2.0912420750921767, + "grad_norm": 1.1492335083573835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431200 + }, + { + "epoch": 2.091290573285013, + "grad_norm": 1.104863667222844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431210 + }, + { + "epoch": 2.091339071477849, + "grad_norm": 1.0486203905202274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431220 + }, + { + "epoch": 2.091387569670685, + "grad_norm": 1.3656608288670213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431230 + }, + { + "epoch": 2.0914360678635213, + "grad_norm": 1.83183921365071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431240 + }, + { + "epoch": 2.0914845660563572, + "grad_norm": 6.1572809073595636e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431250 + }, + { + "epoch": 2.0915330642491936, + "grad_norm": 7.87068277219305e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431260 + }, + { + "epoch": 2.0915815624420295, + "grad_norm": 9.673675194221687e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431270 + }, + { + "epoch": 2.0916300606348655, + "grad_norm": 8.956598129827853e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431280 + }, + { + "epoch": 2.091678558827702, + "grad_norm": 1.1652597997624525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431290 + }, + { + "epoch": 2.0917270570205377, + "grad_norm": 1.053256948324588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431300 + }, + { + "epoch": 2.0917755552133737, + "grad_norm": 1.631338086838241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431310 + }, + { + "epoch": 2.09182405340621, + "grad_norm": 1.6217942544471953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431320 + }, + { + "epoch": 2.091872551599046, + "grad_norm": 9.69581925858165e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431330 + }, + { + "epoch": 2.0919210497918823, + "grad_norm": 1.1039597680451152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431340 + }, + { + "epoch": 2.0919695479847182, + "grad_norm": 1.2623720735405186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431350 + }, + { + "epoch": 2.092018046177554, + "grad_norm": 1.1697033563962123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431360 + }, + { + "epoch": 2.0920665443703905, + "grad_norm": 1.1830598722895047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431370 + }, + { + "epoch": 2.0921150425632264, + "grad_norm": 1.182185105363942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431380 + }, + { + "epoch": 2.0921635407560624, + "grad_norm": 6.593642964958235e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431390 + }, + { + "epoch": 2.0922120389488987, + "grad_norm": 1.4548288795879216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431400 + }, + { + "epoch": 2.0922605371417347, + "grad_norm": 9.405374257198673e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431410 + }, + { + "epoch": 2.092309035334571, + "grad_norm": 1.2568517782085564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431420 + }, + { + "epoch": 2.092357533527407, + "grad_norm": 7.864188411588202e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431430 + }, + { + "epoch": 2.092406031720243, + "grad_norm": 1.515050662703743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431440 + }, + { + "epoch": 2.0924545299130792, + "grad_norm": 1.1008701505943463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431450 + }, + { + "epoch": 2.092503028105915, + "grad_norm": 9.349744978237595e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431460 + }, + { + "epoch": 2.0925515262987515, + "grad_norm": 1.3721368929964228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431470 + }, + { + "epoch": 2.0926000244915874, + "grad_norm": 1.2850010833176384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431480 + }, + { + "epoch": 2.0926485226844234, + "grad_norm": 9.262261180253972e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431490 + }, + { + "epoch": 2.0926970208772597, + "grad_norm": 1.3513862029412849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431500 + }, + { + "epoch": 2.0927455190700956, + "grad_norm": 1.4105212997606031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431510 + }, + { + "epoch": 2.0927940172629316, + "grad_norm": 1.5641630213281132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431520 + }, + { + "epoch": 2.092842515455768, + "grad_norm": 1.4592955288605935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431530 + }, + { + "epoch": 2.092891013648604, + "grad_norm": 9.62233936974144e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431540 + }, + { + "epoch": 2.09293951184144, + "grad_norm": 1.2520681380578935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431550 + }, + { + "epoch": 2.092988010034276, + "grad_norm": 1.2894044942868277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431560 + }, + { + "epoch": 2.093036508227112, + "grad_norm": 1.1553357381899332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431570 + }, + { + "epoch": 2.0930850064199484, + "grad_norm": 9.944065126887836e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431580 + }, + { + "epoch": 2.0931335046127844, + "grad_norm": 6.609083502695512e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431590 + }, + { + "epoch": 2.0931820028056203, + "grad_norm": 1.8424819003826087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431600 + }, + { + "epoch": 2.0932305009984566, + "grad_norm": 1.5589966650964016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431610 + }, + { + "epoch": 2.0932789991912926, + "grad_norm": 2.053863568107772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431620 + }, + { + "epoch": 2.093327497384129, + "grad_norm": 1.0373954140163733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431630 + }, + { + "epoch": 2.093375995576965, + "grad_norm": 1.1664802457289625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431640 + }, + { + "epoch": 2.0934244937698008, + "grad_norm": 7.2416224128346585e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431650 + }, + { + "epoch": 2.093472991962637, + "grad_norm": 1.0382294135524717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431660 + }, + { + "epoch": 2.093521490155473, + "grad_norm": 1.2395936721532053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431670 + }, + { + "epoch": 2.093569988348309, + "grad_norm": 9.770593223379365e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431680 + }, + { + "epoch": 2.0936184865411454, + "grad_norm": 1.2872540366970497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431690 + }, + { + "epoch": 2.0936669847339813, + "grad_norm": 1.2120205283849828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431700 + }, + { + "epoch": 2.0937154829268176, + "grad_norm": 1.517340031398362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431710 + }, + { + "epoch": 2.0937639811196536, + "grad_norm": 7.71687513889674e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431720 + }, + { + "epoch": 2.0938124793124895, + "grad_norm": 1.3778354457372188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431730 + }, + { + "epoch": 2.093860977505326, + "grad_norm": 1.0354201940288021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431740 + }, + { + "epoch": 2.0939094756981618, + "grad_norm": 9.283340318688715e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431750 + }, + { + "epoch": 2.0939579738909977, + "grad_norm": 1.1431832369623862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431760 + }, + { + "epoch": 2.094006472083834, + "grad_norm": 9.02508645594935e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431770 + }, + { + "epoch": 2.09405497027667, + "grad_norm": 1.4701575956621582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431780 + }, + { + "epoch": 2.0941034684695063, + "grad_norm": 8.748738622443852e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431790 + }, + { + "epoch": 2.0941519666623423, + "grad_norm": 1.0354751722729816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431800 + }, + { + "epoch": 2.094200464855178, + "grad_norm": 8.594680522833187e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431810 + }, + { + "epoch": 2.0942489630480146, + "grad_norm": 1.1401616539785664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431820 + }, + { + "epoch": 2.0942974612408505, + "grad_norm": 1.0635230474065338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431830 + }, + { + "epoch": 2.0943459594336864, + "grad_norm": 1.2941015370415698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431840 + }, + { + "epoch": 2.0943944576265228, + "grad_norm": 1.0052767507318094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431850 + }, + { + "epoch": 2.0944429558193587, + "grad_norm": 1.0286462348574332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431860 + }, + { + "epoch": 2.094491454012195, + "grad_norm": 1.8162433335078276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431870 + }, + { + "epoch": 2.094539952205031, + "grad_norm": 7.839576987578312e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431880 + }, + { + "epoch": 2.094588450397867, + "grad_norm": 1.0676242112594991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431890 + }, + { + "epoch": 2.0946369485907033, + "grad_norm": 1.1741902561368533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431900 + }, + { + "epoch": 2.094685446783539, + "grad_norm": 1.6655519630148774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431910 + }, + { + "epoch": 2.094733944976375, + "grad_norm": 1.1006243916256153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431920 + }, + { + "epoch": 2.0947824431692115, + "grad_norm": 1.2595434917272996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431930 + }, + { + "epoch": 2.0948309413620474, + "grad_norm": 1.1877278716099227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431940 + }, + { + "epoch": 2.0948794395548838, + "grad_norm": 1.0585490706205292e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431950 + }, + { + "epoch": 2.0949279377477197, + "grad_norm": 7.095631193720919e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431960 + }, + { + "epoch": 2.0949764359405556, + "grad_norm": 1.0416473017471617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431970 + }, + { + "epoch": 2.095024934133392, + "grad_norm": 1.58163597774319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431980 + }, + { + "epoch": 2.095073432326228, + "grad_norm": 2.3643428548325574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 431990 + }, + { + "epoch": 2.0951219305190643, + "grad_norm": 8.771334769619443e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432000 + }, + { + "epoch": 2.0951704287119, + "grad_norm": 1.6808678893198703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432010 + }, + { + "epoch": 2.095218926904736, + "grad_norm": 7.082274322556259e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432020 + }, + { + "epoch": 2.0952674250975725, + "grad_norm": 1.507234692610382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432030 + }, + { + "epoch": 2.0953159232904084, + "grad_norm": 9.953586399547021e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432040 + }, + { + "epoch": 2.0953644214832443, + "grad_norm": 1.2599888243869373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432050 + }, + { + "epoch": 2.0954129196760807, + "grad_norm": 8.908894955084179e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432060 + }, + { + "epoch": 2.0954614178689166, + "grad_norm": 1.4646384549621416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432070 + }, + { + "epoch": 2.0955099160617525, + "grad_norm": 1.1235965935441072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432080 + }, + { + "epoch": 2.095558414254589, + "grad_norm": 2.1147704032387082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432090 + }, + { + "epoch": 2.095606912447425, + "grad_norm": 9.920579024935705e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432100 + }, + { + "epoch": 2.095655410640261, + "grad_norm": 1.2386711212286627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432110 + }, + { + "epoch": 2.095703908833097, + "grad_norm": 9.479796503342186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432120 + }, + { + "epoch": 2.095752407025933, + "grad_norm": 1.2230031209981007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432130 + }, + { + "epoch": 2.0958009052187694, + "grad_norm": 1.0803572259021621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432140 + }, + { + "epoch": 2.0958494034116053, + "grad_norm": 7.0684320618852325e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432150 + }, + { + "epoch": 2.0958979016044417, + "grad_norm": 1.2291780926432239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432160 + }, + { + "epoch": 2.0959463997972776, + "grad_norm": 1.351258482884532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432170 + }, + { + "epoch": 2.0959948979901135, + "grad_norm": 1.211012889967833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432180 + }, + { + "epoch": 2.09604339618295, + "grad_norm": 1.3587238001377955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432190 + }, + { + "epoch": 2.096091894375786, + "grad_norm": 1.0809819706025792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432200 + }, + { + "epoch": 2.0961403925686217, + "grad_norm": 1.6922767187566023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432210 + }, + { + "epoch": 2.096188890761458, + "grad_norm": 9.538008605147752e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432220 + }, + { + "epoch": 2.096237388954294, + "grad_norm": 1.3403542276080316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432230 + }, + { + "epoch": 2.0962858871471304, + "grad_norm": 9.334652162351631e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432240 + }, + { + "epoch": 2.0963343853399663, + "grad_norm": 1.3846938706763012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432250 + }, + { + "epoch": 2.096382883532802, + "grad_norm": 1.4706664330788044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432260 + }, + { + "epoch": 2.0964313817256386, + "grad_norm": 1.2340147570455429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432270 + }, + { + "epoch": 2.0964798799184745, + "grad_norm": 8.077763347102973e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432280 + }, + { + "epoch": 2.0965283781113104, + "grad_norm": 8.569859488716247e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432290 + }, + { + "epoch": 2.096576876304147, + "grad_norm": 9.72763114504005e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432300 + }, + { + "epoch": 2.0966253744969827, + "grad_norm": 1.3906820583997614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432310 + }, + { + "epoch": 2.096673872689819, + "grad_norm": 1.6768476385209397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432320 + }, + { + "epoch": 2.096722370882655, + "grad_norm": 1.3733414405692201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432330 + }, + { + "epoch": 2.096770869075491, + "grad_norm": 9.41986666447292e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432340 + }, + { + "epoch": 2.0968193672683273, + "grad_norm": 1.471405042252627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432350 + }, + { + "epoch": 2.096867865461163, + "grad_norm": 1.3177406721354146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432360 + }, + { + "epoch": 2.096916363653999, + "grad_norm": 1.3025535317012782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432370 + }, + { + "epoch": 2.0969648618468355, + "grad_norm": 9.83665415787982e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432380 + }, + { + "epoch": 2.0970133600396714, + "grad_norm": 1.7966831578064557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432390 + }, + { + "epoch": 2.097061858232508, + "grad_norm": 1.5998432800756746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432400 + }, + { + "epoch": 2.0971103564253437, + "grad_norm": 1.0658314231193344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432410 + }, + { + "epoch": 2.0971588546181796, + "grad_norm": 7.888115050036504e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432420 + }, + { + "epoch": 2.097207352811016, + "grad_norm": 1.2467831211893099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432430 + }, + { + "epoch": 2.097255851003852, + "grad_norm": 8.124148465071812e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432440 + }, + { + "epoch": 2.097304349196688, + "grad_norm": 1.032903096387372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432450 + }, + { + "epoch": 2.097352847389524, + "grad_norm": 1.5157116450836838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432460 + }, + { + "epoch": 2.09740134558236, + "grad_norm": 8.360175662858182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432470 + }, + { + "epoch": 2.0974498437751965, + "grad_norm": 1.254486381441211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432480 + }, + { + "epoch": 2.0974983419680324, + "grad_norm": 1.5304008726957363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432490 + }, + { + "epoch": 2.0975468401608683, + "grad_norm": 7.546978153527562e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432500 + }, + { + "epoch": 2.0975953383537047, + "grad_norm": 1.674029981302283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432510 + }, + { + "epoch": 2.0976438365465406, + "grad_norm": 1.235429980539493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432520 + }, + { + "epoch": 2.097692334739377, + "grad_norm": 1.3273038668160098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432530 + }, + { + "epoch": 2.097740832932213, + "grad_norm": 1.2555573469796855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432540 + }, + { + "epoch": 2.097789331125049, + "grad_norm": 1.0417938511864122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432550 + }, + { + "epoch": 2.097837829317885, + "grad_norm": 9.23259069196547e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432560 + }, + { + "epoch": 2.097886327510721, + "grad_norm": 1.3762075035117505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432570 + }, + { + "epoch": 2.097934825703557, + "grad_norm": 8.949502472432869e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432580 + }, + { + "epoch": 2.0979833238963934, + "grad_norm": 1.2118158032592419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432590 + }, + { + "epoch": 2.0980318220892293, + "grad_norm": 1.3867367698594535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432600 + }, + { + "epoch": 2.0980803202820657, + "grad_norm": 1.7302474120128863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432610 + }, + { + "epoch": 2.0981288184749016, + "grad_norm": 9.626776709126261e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432620 + }, + { + "epoch": 2.0981773166677375, + "grad_norm": 8.937699469413474e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432630 + }, + { + "epoch": 2.098225814860574, + "grad_norm": 9.658425170755436e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432640 + }, + { + "epoch": 2.09827431305341, + "grad_norm": 1.0996082266956364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432650 + }, + { + "epoch": 2.0983228112462458, + "grad_norm": 1.1626087648153316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432660 + }, + { + "epoch": 2.098371309439082, + "grad_norm": 1.4826613714546966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432670 + }, + { + "epoch": 2.098419807631918, + "grad_norm": 1.0702619235303246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432680 + }, + { + "epoch": 2.0984683058247544, + "grad_norm": 1.0451832288538299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432690 + }, + { + "epoch": 2.0985168040175903, + "grad_norm": 8.552335728495564e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432700 + }, + { + "epoch": 2.0985653022104263, + "grad_norm": 1.3375378138391625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432710 + }, + { + "epoch": 2.0986138004032626, + "grad_norm": 1.0180858822650407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432720 + }, + { + "epoch": 2.0986622985960985, + "grad_norm": 2.1342231093512964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432730 + }, + { + "epoch": 2.0987107967889345, + "grad_norm": 1.1194051907637004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432740 + }, + { + "epoch": 2.098759294981771, + "grad_norm": 8.153498320950803e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432750 + }, + { + "epoch": 2.0988077931746068, + "grad_norm": 1.4122291780438445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432760 + }, + { + "epoch": 2.098856291367443, + "grad_norm": 1.5748698345419143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432770 + }, + { + "epoch": 2.098904789560279, + "grad_norm": 1.078035616330908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432780 + }, + { + "epoch": 2.098953287753115, + "grad_norm": 1.3205609938893303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432790 + }, + { + "epoch": 2.0990017859459513, + "grad_norm": 9.09116337766136e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432800 + }, + { + "epoch": 2.0990502841387872, + "grad_norm": 1.709767083468705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432810 + }, + { + "epoch": 2.099098782331623, + "grad_norm": 1.3321816538791609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432820 + }, + { + "epoch": 2.0991472805244595, + "grad_norm": 8.530768980108405e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432830 + }, + { + "epoch": 2.0991957787172955, + "grad_norm": 1.0914105175174882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432840 + }, + { + "epoch": 2.099244276910132, + "grad_norm": 1.4328880304503855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432850 + }, + { + "epoch": 2.0992927751029677, + "grad_norm": 1.0013294193811362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432860 + }, + { + "epoch": 2.0993412732958037, + "grad_norm": 1.4865299213795424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432870 + }, + { + "epoch": 2.09938977148864, + "grad_norm": 1.1191714222036353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432880 + }, + { + "epoch": 2.099438269681476, + "grad_norm": 1.4809935500181837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432890 + }, + { + "epoch": 2.099486767874312, + "grad_norm": 9.949089552208079e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432900 + }, + { + "epoch": 2.0995352660671482, + "grad_norm": 1.76710539534497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432910 + }, + { + "epoch": 2.099583764259984, + "grad_norm": 1.465961219082601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432920 + }, + { + "epoch": 2.0996322624528205, + "grad_norm": 9.902167974473741e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432930 + }, + { + "epoch": 2.0996807606456565, + "grad_norm": 1.1087747608939935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432940 + }, + { + "epoch": 2.0997292588384924, + "grad_norm": 1.8085573927351106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432950 + }, + { + "epoch": 2.0997777570313287, + "grad_norm": 9.247852261751177e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432960 + }, + { + "epoch": 2.0998262552241647, + "grad_norm": 1.5887128057556765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432970 + }, + { + "epoch": 2.0998747534170006, + "grad_norm": 1.3962896616703802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432980 + }, + { + "epoch": 2.099923251609837, + "grad_norm": 1.0582957621352307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 432990 + }, + { + "epoch": 2.099971749802673, + "grad_norm": 1.1843814817780185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433000 + }, + { + "epoch": 2.1000202479955092, + "grad_norm": 1.5014121501621958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433010 + }, + { + "epoch": 2.100068746188345, + "grad_norm": 1.0540484929322247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433020 + }, + { + "epoch": 2.100117244381181, + "grad_norm": 6.949619102414317e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433030 + }, + { + "epoch": 2.1001657425740174, + "grad_norm": 1.0316654197595199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433040 + }, + { + "epoch": 2.1002142407668534, + "grad_norm": 1.7077519842700895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433050 + }, + { + "epoch": 2.1002627389596897, + "grad_norm": 1.4955153559981227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433060 + }, + { + "epoch": 2.1003112371525257, + "grad_norm": 1.2184673714443761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433070 + }, + { + "epoch": 2.1003597353453616, + "grad_norm": 1.1292708101962035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433080 + }, + { + "epoch": 2.100408233538198, + "grad_norm": 1.2421408790430633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433090 + }, + { + "epoch": 2.100456731731034, + "grad_norm": 1.3407490229155883e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433100 + }, + { + "epoch": 2.10050522992387, + "grad_norm": 1.299583196612275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433110 + }, + { + "epoch": 2.100553728116706, + "grad_norm": 1.4376085211154077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433120 + }, + { + "epoch": 2.100602226309542, + "grad_norm": 1.1180932624199613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433130 + }, + { + "epoch": 2.1006507245023784, + "grad_norm": 1.18823901829046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433140 + }, + { + "epoch": 2.1006992226952144, + "grad_norm": 1.0160933427982854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433150 + }, + { + "epoch": 2.1007477208880503, + "grad_norm": 1.3324906511513745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433160 + }, + { + "epoch": 2.1007962190808867, + "grad_norm": 1.096123281030259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433170 + }, + { + "epoch": 2.1008447172737226, + "grad_norm": 1.7671515806227944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433180 + }, + { + "epoch": 2.1008932154665585, + "grad_norm": 1.1599443183740732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433190 + }, + { + "epoch": 2.100941713659395, + "grad_norm": 1.249774328471176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433200 + }, + { + "epoch": 2.100990211852231, + "grad_norm": 1.3069261228793039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433210 + }, + { + "epoch": 2.101038710045067, + "grad_norm": 9.907464182390413e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433220 + }, + { + "epoch": 2.101087208237903, + "grad_norm": 1.551913797470661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433230 + }, + { + "epoch": 2.101135706430739, + "grad_norm": 8.645987037425584e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433240 + }, + { + "epoch": 2.1011842046235754, + "grad_norm": 1.145218764264655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433250 + }, + { + "epoch": 2.1012327028164113, + "grad_norm": 1.2836323115550385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433260 + }, + { + "epoch": 2.101281201009247, + "grad_norm": 7.894450426704225e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433270 + }, + { + "epoch": 2.1013296992020836, + "grad_norm": 8.580893329224182e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433280 + }, + { + "epoch": 2.1013781973949195, + "grad_norm": 1.6792702339785137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433290 + }, + { + "epoch": 2.101426695587756, + "grad_norm": 1.2465121379534594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433300 + }, + { + "epoch": 2.1014751937805918, + "grad_norm": 1.8531872925109383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433310 + }, + { + "epoch": 2.1015236919734277, + "grad_norm": 1.0534725092270492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433320 + }, + { + "epoch": 2.101572190166264, + "grad_norm": 9.191793104434964e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433330 + }, + { + "epoch": 2.1016206883591, + "grad_norm": 1.0019507001857164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433340 + }, + { + "epoch": 2.101669186551936, + "grad_norm": 2.5682199478183065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433350 + }, + { + "epoch": 2.1017176847447723, + "grad_norm": 9.663748024024699e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433360 + }, + { + "epoch": 2.101766182937608, + "grad_norm": 6.157373722004422e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433370 + }, + { + "epoch": 2.1018146811304446, + "grad_norm": 1.0308336406694707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433380 + }, + { + "epoch": 2.1018631793232805, + "grad_norm": 1.2488421852197007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433390 + }, + { + "epoch": 2.1019116775161164, + "grad_norm": 1.6990361118018882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433400 + }, + { + "epoch": 2.1019601757089528, + "grad_norm": 9.707563641825345e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433410 + }, + { + "epoch": 2.1020086739017887, + "grad_norm": 8.564945197520046e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433420 + }, + { + "epoch": 2.1020571720946246, + "grad_norm": 8.78742945076283e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433430 + }, + { + "epoch": 2.102105670287461, + "grad_norm": 1.1470079996911409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433440 + }, + { + "epoch": 2.102154168480297, + "grad_norm": 9.892823449320076e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433450 + }, + { + "epoch": 2.1022026666731333, + "grad_norm": 1.4029322592534754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433460 + }, + { + "epoch": 2.102251164865969, + "grad_norm": 1.4828684946621706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433470 + }, + { + "epoch": 2.102299663058805, + "grad_norm": 1.3487211347751327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433480 + }, + { + "epoch": 2.1023481612516415, + "grad_norm": 1.1476160466372676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433490 + }, + { + "epoch": 2.1023966594444774, + "grad_norm": 8.617395685917018e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433500 + }, + { + "epoch": 2.1024451576373133, + "grad_norm": 1.1363385787888092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433510 + }, + { + "epoch": 2.1024936558301497, + "grad_norm": 9.899522090961455e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433520 + }, + { + "epoch": 2.1025421540229856, + "grad_norm": 1.1149202450155826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433530 + }, + { + "epoch": 2.102590652215822, + "grad_norm": 1.6094109156483682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433540 + }, + { + "epoch": 2.102639150408658, + "grad_norm": 7.927658529638393e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433550 + }, + { + "epoch": 2.102687648601494, + "grad_norm": 1.8299184390002665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433560 + }, + { + "epoch": 2.10273614679433, + "grad_norm": 5.845013806293764e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433570 + }, + { + "epoch": 2.102784644987166, + "grad_norm": 1.0203866196434319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433580 + }, + { + "epoch": 2.1028331431800025, + "grad_norm": 1.2428083451254679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433590 + }, + { + "epoch": 2.1028816413728384, + "grad_norm": 2.1183799603363695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433600 + }, + { + "epoch": 2.1029301395656743, + "grad_norm": 8.593818101587658e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433610 + }, + { + "epoch": 2.1029786377585107, + "grad_norm": 6.450124434564941e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433620 + }, + { + "epoch": 2.1030271359513466, + "grad_norm": 1.2368182922273263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433630 + }, + { + "epoch": 2.1030756341441825, + "grad_norm": 1.1660836740645664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433640 + }, + { + "epoch": 2.103124132337019, + "grad_norm": 1.588415265985077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433650 + }, + { + "epoch": 2.103172630529855, + "grad_norm": 9.602444173140157e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433660 + }, + { + "epoch": 2.103221128722691, + "grad_norm": 1.2544647098877704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433670 + }, + { + "epoch": 2.103269626915527, + "grad_norm": 5.3391966403637525e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433680 + }, + { + "epoch": 2.103318125108363, + "grad_norm": 9.011034585171274e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433690 + }, + { + "epoch": 2.1033666233011994, + "grad_norm": 1.587369524713722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433700 + }, + { + "epoch": 2.1034151214940353, + "grad_norm": 1.6199454222487475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433710 + }, + { + "epoch": 2.1034636196868712, + "grad_norm": 1.0746788348114933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433720 + }, + { + "epoch": 2.1035121178797076, + "grad_norm": 1.4021568794930772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433730 + }, + { + "epoch": 2.1035606160725435, + "grad_norm": 1.455423159768543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433740 + }, + { + "epoch": 2.10360911426538, + "grad_norm": 1.1545004952040472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433750 + }, + { + "epoch": 2.103657612458216, + "grad_norm": 1.3450778268975228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433760 + }, + { + "epoch": 2.1037061106510517, + "grad_norm": 1.0754945378721459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433770 + }, + { + "epoch": 2.103754608843888, + "grad_norm": 1.3352241978736856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433780 + }, + { + "epoch": 2.103803107036724, + "grad_norm": 1.2694039597249684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433790 + }, + { + "epoch": 2.10385160522956, + "grad_norm": 1.5921896689974346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433800 + }, + { + "epoch": 2.1039001034223963, + "grad_norm": 1.263579463284259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433810 + }, + { + "epoch": 2.1039486016152322, + "grad_norm": 1.4809454995656779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433820 + }, + { + "epoch": 2.1039970998080686, + "grad_norm": 9.48662481903284e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433830 + }, + { + "epoch": 2.1040455980009045, + "grad_norm": 1.1606686278753386e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433840 + }, + { + "epoch": 2.1040940961937404, + "grad_norm": 1.2692844109096768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433850 + }, + { + "epoch": 2.104142594386577, + "grad_norm": 9.20036491436349e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433860 + }, + { + "epoch": 2.1041910925794127, + "grad_norm": 1.619928902130141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433870 + }, + { + "epoch": 2.1042395907722486, + "grad_norm": 1.0753473667080016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433880 + }, + { + "epoch": 2.104288088965085, + "grad_norm": 8.645150373354227e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433890 + }, + { + "epoch": 2.104336587157921, + "grad_norm": 1.6936837710090913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433900 + }, + { + "epoch": 2.1043850853507573, + "grad_norm": 1.0686589391184498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433910 + }, + { + "epoch": 2.1044335835435932, + "grad_norm": 6.425829646161674e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433920 + }, + { + "epoch": 2.104482081736429, + "grad_norm": 9.240752163464094e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433930 + }, + { + "epoch": 2.1045305799292655, + "grad_norm": 1.9357189628976812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433940 + }, + { + "epoch": 2.1045790781221014, + "grad_norm": 8.436582099591305e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433950 + }, + { + "epoch": 2.1046275763149374, + "grad_norm": 9.826717217720216e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433960 + }, + { + "epoch": 2.1046760745077737, + "grad_norm": 1.3000651222228043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433970 + }, + { + "epoch": 2.1047245727006096, + "grad_norm": 8.221456404555738e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433980 + }, + { + "epoch": 2.104773070893446, + "grad_norm": 1.770443525117571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 433990 + }, + { + "epoch": 2.104821569086282, + "grad_norm": 1.0499872971081459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434000 + }, + { + "epoch": 2.104870067279118, + "grad_norm": 1.6114114487209008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434010 + }, + { + "epoch": 2.104918565471954, + "grad_norm": 1.2002960403378893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434020 + }, + { + "epoch": 2.10496706366479, + "grad_norm": 1.5755380999848967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434030 + }, + { + "epoch": 2.105015561857626, + "grad_norm": 9.336913464608187e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434040 + }, + { + "epoch": 2.1050640600504624, + "grad_norm": 1.7094368587322606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434050 + }, + { + "epoch": 2.1051125582432983, + "grad_norm": 1.381253067478383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434060 + }, + { + "epoch": 2.1051610564361347, + "grad_norm": 9.662251443387504e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434070 + }, + { + "epoch": 2.1052095546289706, + "grad_norm": 1.088286172290509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434080 + }, + { + "epoch": 2.1052580528218066, + "grad_norm": 1.0712326137252148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434090 + }, + { + "epoch": 2.105306551014643, + "grad_norm": 1.2835457141591178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434100 + }, + { + "epoch": 2.105355049207479, + "grad_norm": 1.1636258179237302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434110 + }, + { + "epoch": 2.105403547400315, + "grad_norm": 1.3135638354810908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434120 + }, + { + "epoch": 2.105452045593151, + "grad_norm": 1.3697416534341755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434130 + }, + { + "epoch": 2.105500543785987, + "grad_norm": 1.8614416674722634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434140 + }, + { + "epoch": 2.1055490419788234, + "grad_norm": 9.974058912121109e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434150 + }, + { + "epoch": 2.1055975401716593, + "grad_norm": 1.4672239423418887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434160 + }, + { + "epoch": 2.1056460383644953, + "grad_norm": 1.0416778550847994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434170 + }, + { + "epoch": 2.1056945365573316, + "grad_norm": 8.260230721646167e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434180 + }, + { + "epoch": 2.1057430347501676, + "grad_norm": 1.3209170646177881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434190 + }, + { + "epoch": 2.105791532943004, + "grad_norm": 1.7944973507155737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434200 + }, + { + "epoch": 2.10584003113584, + "grad_norm": 1.64141766845205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434210 + }, + { + "epoch": 2.1058885293286758, + "grad_norm": 1.2928223824815177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434220 + }, + { + "epoch": 2.105937027521512, + "grad_norm": 1.3777501806089276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434230 + }, + { + "epoch": 2.105985525714348, + "grad_norm": 1.49586103503907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434240 + }, + { + "epoch": 2.106034023907184, + "grad_norm": 1.1985273218328985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434250 + }, + { + "epoch": 2.1060825221000203, + "grad_norm": 1.2597189069651904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434260 + }, + { + "epoch": 2.1061310202928563, + "grad_norm": 1.0487155144289773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434270 + }, + { + "epoch": 2.1061795184856926, + "grad_norm": 6.191300361280128e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434280 + }, + { + "epoch": 2.1062280166785285, + "grad_norm": 1.0977809772327873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434290 + }, + { + "epoch": 2.1062765148713645, + "grad_norm": 9.651487609119158e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434300 + }, + { + "epoch": 2.106325013064201, + "grad_norm": 8.770672188518347e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434310 + }, + { + "epoch": 2.1063735112570368, + "grad_norm": 9.98362015280918e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434320 + }, + { + "epoch": 2.1064220094498727, + "grad_norm": 1.5822143595300986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434330 + }, + { + "epoch": 2.106470507642709, + "grad_norm": 1.506414015750579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434340 + }, + { + "epoch": 2.106519005835545, + "grad_norm": 1.3287837496989141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434350 + }, + { + "epoch": 2.1065675040283813, + "grad_norm": 1.0349509693696746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434360 + }, + { + "epoch": 2.1066160022212173, + "grad_norm": 1.284907735765728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434370 + }, + { + "epoch": 2.106664500414053, + "grad_norm": 2.1221614687760848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434380 + }, + { + "epoch": 2.1067129986068895, + "grad_norm": 7.0523551443102406e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434390 + }, + { + "epoch": 2.1067614967997255, + "grad_norm": 1.2110168867707216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434400 + }, + { + "epoch": 2.1068099949925614, + "grad_norm": 9.245721521722317e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434410 + }, + { + "epoch": 2.1068584931853978, + "grad_norm": 7.75931408014685e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434420 + }, + { + "epoch": 2.1069069913782337, + "grad_norm": 1.3206911120278164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434430 + }, + { + "epoch": 2.10695548957107, + "grad_norm": 1.4507271828279045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434440 + }, + { + "epoch": 2.107003987763906, + "grad_norm": 1.3678235433189911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434450 + }, + { + "epoch": 2.107052485956742, + "grad_norm": 1.1806939426151075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434460 + }, + { + "epoch": 2.1071009841495782, + "grad_norm": 8.72422134534645e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434470 + }, + { + "epoch": 2.107149482342414, + "grad_norm": 1.0274555428679832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434480 + }, + { + "epoch": 2.10719798053525, + "grad_norm": 1.321891929251251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434490 + }, + { + "epoch": 2.1072464787280865, + "grad_norm": 1.033661511939954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434500 + }, + { + "epoch": 2.1072949769209224, + "grad_norm": 1.4088340272166988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434510 + }, + { + "epoch": 2.1073434751137587, + "grad_norm": 1.3386551422911452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434520 + }, + { + "epoch": 2.1073919733065947, + "grad_norm": 1.0132477967772502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434530 + }, + { + "epoch": 2.1074404714994306, + "grad_norm": 1.124959680964821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434540 + }, + { + "epoch": 2.107488969692267, + "grad_norm": 1.1559230017610389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434550 + }, + { + "epoch": 2.107537467885103, + "grad_norm": 1.0214076695547192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434560 + }, + { + "epoch": 2.107585966077939, + "grad_norm": 1.3596958226003153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434570 + }, + { + "epoch": 2.107634464270775, + "grad_norm": 7.880948338367944e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434580 + }, + { + "epoch": 2.107682962463611, + "grad_norm": 1.1522627296756127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434590 + }, + { + "epoch": 2.1077314606564475, + "grad_norm": 7.828185211167238e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434600 + }, + { + "epoch": 2.1077799588492834, + "grad_norm": 1.232358837199854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434610 + }, + { + "epoch": 2.1078284570421193, + "grad_norm": 8.009526375474252e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434620 + }, + { + "epoch": 2.1078769552349557, + "grad_norm": 1.0209938672289809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434630 + }, + { + "epoch": 2.1079254534277916, + "grad_norm": 7.584047168052166e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434640 + }, + { + "epoch": 2.107973951620628, + "grad_norm": 1.0520960991300399e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434650 + }, + { + "epoch": 2.108022449813464, + "grad_norm": 7.123257095287272e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434660 + }, + { + "epoch": 2.1080709480063, + "grad_norm": 1.5453018420430453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434670 + }, + { + "epoch": 2.108119446199136, + "grad_norm": 1.1664268662059385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434680 + }, + { + "epoch": 2.108167944391972, + "grad_norm": 1.4702129291777055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434690 + }, + { + "epoch": 2.108216442584808, + "grad_norm": 1.1759761164853444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434700 + }, + { + "epoch": 2.1082649407776444, + "grad_norm": 9.85196901837071e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434710 + }, + { + "epoch": 2.1083134389704803, + "grad_norm": 6.443087396945657e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434720 + }, + { + "epoch": 2.1083619371633167, + "grad_norm": 1.3367330353730722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434730 + }, + { + "epoch": 2.1084104353561526, + "grad_norm": 1.2287011408318449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434740 + }, + { + "epoch": 2.1084589335489885, + "grad_norm": 7.046052186154839e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434750 + }, + { + "epoch": 2.108507431741825, + "grad_norm": 1.509648406283759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434760 + }, + { + "epoch": 2.108555929934661, + "grad_norm": 1.2900228441026229e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434770 + }, + { + "epoch": 2.1086044281274967, + "grad_norm": 1.4242519164042733e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434780 + }, + { + "epoch": 2.108652926320333, + "grad_norm": 1.332344279347808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434790 + }, + { + "epoch": 2.108701424513169, + "grad_norm": 9.18715592490571e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434800 + }, + { + "epoch": 2.1087499227060054, + "grad_norm": 8.417389452120005e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434810 + }, + { + "epoch": 2.1087984208988413, + "grad_norm": 1.3886196192913758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434820 + }, + { + "epoch": 2.108846919091677, + "grad_norm": 1.1514153186453768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434830 + }, + { + "epoch": 2.1088954172845136, + "grad_norm": 1.6875940644922593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434840 + }, + { + "epoch": 2.1089439154773495, + "grad_norm": 1.4508789725198312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434850 + }, + { + "epoch": 2.1089924136701854, + "grad_norm": 1.5482664039723204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434860 + }, + { + "epoch": 2.109040911863022, + "grad_norm": 1.6761484644689517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434870 + }, + { + "epoch": 2.1090894100558577, + "grad_norm": 1.1926935883366241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434880 + }, + { + "epoch": 2.109137908248694, + "grad_norm": 1.042001862572306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434890 + }, + { + "epoch": 2.10918640644153, + "grad_norm": 1.4666445835587183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434900 + }, + { + "epoch": 2.109234904634366, + "grad_norm": 8.107660320888499e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434910 + }, + { + "epoch": 2.1092834028272023, + "grad_norm": 9.334546469119687e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434920 + }, + { + "epoch": 2.109331901020038, + "grad_norm": 1.4809665493942248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434930 + }, + { + "epoch": 2.109380399212874, + "grad_norm": 1.4713698703872069e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434940 + }, + { + "epoch": 2.1094288974057105, + "grad_norm": 1.4335998166359332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434950 + }, + { + "epoch": 2.1094773955985464, + "grad_norm": 1.2538624361013717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434960 + }, + { + "epoch": 2.1095258937913828, + "grad_norm": 1.3438941515175884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434970 + }, + { + "epoch": 2.1095743919842187, + "grad_norm": 1.2765358547994765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434980 + }, + { + "epoch": 2.1096228901770546, + "grad_norm": 1.0870739863833023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 434990 + }, + { + "epoch": 2.109671388369891, + "grad_norm": 1.0670277106328285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435000 + }, + { + "epoch": 2.109719886562727, + "grad_norm": 1.0391950411303696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435010 + }, + { + "epoch": 2.109768384755563, + "grad_norm": 6.8973213807055345e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435020 + }, + { + "epoch": 2.109816882948399, + "grad_norm": 1.850712294526602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435030 + }, + { + "epoch": 2.109865381141235, + "grad_norm": 2.1903552749336086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435040 + }, + { + "epoch": 2.1099138793340715, + "grad_norm": 1.1140797617770204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435050 + }, + { + "epoch": 2.1099623775269074, + "grad_norm": 7.4173565067781055e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435060 + }, + { + "epoch": 2.1100108757197433, + "grad_norm": 1.2585134712139734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435070 + }, + { + "epoch": 2.1100593739125797, + "grad_norm": 1.421095241482817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435080 + }, + { + "epoch": 2.1101078721054156, + "grad_norm": 1.3882749172466902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435090 + }, + { + "epoch": 2.1101563702982515, + "grad_norm": 1.5930664787333626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435100 + }, + { + "epoch": 2.110204868491088, + "grad_norm": 1.2503130086827241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435110 + }, + { + "epoch": 2.110253366683924, + "grad_norm": 1.5068804870566055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435120 + }, + { + "epoch": 2.11030186487676, + "grad_norm": 1.4082793597935961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435130 + }, + { + "epoch": 2.110350363069596, + "grad_norm": 1.0630033742131673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435140 + }, + { + "epoch": 2.110398861262432, + "grad_norm": 9.478653417716032e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435150 + }, + { + "epoch": 2.1104473594552684, + "grad_norm": 1.1752410600252006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435160 + }, + { + "epoch": 2.1104958576481043, + "grad_norm": 7.574220362016604e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435170 + }, + { + "epoch": 2.1105443558409407, + "grad_norm": 9.51254097714127e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435180 + }, + { + "epoch": 2.1105928540337766, + "grad_norm": 1.0944191330963804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435190 + }, + { + "epoch": 2.1106413522266125, + "grad_norm": 1.4638541045997044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435200 + }, + { + "epoch": 2.110689850419449, + "grad_norm": 1.0989126941751692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435210 + }, + { + "epoch": 2.110738348612285, + "grad_norm": 1.3826900513436158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435220 + }, + { + "epoch": 2.1107868468051207, + "grad_norm": 2.7915385558685557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435230 + }, + { + "epoch": 2.110835344997957, + "grad_norm": 1.4405227233282858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435240 + }, + { + "epoch": 2.110883843190793, + "grad_norm": 1.0518252047120313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435250 + }, + { + "epoch": 2.1109323413836294, + "grad_norm": 1.2574972174661525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435260 + }, + { + "epoch": 2.1109808395764653, + "grad_norm": 1.491859791258321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435270 + }, + { + "epoch": 2.1110293377693012, + "grad_norm": 1.0754898305265215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435280 + }, + { + "epoch": 2.1110778359621376, + "grad_norm": 1.9129599237999173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435290 + }, + { + "epoch": 2.1111263341549735, + "grad_norm": 1.011346650869882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435300 + }, + { + "epoch": 2.1111748323478094, + "grad_norm": 1.085040057802189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435310 + }, + { + "epoch": 2.111223330540646, + "grad_norm": 7.553065728416186e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435320 + }, + { + "epoch": 2.1112718287334817, + "grad_norm": 1.1053042925368572e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435330 + }, + { + "epoch": 2.111320326926318, + "grad_norm": 1.2809996619012054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435340 + }, + { + "epoch": 2.111368825119154, + "grad_norm": 1.0677435824391068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435350 + }, + { + "epoch": 2.11141732331199, + "grad_norm": 1.259275883569444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435360 + }, + { + "epoch": 2.1114658215048263, + "grad_norm": 1.5736603131699667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435370 + }, + { + "epoch": 2.1115143196976622, + "grad_norm": 9.907126674590927e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435380 + }, + { + "epoch": 2.111562817890498, + "grad_norm": 1.0146723461446072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435390 + }, + { + "epoch": 2.1116113160833345, + "grad_norm": 9.224895514137188e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435400 + }, + { + "epoch": 2.1116598142761704, + "grad_norm": 1.7858580392271506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435410 + }, + { + "epoch": 2.111708312469007, + "grad_norm": 1.277067607219351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435420 + }, + { + "epoch": 2.1117568106618427, + "grad_norm": 8.407210039251822e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435430 + }, + { + "epoch": 2.1118053088546787, + "grad_norm": 1.211480693541489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435440 + }, + { + "epoch": 2.111853807047515, + "grad_norm": 1.1232814678407976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435450 + }, + { + "epoch": 2.111902305240351, + "grad_norm": 8.025560660485098e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435460 + }, + { + "epoch": 2.111950803433187, + "grad_norm": 9.801238043394278e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435470 + }, + { + "epoch": 2.1119993016260232, + "grad_norm": 1.60694710871212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435480 + }, + { + "epoch": 2.112047799818859, + "grad_norm": 1.2844873609196839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435490 + }, + { + "epoch": 2.1120962980116955, + "grad_norm": 1.1109758446536944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435500 + }, + { + "epoch": 2.1121447962045314, + "grad_norm": 1.357665091461513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435510 + }, + { + "epoch": 2.1121932943973674, + "grad_norm": 9.87974946298209e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435520 + }, + { + "epoch": 2.1122417925902037, + "grad_norm": 7.807159363437677e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435530 + }, + { + "epoch": 2.1122902907830396, + "grad_norm": 9.809173917574299e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435540 + }, + { + "epoch": 2.112338788975876, + "grad_norm": 6.406836838834806e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435550 + }, + { + "epoch": 2.112387287168712, + "grad_norm": 8.397357476042089e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435560 + }, + { + "epoch": 2.112435785361548, + "grad_norm": 1.1360137719407248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435570 + }, + { + "epoch": 2.1124842835543842, + "grad_norm": 1.1984350400950916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435580 + }, + { + "epoch": 2.11253278174722, + "grad_norm": 1.2476676580774892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435590 + }, + { + "epoch": 2.112581279940056, + "grad_norm": 1.2596079734805699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435600 + }, + { + "epoch": 2.1126297781328924, + "grad_norm": 1.4831942785065166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435610 + }, + { + "epoch": 2.1126782763257284, + "grad_norm": 1.4741098119941398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435620 + }, + { + "epoch": 2.1127267745185643, + "grad_norm": 8.74895178526458e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435630 + }, + { + "epoch": 2.1127752727114006, + "grad_norm": 1.695450357885875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435640 + }, + { + "epoch": 2.1128237709042366, + "grad_norm": 0.008519060909748077, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 435650 + }, + { + "epoch": 2.112872269097073, + "grad_norm": 0.0014779368648305535, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 435660 + }, + { + "epoch": 2.112920767289909, + "grad_norm": 0.0001721197331789881, + "learning_rate": 0.0002, + "loss": 0.058, + "step": 435670 + }, + { + "epoch": 2.1129692654827448, + "grad_norm": 0.00010634256614139304, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 435680 + }, + { + "epoch": 2.113017763675581, + "grad_norm": 0.00037432549288496375, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435690 + }, + { + "epoch": 2.113066261868417, + "grad_norm": 2.603063876449596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435700 + }, + { + "epoch": 2.1131147600612534, + "grad_norm": 0.0853448286652565, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435710 + }, + { + "epoch": 2.1131632582540893, + "grad_norm": 0.00019154961046297103, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 435720 + }, + { + "epoch": 2.1132117564469253, + "grad_norm": 1.4740024198545143e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435730 + }, + { + "epoch": 2.1132602546397616, + "grad_norm": 1.4098925930738915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435740 + }, + { + "epoch": 2.1133087528325976, + "grad_norm": 3.8767586374888197e-05, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 435750 + }, + { + "epoch": 2.1133572510254335, + "grad_norm": 0.0002838369691744447, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435760 + }, + { + "epoch": 2.11340574921827, + "grad_norm": 0.00011132182407891378, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 435770 + }, + { + "epoch": 2.1134542474111058, + "grad_norm": 5.114267332828604e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435780 + }, + { + "epoch": 2.113502745603942, + "grad_norm": 5.14372150064446e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 435790 + }, + { + "epoch": 2.113551243796778, + "grad_norm": 3.956392902182415e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435800 + }, + { + "epoch": 2.113599741989614, + "grad_norm": 3.206267865607515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435810 + }, + { + "epoch": 2.1136482401824503, + "grad_norm": 2.6349396648583934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435820 + }, + { + "epoch": 2.1136967383752863, + "grad_norm": 2.5488498067716137e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435830 + }, + { + "epoch": 2.113745236568122, + "grad_norm": 1.9559623979148455e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435840 + }, + { + "epoch": 2.1137937347609586, + "grad_norm": 1.6718551705707796e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435850 + }, + { + "epoch": 2.1138422329537945, + "grad_norm": 1.5786992662469856e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435860 + }, + { + "epoch": 2.113890731146631, + "grad_norm": 1.4362377442012075e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435870 + }, + { + "epoch": 2.1139392293394668, + "grad_norm": 1.5475128748221323e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435880 + }, + { + "epoch": 2.1139877275323027, + "grad_norm": 1.2671356671489775e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435890 + }, + { + "epoch": 2.114036225725139, + "grad_norm": 1.1803766028606333e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435900 + }, + { + "epoch": 2.114084723917975, + "grad_norm": 1.1432998689997476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435910 + }, + { + "epoch": 2.114133222110811, + "grad_norm": 1.0574670341156889e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435920 + }, + { + "epoch": 2.1141817203036473, + "grad_norm": 1.0259487680741586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435930 + }, + { + "epoch": 2.114230218496483, + "grad_norm": 1.3320562175067607e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435940 + }, + { + "epoch": 2.1142787166893195, + "grad_norm": 1.0134453077625949e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435950 + }, + { + "epoch": 2.1143272148821555, + "grad_norm": 8.56609949551057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435960 + }, + { + "epoch": 2.1143757130749914, + "grad_norm": 8.479158168483991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435970 + }, + { + "epoch": 2.1144242112678278, + "grad_norm": 7.79205311118858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435980 + }, + { + "epoch": 2.1144727094606637, + "grad_norm": 7.768514478811994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 435990 + }, + { + "epoch": 2.1145212076534996, + "grad_norm": 7.699092748225667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436000 + }, + { + "epoch": 2.114569705846336, + "grad_norm": 7.1506569838675205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436010 + }, + { + "epoch": 2.114618204039172, + "grad_norm": 7.56833333070972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436020 + }, + { + "epoch": 2.1146667022320083, + "grad_norm": 6.907697752467357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436030 + }, + { + "epoch": 2.114715200424844, + "grad_norm": 6.813050276832655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436040 + }, + { + "epoch": 2.11476369861768, + "grad_norm": 6.3822926676948555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436050 + }, + { + "epoch": 2.1148121968105165, + "grad_norm": 6.054339337424608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436060 + }, + { + "epoch": 2.1148606950033524, + "grad_norm": 6.2536655605072156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436070 + }, + { + "epoch": 2.1149091931961888, + "grad_norm": 6.257293989619939e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436080 + }, + { + "epoch": 2.1149576913890247, + "grad_norm": 5.81055201109848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436090 + }, + { + "epoch": 2.1150061895818606, + "grad_norm": 5.698130735254381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436100 + }, + { + "epoch": 2.115054687774697, + "grad_norm": 5.721950856241165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436110 + }, + { + "epoch": 2.115103185967533, + "grad_norm": 5.260574198473478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436120 + }, + { + "epoch": 2.115151684160369, + "grad_norm": 5.586378847510787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436130 + }, + { + "epoch": 2.115200182353205, + "grad_norm": 5.196044185140636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436140 + }, + { + "epoch": 2.115248680546041, + "grad_norm": 5.19982722835266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436150 + }, + { + "epoch": 2.115297178738877, + "grad_norm": 5.240256996330572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436160 + }, + { + "epoch": 2.1153456769317134, + "grad_norm": 5.233196134213358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436170 + }, + { + "epoch": 2.1153941751245493, + "grad_norm": 4.881353106611641e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436180 + }, + { + "epoch": 2.1154426733173857, + "grad_norm": 4.7757152970007155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436190 + }, + { + "epoch": 2.1154911715102216, + "grad_norm": 4.783604708791245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436200 + }, + { + "epoch": 2.1155396697030575, + "grad_norm": 4.650560185837094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436210 + }, + { + "epoch": 2.115588167895894, + "grad_norm": 5.1230322242190596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436220 + }, + { + "epoch": 2.11563666608873, + "grad_norm": 5.051234893471701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436230 + }, + { + "epoch": 2.115685164281566, + "grad_norm": 4.367245765024563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436240 + }, + { + "epoch": 2.115733662474402, + "grad_norm": 4.263667960913153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436250 + }, + { + "epoch": 2.115782160667238, + "grad_norm": 4.22764924223884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436260 + }, + { + "epoch": 2.1158306588600744, + "grad_norm": 4.1441967368882615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436270 + }, + { + "epoch": 2.1158791570529103, + "grad_norm": 4.382081897347234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436280 + }, + { + "epoch": 2.115927655245746, + "grad_norm": 4.041191459691618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436290 + }, + { + "epoch": 2.1159761534385826, + "grad_norm": 4.04650745622348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436300 + }, + { + "epoch": 2.1160246516314185, + "grad_norm": 3.787989498960087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436310 + }, + { + "epoch": 2.116073149824255, + "grad_norm": 3.841413217742229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436320 + }, + { + "epoch": 2.116121648017091, + "grad_norm": 4.010983502666932e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436330 + }, + { + "epoch": 2.1161701462099267, + "grad_norm": 3.5627274428406963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436340 + }, + { + "epoch": 2.116218644402763, + "grad_norm": 5.116774900670862e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 436350 + }, + { + "epoch": 2.116267142595599, + "grad_norm": 7.719733548583463e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 436360 + }, + { + "epoch": 2.116315640788435, + "grad_norm": 1.8018246919382364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436370 + }, + { + "epoch": 2.1163641389812713, + "grad_norm": 5.634218723571394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436380 + }, + { + "epoch": 2.116412637174107, + "grad_norm": 9.14278189156903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436390 + }, + { + "epoch": 2.1164611353669436, + "grad_norm": 9.028197382576764e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436400 + }, + { + "epoch": 2.1165096335597795, + "grad_norm": 7.81637390900869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436410 + }, + { + "epoch": 2.1165581317526154, + "grad_norm": 8.02825434220722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436420 + }, + { + "epoch": 2.116606629945452, + "grad_norm": 4.553674443741329e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436430 + }, + { + "epoch": 2.1166551281382877, + "grad_norm": 6.823500370956026e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436440 + }, + { + "epoch": 2.1167036263311236, + "grad_norm": 7.847254892112687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436450 + }, + { + "epoch": 2.11675212452396, + "grad_norm": 6.643214419455035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436460 + }, + { + "epoch": 2.116800622716796, + "grad_norm": 6.985846539464546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436470 + }, + { + "epoch": 2.1168491209096323, + "grad_norm": 3.8529778976226225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436480 + }, + { + "epoch": 2.116897619102468, + "grad_norm": 5.923192020418355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436490 + }, + { + "epoch": 2.116946117295304, + "grad_norm": 6.786439826100832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436500 + }, + { + "epoch": 2.1169946154881405, + "grad_norm": 5.778801096312236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436510 + }, + { + "epoch": 2.1170431136809764, + "grad_norm": 5.532877821678994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436520 + }, + { + "epoch": 2.1170916118738123, + "grad_norm": 3.5129014577250928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436530 + }, + { + "epoch": 2.1171401100666487, + "grad_norm": 5.292194146022666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436540 + }, + { + "epoch": 2.1171886082594846, + "grad_norm": 9.236220648745075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436550 + }, + { + "epoch": 2.117237106452321, + "grad_norm": 5.447256171464687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436560 + }, + { + "epoch": 2.117285604645157, + "grad_norm": 4.700240424426738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436570 + }, + { + "epoch": 2.117334102837993, + "grad_norm": 3.094741714448901e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436580 + }, + { + "epoch": 2.117382601030829, + "grad_norm": 4.724181053461507e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436590 + }, + { + "epoch": 2.117431099223665, + "grad_norm": 4.1829480323940516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436600 + }, + { + "epoch": 2.1174795974165015, + "grad_norm": 4.0386175896856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436610 + }, + { + "epoch": 2.1175280956093374, + "grad_norm": 4.125541636312846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436620 + }, + { + "epoch": 2.1175765938021733, + "grad_norm": 2.7382286589272553e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436630 + }, + { + "epoch": 2.1176250919950097, + "grad_norm": 3.990308869106229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436640 + }, + { + "epoch": 2.1176735901878456, + "grad_norm": 3.957827175327111e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436650 + }, + { + "epoch": 2.1177220883806815, + "grad_norm": 3.6424016798264347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436660 + }, + { + "epoch": 2.117770586573518, + "grad_norm": 3.67302732229291e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436670 + }, + { + "epoch": 2.117819084766354, + "grad_norm": 2.3406471427733777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436680 + }, + { + "epoch": 2.11786758295919, + "grad_norm": 3.700081833812874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436690 + }, + { + "epoch": 2.117916081152026, + "grad_norm": 3.4448441965651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436700 + }, + { + "epoch": 2.117964579344862, + "grad_norm": 3.1689146453572903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436710 + }, + { + "epoch": 2.1180130775376984, + "grad_norm": 3.4206968848593533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436720 + }, + { + "epoch": 2.1180615757305343, + "grad_norm": 2.036208115896443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436730 + }, + { + "epoch": 2.1181100739233703, + "grad_norm": 3.130984623567201e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436740 + }, + { + "epoch": 2.1181585721162066, + "grad_norm": 3.5384348393563414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436750 + }, + { + "epoch": 2.1182070703090425, + "grad_norm": 3.174517360093887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436760 + }, + { + "epoch": 2.118255568501879, + "grad_norm": 3.2312607345375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436770 + }, + { + "epoch": 2.118304066694715, + "grad_norm": 1.8902831016021082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436780 + }, + { + "epoch": 2.1183525648875507, + "grad_norm": 2.8815950372518273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436790 + }, + { + "epoch": 2.118401063080387, + "grad_norm": 2.7300179681333248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436800 + }, + { + "epoch": 2.118449561273223, + "grad_norm": 2.656315700733103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436810 + }, + { + "epoch": 2.118498059466059, + "grad_norm": 2.826721811288735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436820 + }, + { + "epoch": 2.1185465576588953, + "grad_norm": 1.6974277059489395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436830 + }, + { + "epoch": 2.1185950558517312, + "grad_norm": 2.57216584031994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436840 + }, + { + "epoch": 2.1186435540445676, + "grad_norm": 2.383766741331783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436850 + }, + { + "epoch": 2.1186920522374035, + "grad_norm": 2.6209058887616266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436860 + }, + { + "epoch": 2.1187405504302395, + "grad_norm": 2.624384251248557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436870 + }, + { + "epoch": 2.118789048623076, + "grad_norm": 1.3515406180886202e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436880 + }, + { + "epoch": 2.1188375468159117, + "grad_norm": 2.5602207642805297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436890 + }, + { + "epoch": 2.1188860450087477, + "grad_norm": 2.0420836790435715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436900 + }, + { + "epoch": 2.118934543201584, + "grad_norm": 2.285188429596019e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436910 + }, + { + "epoch": 2.11898304139442, + "grad_norm": 2.1539590306929313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436920 + }, + { + "epoch": 2.1190315395872563, + "grad_norm": 1.2639849273909931e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436930 + }, + { + "epoch": 2.1190800377800922, + "grad_norm": 2.4450134787912248e-06, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 436940 + }, + { + "epoch": 2.119128535972928, + "grad_norm": 8.947010428528301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436950 + }, + { + "epoch": 2.1191770341657645, + "grad_norm": 2.1645153537974693e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436960 + }, + { + "epoch": 2.1192255323586005, + "grad_norm": 2.7098825739813037e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436970 + }, + { + "epoch": 2.1192740305514364, + "grad_norm": 3.315779031254351e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436980 + }, + { + "epoch": 2.1193225287442727, + "grad_norm": 1.8106004063156433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 436990 + }, + { + "epoch": 2.1193710269371087, + "grad_norm": 1.485935445089126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437000 + }, + { + "epoch": 2.119419525129945, + "grad_norm": 1.3196478903410025e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437010 + }, + { + "epoch": 2.119468023322781, + "grad_norm": 1.0608110642351676e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437020 + }, + { + "epoch": 2.119516521515617, + "grad_norm": 1.2767995031026658e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437030 + }, + { + "epoch": 2.1195650197084532, + "grad_norm": 8.314150363730732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437040 + }, + { + "epoch": 2.119613517901289, + "grad_norm": 8.310364137287252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437050 + }, + { + "epoch": 2.119662016094125, + "grad_norm": 7.5773837124870624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437060 + }, + { + "epoch": 2.1197105142869614, + "grad_norm": 6.7529367697716225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437070 + }, + { + "epoch": 2.1197590124797974, + "grad_norm": 7.808892405591905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437080 + }, + { + "epoch": 2.1198075106726337, + "grad_norm": 6.133775968919508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437090 + }, + { + "epoch": 2.1198560088654697, + "grad_norm": 5.583578058576677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437100 + }, + { + "epoch": 2.1199045070583056, + "grad_norm": 5.141862857271917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437110 + }, + { + "epoch": 2.119953005251142, + "grad_norm": 5.129840246809181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437120 + }, + { + "epoch": 2.120001503443978, + "grad_norm": 5.975915883027483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437130 + }, + { + "epoch": 2.1200500016368142, + "grad_norm": 4.804013315151678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437140 + }, + { + "epoch": 2.12009849982965, + "grad_norm": 4.306195478420705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437150 + }, + { + "epoch": 2.120146998022486, + "grad_norm": 4.3484237721713725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437160 + }, + { + "epoch": 2.1201954962153224, + "grad_norm": 4.199103386781644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437170 + }, + { + "epoch": 2.1202439944081584, + "grad_norm": 5.596719802269945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437180 + }, + { + "epoch": 2.1202924926009943, + "grad_norm": 3.7345762393670157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437190 + }, + { + "epoch": 2.1203409907938306, + "grad_norm": 3.511192289806786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437200 + }, + { + "epoch": 2.1203894889866666, + "grad_norm": 3.6273293062549783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437210 + }, + { + "epoch": 2.120437987179503, + "grad_norm": 3.477903874227195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437220 + }, + { + "epoch": 2.120486485372339, + "grad_norm": 4.3403692870924715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437230 + }, + { + "epoch": 2.120534983565175, + "grad_norm": 3.265027316956548e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437240 + }, + { + "epoch": 2.120583481758011, + "grad_norm": 3.015503807546338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437250 + }, + { + "epoch": 2.120631979950847, + "grad_norm": 3.010200998687651e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437260 + }, + { + "epoch": 2.120680478143683, + "grad_norm": 2.820452664309414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437270 + }, + { + "epoch": 2.1207289763365194, + "grad_norm": 3.0963165045250207e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437280 + }, + { + "epoch": 2.1207774745293553, + "grad_norm": 2.705555061766063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437290 + }, + { + "epoch": 2.1208259727221916, + "grad_norm": 2.7741534722736105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437300 + }, + { + "epoch": 2.1208744709150276, + "grad_norm": 2.6254540443915175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437310 + }, + { + "epoch": 2.1209229691078635, + "grad_norm": 2.7203459467273206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437320 + }, + { + "epoch": 2.1209714673007, + "grad_norm": 2.832132850016933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437330 + }, + { + "epoch": 2.1210199654935358, + "grad_norm": 2.3184259134723106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437340 + }, + { + "epoch": 2.1210684636863717, + "grad_norm": 2.3288503143703565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437350 + }, + { + "epoch": 2.121116961879208, + "grad_norm": 2.293034185640863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437360 + }, + { + "epoch": 2.121165460072044, + "grad_norm": 2.2029046249372186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437370 + }, + { + "epoch": 2.1212139582648804, + "grad_norm": 2.362842906222795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437380 + }, + { + "epoch": 2.1212624564577163, + "grad_norm": 2.1395580915850587e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437390 + }, + { + "epoch": 2.121310954650552, + "grad_norm": 2.0881736872979673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437400 + }, + { + "epoch": 2.1213594528433886, + "grad_norm": 1.8669625205802731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437410 + }, + { + "epoch": 2.1214079510362245, + "grad_norm": 1.975318809854798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437420 + }, + { + "epoch": 2.1214564492290604, + "grad_norm": 2.1512464627448935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437430 + }, + { + "epoch": 2.1215049474218968, + "grad_norm": 1.809359105209296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437440 + }, + { + "epoch": 2.1215534456147327, + "grad_norm": 1.7807260519475676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437450 + }, + { + "epoch": 2.121601943807569, + "grad_norm": 1.8998144923898508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437460 + }, + { + "epoch": 2.121650442000405, + "grad_norm": 1.8838143205357483e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437470 + }, + { + "epoch": 2.121698940193241, + "grad_norm": 1.868359618129034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437480 + }, + { + "epoch": 2.1217474383860773, + "grad_norm": 1.7544398360769264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437490 + }, + { + "epoch": 2.121795936578913, + "grad_norm": 1.729127347971371e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437500 + }, + { + "epoch": 2.121844434771749, + "grad_norm": 1.8214874444311135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437510 + }, + { + "epoch": 2.1218929329645855, + "grad_norm": 1.7946050547834602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437520 + }, + { + "epoch": 2.1219414311574214, + "grad_norm": 1.637300670154218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437530 + }, + { + "epoch": 2.1219899293502578, + "grad_norm": 1.634426098462427e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437540 + }, + { + "epoch": 2.1220384275430937, + "grad_norm": 1.5939867807901464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437550 + }, + { + "epoch": 2.1220869257359296, + "grad_norm": 1.547438387206057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437560 + }, + { + "epoch": 2.122135423928766, + "grad_norm": 1.7516280195195577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437570 + }, + { + "epoch": 2.122183922121602, + "grad_norm": 1.6240790046140319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437580 + }, + { + "epoch": 2.122232420314438, + "grad_norm": 1.4563585182258976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437590 + }, + { + "epoch": 2.122280918507274, + "grad_norm": 1.4629980569225154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437600 + }, + { + "epoch": 2.12232941670011, + "grad_norm": 1.4850281786493724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437610 + }, + { + "epoch": 2.1223779148929465, + "grad_norm": 1.43867055157898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437620 + }, + { + "epoch": 2.1224264130857824, + "grad_norm": 1.3371309250942431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437630 + }, + { + "epoch": 2.1224749112786183, + "grad_norm": 1.332538772658154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437640 + }, + { + "epoch": 2.1225234094714547, + "grad_norm": 1.4423211496250588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437650 + }, + { + "epoch": 2.1225719076642906, + "grad_norm": 1.3136417464920669e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437660 + }, + { + "epoch": 2.122620405857127, + "grad_norm": 1.3255793192001875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437670 + }, + { + "epoch": 2.122668904049963, + "grad_norm": 1.2965640507900389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437680 + }, + { + "epoch": 2.122717402242799, + "grad_norm": 1.3140980854586815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437690 + }, + { + "epoch": 2.122765900435635, + "grad_norm": 1.4556445648850058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437700 + }, + { + "epoch": 2.122814398628471, + "grad_norm": 1.2644522939808667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437710 + }, + { + "epoch": 2.122862896821307, + "grad_norm": 1.2005062899334007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437720 + }, + { + "epoch": 2.1229113950141434, + "grad_norm": 1.3309557971297181e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437730 + }, + { + "epoch": 2.1229598932069793, + "grad_norm": 1.2017367225780617e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437740 + }, + { + "epoch": 2.1230083913998157, + "grad_norm": 1.2249496421645745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437750 + }, + { + "epoch": 2.1230568895926516, + "grad_norm": 1.1535981911947601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437760 + }, + { + "epoch": 2.1231053877854875, + "grad_norm": 1.2255597994226264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437770 + }, + { + "epoch": 2.123153885978324, + "grad_norm": 1.0722408205765532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437780 + }, + { + "epoch": 2.12320238417116, + "grad_norm": 1.0700637176341843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437790 + }, + { + "epoch": 2.1232508823639957, + "grad_norm": 1.1328125992804416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437800 + }, + { + "epoch": 2.123299380556832, + "grad_norm": 1.0993028354278067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437810 + }, + { + "epoch": 2.123347878749668, + "grad_norm": 1.2430102742655436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437820 + }, + { + "epoch": 2.1233963769425044, + "grad_norm": 1.081401705960161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437830 + }, + { + "epoch": 2.1234448751353403, + "grad_norm": 1.0032731552200858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437840 + }, + { + "epoch": 2.1234933733281762, + "grad_norm": 1.1078125226049451e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437850 + }, + { + "epoch": 2.1235418715210126, + "grad_norm": 1.210475602420047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437860 + }, + { + "epoch": 2.1235903697138485, + "grad_norm": 1.1004781299561728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437870 + }, + { + "epoch": 2.1236388679066844, + "grad_norm": 1.05799858829414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437880 + }, + { + "epoch": 2.123687366099521, + "grad_norm": 1.0179917353525525e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437890 + }, + { + "epoch": 2.1237358642923567, + "grad_norm": 1.0075794989461428e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437900 + }, + { + "epoch": 2.123784362485193, + "grad_norm": 9.852586799752316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437910 + }, + { + "epoch": 2.123832860678029, + "grad_norm": 1.0506386161068804e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437920 + }, + { + "epoch": 2.123881358870865, + "grad_norm": 1.0005306876337272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437930 + }, + { + "epoch": 2.1239298570637013, + "grad_norm": 9.258465638595226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437940 + }, + { + "epoch": 2.123978355256537, + "grad_norm": 9.192700645144214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437950 + }, + { + "epoch": 2.124026853449373, + "grad_norm": 9.643749763199594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437960 + }, + { + "epoch": 2.1240753516422095, + "grad_norm": 9.458848921894969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437970 + }, + { + "epoch": 2.1241238498350454, + "grad_norm": 8.79784636254044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437980 + }, + { + "epoch": 2.124172348027882, + "grad_norm": 9.537582172924886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 437990 + }, + { + "epoch": 2.1242208462207177, + "grad_norm": 8.988104696072696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438000 + }, + { + "epoch": 2.1242693444135536, + "grad_norm": 9.046157174452674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438010 + }, + { + "epoch": 2.12431784260639, + "grad_norm": 8.719399033907393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438020 + }, + { + "epoch": 2.124366340799226, + "grad_norm": 8.889209084372851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438030 + }, + { + "epoch": 2.124414838992062, + "grad_norm": 8.472365493616962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438040 + }, + { + "epoch": 2.124463337184898, + "grad_norm": 9.255060149371275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438050 + }, + { + "epoch": 2.124511835377734, + "grad_norm": 9.00518159596686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438060 + }, + { + "epoch": 2.1245603335705705, + "grad_norm": 7.862382176426763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438070 + }, + { + "epoch": 2.1246088317634064, + "grad_norm": 8.600852652307367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438080 + }, + { + "epoch": 2.1246573299562423, + "grad_norm": 8.509878739459964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438090 + }, + { + "epoch": 2.1247058281490787, + "grad_norm": 8.249244842772896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438100 + }, + { + "epoch": 2.1247543263419146, + "grad_norm": 7.1430605430578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438110 + }, + { + "epoch": 2.1248028245347506, + "grad_norm": 7.637829639861593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438120 + }, + { + "epoch": 2.124851322727587, + "grad_norm": 7.266914963111049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438130 + }, + { + "epoch": 2.124899820920423, + "grad_norm": 7.964771384649794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438140 + }, + { + "epoch": 2.124948319113259, + "grad_norm": 7.131875463528559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438150 + }, + { + "epoch": 2.124996817306095, + "grad_norm": 7.205033512036607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438160 + }, + { + "epoch": 2.125045315498931, + "grad_norm": 7.901360845607996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438170 + }, + { + "epoch": 2.1250938136917674, + "grad_norm": 7.68588506616652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438180 + }, + { + "epoch": 2.1251423118846033, + "grad_norm": 7.665614703000756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438190 + }, + { + "epoch": 2.1251908100774397, + "grad_norm": 7.808094437677937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438200 + }, + { + "epoch": 2.1252393082702756, + "grad_norm": 7.091191491781501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438210 + }, + { + "epoch": 2.1252878064631116, + "grad_norm": 7.340057095461816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438220 + }, + { + "epoch": 2.125336304655948, + "grad_norm": 6.421412308554864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438230 + }, + { + "epoch": 2.125384802848784, + "grad_norm": 7.263047336891759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438240 + }, + { + "epoch": 2.1254333010416198, + "grad_norm": 7.106997372829937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438250 + }, + { + "epoch": 2.125481799234456, + "grad_norm": 6.839219395260443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438260 + }, + { + "epoch": 2.125530297427292, + "grad_norm": 6.723789738316555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438270 + }, + { + "epoch": 2.125578795620128, + "grad_norm": 6.024645813340612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438280 + }, + { + "epoch": 2.1256272938129643, + "grad_norm": 6.507228249574837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438290 + }, + { + "epoch": 2.1256757920058003, + "grad_norm": 6.654590265497973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438300 + }, + { + "epoch": 2.1257242901986366, + "grad_norm": 6.802967504881963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438310 + }, + { + "epoch": 2.1257727883914725, + "grad_norm": 6.620337558160827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438320 + }, + { + "epoch": 2.1258212865843085, + "grad_norm": 6.462498731707456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438330 + }, + { + "epoch": 2.125869784777145, + "grad_norm": 6.178451030791621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438340 + }, + { + "epoch": 2.1259182829699808, + "grad_norm": 6.544857455992314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438350 + }, + { + "epoch": 2.125966781162817, + "grad_norm": 6.024667413839779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438360 + }, + { + "epoch": 2.126015279355653, + "grad_norm": 6.751540126970212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438370 + }, + { + "epoch": 2.126063777548489, + "grad_norm": 6.203542852745159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438380 + }, + { + "epoch": 2.1261122757413253, + "grad_norm": 6.008518766975612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438390 + }, + { + "epoch": 2.1261607739341613, + "grad_norm": 5.949711976427352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438400 + }, + { + "epoch": 2.126209272126997, + "grad_norm": 5.78776507609291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438410 + }, + { + "epoch": 2.1262577703198335, + "grad_norm": 6.201075279932411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438420 + }, + { + "epoch": 2.1263062685126695, + "grad_norm": 5.553772552957525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438430 + }, + { + "epoch": 2.126354766705506, + "grad_norm": 6.061866315576481e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438440 + }, + { + "epoch": 2.1264032648983417, + "grad_norm": 5.695750928680354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438450 + }, + { + "epoch": 2.1264517630911777, + "grad_norm": 5.368806341721211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438460 + }, + { + "epoch": 2.126500261284014, + "grad_norm": 5.865172738594993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438470 + }, + { + "epoch": 2.12654875947685, + "grad_norm": 5.901489430470974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438480 + }, + { + "epoch": 2.126597257669686, + "grad_norm": 5.575657269218937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438490 + }, + { + "epoch": 2.1266457558625222, + "grad_norm": 5.923839125898667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438500 + }, + { + "epoch": 2.126694254055358, + "grad_norm": 5.729477834393037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438510 + }, + { + "epoch": 2.1267427522481945, + "grad_norm": 5.664318223352893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438520 + }, + { + "epoch": 2.1267912504410305, + "grad_norm": 5.367633093555924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438530 + }, + { + "epoch": 2.1268397486338664, + "grad_norm": 5.435584853330511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438540 + }, + { + "epoch": 2.1268882468267027, + "grad_norm": 5.716627242691175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438550 + }, + { + "epoch": 2.1269367450195387, + "grad_norm": 5.565802325691038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438560 + }, + { + "epoch": 2.126985243212375, + "grad_norm": 5.612935751742043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438570 + }, + { + "epoch": 2.127033741405211, + "grad_norm": 4.769264592141553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438580 + }, + { + "epoch": 2.127082239598047, + "grad_norm": 5.251648644843954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438590 + }, + { + "epoch": 2.1271307377908832, + "grad_norm": 5.069387611911225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438600 + }, + { + "epoch": 2.127179235983719, + "grad_norm": 4.889719207312737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438610 + }, + { + "epoch": 2.127227734176555, + "grad_norm": 5.082300162939646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438620 + }, + { + "epoch": 2.1272762323693915, + "grad_norm": 4.5403376702779497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438630 + }, + { + "epoch": 2.1273247305622274, + "grad_norm": 5.373456133384025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438640 + }, + { + "epoch": 2.1273732287550633, + "grad_norm": 5.365423021430615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438650 + }, + { + "epoch": 2.1274217269478997, + "grad_norm": 4.7663453983659565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438660 + }, + { + "epoch": 2.1274702251407356, + "grad_norm": 4.85880377709691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438670 + }, + { + "epoch": 2.127518723333572, + "grad_norm": 4.894258722742961e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438680 + }, + { + "epoch": 2.127567221526408, + "grad_norm": 5.145137151885137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438690 + }, + { + "epoch": 2.127615719719244, + "grad_norm": 4.6604390036009136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438700 + }, + { + "epoch": 2.12766421791208, + "grad_norm": 5.082529241917655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438710 + }, + { + "epoch": 2.127712716104916, + "grad_norm": 4.888132707492332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438720 + }, + { + "epoch": 2.1277612142977524, + "grad_norm": 5.632099941976776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438730 + }, + { + "epoch": 2.1278097124905884, + "grad_norm": 4.5926853431410564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438740 + }, + { + "epoch": 2.1278582106834243, + "grad_norm": 5.009479195905442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438750 + }, + { + "epoch": 2.1279067088762607, + "grad_norm": 4.382512486245105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438760 + }, + { + "epoch": 2.1279552070690966, + "grad_norm": 4.892358447250444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438770 + }, + { + "epoch": 2.1280037052619325, + "grad_norm": 4.321540245655342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438780 + }, + { + "epoch": 2.128052203454769, + "grad_norm": 4.714651424819749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438790 + }, + { + "epoch": 2.128100701647605, + "grad_norm": 4.4633577545027947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438800 + }, + { + "epoch": 2.128149199840441, + "grad_norm": 4.482142799133726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438810 + }, + { + "epoch": 2.128197698033277, + "grad_norm": 4.610972723639861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438820 + }, + { + "epoch": 2.128246196226113, + "grad_norm": 4.421642358920508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438830 + }, + { + "epoch": 2.1282946944189494, + "grad_norm": 4.402110960199934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438840 + }, + { + "epoch": 2.1283431926117853, + "grad_norm": 4.248468599143962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438850 + }, + { + "epoch": 2.128391690804621, + "grad_norm": 4.426404416335572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438860 + }, + { + "epoch": 2.1284401889974576, + "grad_norm": 4.5623735900335305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438870 + }, + { + "epoch": 2.1284886871902935, + "grad_norm": 3.8592253304159385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438880 + }, + { + "epoch": 2.12853718538313, + "grad_norm": 4.1191364630321914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438890 + }, + { + "epoch": 2.128585683575966, + "grad_norm": 4.573684293518454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438900 + }, + { + "epoch": 2.1286341817688017, + "grad_norm": 4.221541018978314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438910 + }, + { + "epoch": 2.128682679961638, + "grad_norm": 4.384708631732792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438920 + }, + { + "epoch": 2.128731178154474, + "grad_norm": 4.073887112099328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438930 + }, + { + "epoch": 2.12877967634731, + "grad_norm": 4.147703691614879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438940 + }, + { + "epoch": 2.1288281745401463, + "grad_norm": 4.3345008293727005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438950 + }, + { + "epoch": 2.128876672732982, + "grad_norm": 4.2555035406621755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438960 + }, + { + "epoch": 2.1289251709258186, + "grad_norm": 3.993895347775833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438970 + }, + { + "epoch": 2.1289736691186545, + "grad_norm": 3.6702394368148816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438980 + }, + { + "epoch": 2.1290221673114904, + "grad_norm": 4.0828092551237205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 438990 + }, + { + "epoch": 2.1290706655043268, + "grad_norm": 3.940665749269101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439000 + }, + { + "epoch": 2.1291191636971627, + "grad_norm": 4.1951520302063727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439010 + }, + { + "epoch": 2.1291676618899986, + "grad_norm": 3.641713419710868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439020 + }, + { + "epoch": 2.129216160082835, + "grad_norm": 3.8739278807042865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439030 + }, + { + "epoch": 2.129264658275671, + "grad_norm": 3.927992793251178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439040 + }, + { + "epoch": 2.1293131564685073, + "grad_norm": 3.877091785398079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439050 + }, + { + "epoch": 2.129361654661343, + "grad_norm": 4.0876392404243234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439060 + }, + { + "epoch": 2.129410152854179, + "grad_norm": 4.0786437693896005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439070 + }, + { + "epoch": 2.1294586510470155, + "grad_norm": 3.590023425203981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439080 + }, + { + "epoch": 2.1295071492398514, + "grad_norm": 4.1641510506451596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439090 + }, + { + "epoch": 2.1295556474326878, + "grad_norm": 4.006482470231276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439100 + }, + { + "epoch": 2.1296041456255237, + "grad_norm": 4.2526485799498914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439110 + }, + { + "epoch": 2.1296526438183596, + "grad_norm": 3.8291562987069483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439120 + }, + { + "epoch": 2.129701142011196, + "grad_norm": 3.5417949106886226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439130 + }, + { + "epoch": 2.129749640204032, + "grad_norm": 4.006216727248102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439140 + }, + { + "epoch": 2.129798138396868, + "grad_norm": 3.7959540577503503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439150 + }, + { + "epoch": 2.129846636589704, + "grad_norm": 3.85592699103654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439160 + }, + { + "epoch": 2.12989513478254, + "grad_norm": 3.7549872899944603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439170 + }, + { + "epoch": 2.129943632975376, + "grad_norm": 3.245763195991458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439180 + }, + { + "epoch": 2.1299921311682124, + "grad_norm": 3.9566813825331337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439190 + }, + { + "epoch": 2.1300406293610483, + "grad_norm": 3.8818708958388015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439200 + }, + { + "epoch": 2.1300891275538847, + "grad_norm": 3.5628158912004437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439210 + }, + { + "epoch": 2.1301376257467206, + "grad_norm": 3.6924191704201803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439220 + }, + { + "epoch": 2.1301861239395565, + "grad_norm": 3.3346177019666357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439230 + }, + { + "epoch": 2.130234622132393, + "grad_norm": 3.7903819816165196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439240 + }, + { + "epoch": 2.130283120325229, + "grad_norm": 3.845172216188075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439250 + }, + { + "epoch": 2.130331618518065, + "grad_norm": 3.65597117024663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439260 + }, + { + "epoch": 2.130380116710901, + "grad_norm": 3.619105086727359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439270 + }, + { + "epoch": 2.130428614903737, + "grad_norm": 3.014595222339267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439280 + }, + { + "epoch": 2.1304771130965734, + "grad_norm": 3.4352274269622285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439290 + }, + { + "epoch": 2.1305256112894093, + "grad_norm": 3.4505842450016644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439300 + }, + { + "epoch": 2.1305741094822452, + "grad_norm": 5.488234364747768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439310 + }, + { + "epoch": 2.1306226076750816, + "grad_norm": 3.3966153978326474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439320 + }, + { + "epoch": 2.1306711058679175, + "grad_norm": 3.1622383289686695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439330 + }, + { + "epoch": 2.130719604060754, + "grad_norm": 3.310871363737533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439340 + }, + { + "epoch": 2.13076810225359, + "grad_norm": 3.454879333730787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439350 + }, + { + "epoch": 2.1308166004464257, + "grad_norm": 3.298029014331405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439360 + }, + { + "epoch": 2.130865098639262, + "grad_norm": 4.0092859876494913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439370 + }, + { + "epoch": 2.130913596832098, + "grad_norm": 3.1897786811896367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439380 + }, + { + "epoch": 2.130962095024934, + "grad_norm": 3.8794374290773703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439390 + }, + { + "epoch": 2.1310105932177703, + "grad_norm": 3.6104594869357243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439400 + }, + { + "epoch": 2.1310590914106062, + "grad_norm": 3.1726958127364924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439410 + }, + { + "epoch": 2.1311075896034426, + "grad_norm": 3.153642182951444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439420 + }, + { + "epoch": 2.1311560877962785, + "grad_norm": 3.169372462252795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439430 + }, + { + "epoch": 2.1312045859891144, + "grad_norm": 3.17281120487678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439440 + }, + { + "epoch": 2.131253084181951, + "grad_norm": 3.365704799307423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439450 + }, + { + "epoch": 2.1313015823747867, + "grad_norm": 3.621673840825679e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439460 + }, + { + "epoch": 2.1313500805676227, + "grad_norm": 3.1151475354818103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439470 + }, + { + "epoch": 2.131398578760459, + "grad_norm": 2.9440676030390023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439480 + }, + { + "epoch": 2.131447076953295, + "grad_norm": 3.0929612648833427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439490 + }, + { + "epoch": 2.1314955751461313, + "grad_norm": 3.1239721920428565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439500 + }, + { + "epoch": 2.1315440733389672, + "grad_norm": 3.198025808615057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439510 + }, + { + "epoch": 2.131592571531803, + "grad_norm": 3.4200050436083984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439520 + }, + { + "epoch": 2.1316410697246395, + "grad_norm": 2.805148540119262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439530 + }, + { + "epoch": 2.1316895679174754, + "grad_norm": 2.9470638196471555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439540 + }, + { + "epoch": 2.1317380661103114, + "grad_norm": 3.097757144132629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439550 + }, + { + "epoch": 2.1317865643031477, + "grad_norm": 3.1852073334448505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439560 + }, + { + "epoch": 2.1318350624959836, + "grad_norm": 3.1305765446631995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439570 + }, + { + "epoch": 2.13188356068882, + "grad_norm": 2.779309511424799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439580 + }, + { + "epoch": 2.131932058881656, + "grad_norm": 3.410342515053344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439590 + }, + { + "epoch": 2.131980557074492, + "grad_norm": 2.987444815971685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439600 + }, + { + "epoch": 2.132029055267328, + "grad_norm": 3.084568902522733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439610 + }, + { + "epoch": 2.132077553460164, + "grad_norm": 2.8741800406351103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439620 + }, + { + "epoch": 2.1321260516530005, + "grad_norm": 2.752641705683345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439630 + }, + { + "epoch": 2.1321745498458364, + "grad_norm": 2.8398915219440823e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439640 + }, + { + "epoch": 2.1322230480386724, + "grad_norm": 3.1583681447955314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439650 + }, + { + "epoch": 2.1322715462315087, + "grad_norm": 3.2499590929546684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439660 + }, + { + "epoch": 2.1323200444243446, + "grad_norm": 2.8939822982465557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439670 + }, + { + "epoch": 2.1323685426171806, + "grad_norm": 2.853617786513496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439680 + }, + { + "epoch": 2.132417040810017, + "grad_norm": 2.629939501730405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439690 + }, + { + "epoch": 2.132465539002853, + "grad_norm": 2.663508951172844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439700 + }, + { + "epoch": 2.1325140371956888, + "grad_norm": 6.853117042737722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439710 + }, + { + "epoch": 2.132562535388525, + "grad_norm": 2.7976795990980463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439720 + }, + { + "epoch": 2.132611033581361, + "grad_norm": 2.509681849005574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439730 + }, + { + "epoch": 2.1326595317741974, + "grad_norm": 2.6668377017813327e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439740 + }, + { + "epoch": 2.1327080299670333, + "grad_norm": 2.6821356868822477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439750 + }, + { + "epoch": 2.1327565281598693, + "grad_norm": 2.596024160084198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439760 + }, + { + "epoch": 2.1328050263527056, + "grad_norm": 2.57881225707024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439770 + }, + { + "epoch": 2.1328535245455416, + "grad_norm": 2.745881886312418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439780 + }, + { + "epoch": 2.132902022738378, + "grad_norm": 2.8741240498675324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439790 + }, + { + "epoch": 2.132950520931214, + "grad_norm": 2.908353167185851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439800 + }, + { + "epoch": 2.1329990191240498, + "grad_norm": 2.816286439610849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439810 + }, + { + "epoch": 2.133047517316886, + "grad_norm": 3.131249286525417e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439820 + }, + { + "epoch": 2.133096015509722, + "grad_norm": 2.507256056105689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439830 + }, + { + "epoch": 2.133144513702558, + "grad_norm": 2.721018006468512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439840 + }, + { + "epoch": 2.1331930118953943, + "grad_norm": 2.652424200277892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439850 + }, + { + "epoch": 2.1332415100882303, + "grad_norm": 2.485361960680166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439860 + }, + { + "epoch": 2.1332900082810666, + "grad_norm": 2.6789993512466026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439870 + }, + { + "epoch": 2.1333385064739026, + "grad_norm": 2.3187376996247622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439880 + }, + { + "epoch": 2.1333870046667385, + "grad_norm": 2.741247442372696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439890 + }, + { + "epoch": 2.133435502859575, + "grad_norm": 2.627634216878505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439900 + }, + { + "epoch": 2.1334840010524108, + "grad_norm": 2.5946158643819217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439910 + }, + { + "epoch": 2.1335324992452467, + "grad_norm": 2.6521360041442676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439920 + }, + { + "epoch": 2.133580997438083, + "grad_norm": 2.1929952254140517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439930 + }, + { + "epoch": 2.133629495630919, + "grad_norm": 2.8230391535544186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439940 + }, + { + "epoch": 2.1336779938237553, + "grad_norm": 2.663457507878775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439950 + }, + { + "epoch": 2.1337264920165913, + "grad_norm": 2.6150738108299265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439960 + }, + { + "epoch": 2.133774990209427, + "grad_norm": 2.3785044334090344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439970 + }, + { + "epoch": 2.1338234884022635, + "grad_norm": 2.434051680211269e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439980 + }, + { + "epoch": 2.1338719865950995, + "grad_norm": 2.560099403581262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 439990 + }, + { + "epoch": 2.1339204847879354, + "grad_norm": 2.45418846134271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440000 + }, + { + "epoch": 2.1339689829807718, + "grad_norm": 2.4375287921429845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440010 + }, + { + "epoch": 2.1340174811736077, + "grad_norm": 3.5874643344868673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440020 + }, + { + "epoch": 2.134065979366444, + "grad_norm": 2.1045329390290135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440030 + }, + { + "epoch": 2.13411447755928, + "grad_norm": 2.4221057515205757e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440040 + }, + { + "epoch": 2.134162975752116, + "grad_norm": 2.3770964219238522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440050 + }, + { + "epoch": 2.1342114739449523, + "grad_norm": 2.3620316369488137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440060 + }, + { + "epoch": 2.134259972137788, + "grad_norm": 2.3980865648809413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440070 + }, + { + "epoch": 2.134308470330624, + "grad_norm": 2.101783138641622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440080 + }, + { + "epoch": 2.1343569685234605, + "grad_norm": 2.2112911324256856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440090 + }, + { + "epoch": 2.1344054667162964, + "grad_norm": 2.3867244181019487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440100 + }, + { + "epoch": 2.1344539649091328, + "grad_norm": 2.4726753622417164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440110 + }, + { + "epoch": 2.1345024631019687, + "grad_norm": 2.2429190948969335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440120 + }, + { + "epoch": 2.1345509612948046, + "grad_norm": 2.0165958858342492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440130 + }, + { + "epoch": 2.134599459487641, + "grad_norm": 2.363233164714984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440140 + }, + { + "epoch": 2.134647957680477, + "grad_norm": 2.1741223577009805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440150 + }, + { + "epoch": 2.1346964558733132, + "grad_norm": 2.1787278114970832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440160 + }, + { + "epoch": 2.134744954066149, + "grad_norm": 2.2988523085132329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440170 + }, + { + "epoch": 2.134793452258985, + "grad_norm": 1.9676414808600384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440180 + }, + { + "epoch": 2.1348419504518215, + "grad_norm": 2.5745998755155597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440190 + }, + { + "epoch": 2.1348904486446574, + "grad_norm": 2.2066815574817156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440200 + }, + { + "epoch": 2.1349389468374933, + "grad_norm": 2.2700571378209133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440210 + }, + { + "epoch": 2.1349874450303297, + "grad_norm": 2.049623191169303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440220 + }, + { + "epoch": 2.1350359432231656, + "grad_norm": 1.924205434988835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440230 + }, + { + "epoch": 2.1350844414160015, + "grad_norm": 2.1648946813002112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440240 + }, + { + "epoch": 2.135132939608838, + "grad_norm": 1.9851746912991075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440250 + }, + { + "epoch": 2.135181437801674, + "grad_norm": 2.089642094915689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440260 + }, + { + "epoch": 2.13522993599451, + "grad_norm": 6.331669624159986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440270 + }, + { + "epoch": 2.135278434187346, + "grad_norm": 1.7003438301799179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440280 + }, + { + "epoch": 2.135326932380182, + "grad_norm": 2.290857992193196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440290 + }, + { + "epoch": 2.1353754305730184, + "grad_norm": 2.031555226267301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440300 + }, + { + "epoch": 2.1354239287658543, + "grad_norm": 2.0208844375702029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440310 + }, + { + "epoch": 2.1354724269586907, + "grad_norm": 1.9293929653940722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440320 + }, + { + "epoch": 2.1355209251515266, + "grad_norm": 1.871522954388638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440330 + }, + { + "epoch": 2.1355694233443625, + "grad_norm": 1.9020522756818536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440340 + }, + { + "epoch": 2.135617921537199, + "grad_norm": 2.147204298807992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440350 + }, + { + "epoch": 2.135666419730035, + "grad_norm": 1.8799059375851357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440360 + }, + { + "epoch": 2.1357149179228707, + "grad_norm": 2.3750136790567922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440370 + }, + { + "epoch": 2.135763416115707, + "grad_norm": 1.845633619268483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440380 + }, + { + "epoch": 2.135811914308543, + "grad_norm": 1.9106710169580765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440390 + }, + { + "epoch": 2.1358604125013794, + "grad_norm": 1.9064438561144925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440400 + }, + { + "epoch": 2.1359089106942153, + "grad_norm": 2.2620892536906467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440410 + }, + { + "epoch": 2.135957408887051, + "grad_norm": 1.9619695024175599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440420 + }, + { + "epoch": 2.1360059070798876, + "grad_norm": 1.6801455160475598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440430 + }, + { + "epoch": 2.1360544052727235, + "grad_norm": 1.8180318761551462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440440 + }, + { + "epoch": 2.1361029034655594, + "grad_norm": 1.8016842773249664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440450 + }, + { + "epoch": 2.136151401658396, + "grad_norm": 1.8245691535412334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440460 + }, + { + "epoch": 2.1361998998512317, + "grad_norm": 1.7683512965049886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440470 + }, + { + "epoch": 2.136248398044068, + "grad_norm": 1.6092855048555066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440480 + }, + { + "epoch": 2.136296896236904, + "grad_norm": 1.8408309188089333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440490 + }, + { + "epoch": 2.13634539442974, + "grad_norm": 1.7715274225338362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440500 + }, + { + "epoch": 2.1363938926225763, + "grad_norm": 1.8860202999348985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440510 + }, + { + "epoch": 2.136442390815412, + "grad_norm": 1.7547489505886915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440520 + }, + { + "epoch": 2.136490889008248, + "grad_norm": 1.3432017453851586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440530 + }, + { + "epoch": 2.1365393872010845, + "grad_norm": 1.7468946111875994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440540 + }, + { + "epoch": 2.1365878853939204, + "grad_norm": 1.5729298752376053e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440550 + }, + { + "epoch": 2.136636383586757, + "grad_norm": 1.723887947946423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440560 + }, + { + "epoch": 2.1366848817795927, + "grad_norm": 1.7926809903201502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440570 + }, + { + "epoch": 2.1367333799724286, + "grad_norm": 1.7395946372289472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440580 + }, + { + "epoch": 2.136781878165265, + "grad_norm": 2.0307631132254755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440590 + }, + { + "epoch": 2.136830376358101, + "grad_norm": 1.641552529463297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440600 + }, + { + "epoch": 2.136878874550937, + "grad_norm": 1.6655069146054302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440610 + }, + { + "epoch": 2.136927372743773, + "grad_norm": 1.746584814554808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440620 + }, + { + "epoch": 2.136975870936609, + "grad_norm": 1.3410927124368754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440630 + }, + { + "epoch": 2.1370243691294455, + "grad_norm": 1.6163852478712215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440640 + }, + { + "epoch": 2.1370728673222814, + "grad_norm": 1.7475100833053148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440650 + }, + { + "epoch": 2.1371213655151173, + "grad_norm": 1.6253180490366503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440660 + }, + { + "epoch": 2.1371698637079537, + "grad_norm": 1.622443335236312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440670 + }, + { + "epoch": 2.1372183619007896, + "grad_norm": 1.270193195068714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440680 + }, + { + "epoch": 2.137266860093626, + "grad_norm": 1.6359582843961107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440690 + }, + { + "epoch": 2.137315358286462, + "grad_norm": 1.6436638361483347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440700 + }, + { + "epoch": 2.137363856479298, + "grad_norm": 1.689211330813123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440710 + }, + { + "epoch": 2.137412354672134, + "grad_norm": 1.616329683429285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440720 + }, + { + "epoch": 2.13746085286497, + "grad_norm": 1.3084250838346634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440730 + }, + { + "epoch": 2.137509351057806, + "grad_norm": 1.6300325000884186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440740 + }, + { + "epoch": 2.1375578492506424, + "grad_norm": 1.5364980754384305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440750 + }, + { + "epoch": 2.1376063474434783, + "grad_norm": 1.7400400054157217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440760 + }, + { + "epoch": 2.1376548456363142, + "grad_norm": 1.3485356475939625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440770 + }, + { + "epoch": 2.1377033438291506, + "grad_norm": 1.3307631263614894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440780 + }, + { + "epoch": 2.1377518420219865, + "grad_norm": 1.5760623739424773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440790 + }, + { + "epoch": 2.137800340214823, + "grad_norm": 1.4956266625176795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440800 + }, + { + "epoch": 2.137848838407659, + "grad_norm": 1.498568025226632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440810 + }, + { + "epoch": 2.1378973366004947, + "grad_norm": 1.4841315021385526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440820 + }, + { + "epoch": 2.137945834793331, + "grad_norm": 1.0948387796361203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440830 + }, + { + "epoch": 2.137994332986167, + "grad_norm": 1.4383398649897572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440840 + }, + { + "epoch": 2.1380428311790034, + "grad_norm": 1.6872012054136576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440850 + }, + { + "epoch": 2.1380913293718393, + "grad_norm": 1.5293539945560042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440860 + }, + { + "epoch": 2.1381398275646752, + "grad_norm": 1.3830755563049024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440870 + }, + { + "epoch": 2.1381883257575116, + "grad_norm": 1.0734245847743296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440880 + }, + { + "epoch": 2.1382368239503475, + "grad_norm": 1.4453495111865777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440890 + }, + { + "epoch": 2.1382853221431835, + "grad_norm": 1.463941146084835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440900 + }, + { + "epoch": 2.13833382033602, + "grad_norm": 1.3262575748740346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440910 + }, + { + "epoch": 2.1383823185288557, + "grad_norm": 1.3733163939377846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440920 + }, + { + "epoch": 2.138430816721692, + "grad_norm": 1.03655217742471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440930 + }, + { + "epoch": 2.138479314914528, + "grad_norm": 1.323806912978398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440940 + }, + { + "epoch": 2.138527813107364, + "grad_norm": 1.4338168341510027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440950 + }, + { + "epoch": 2.1385763113002003, + "grad_norm": 1.2915447200612107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440960 + }, + { + "epoch": 2.1386248094930362, + "grad_norm": 1.4673125292574696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440970 + }, + { + "epoch": 2.138673307685872, + "grad_norm": 1.0388296800556418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440980 + }, + { + "epoch": 2.1387218058787085, + "grad_norm": 1.4102225520673528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 440990 + }, + { + "epoch": 2.1387703040715444, + "grad_norm": 1.6728853324821102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441000 + }, + { + "epoch": 2.138818802264381, + "grad_norm": 1.261106916672361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441010 + }, + { + "epoch": 2.1388673004572167, + "grad_norm": 1.4199319764429674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441020 + }, + { + "epoch": 2.1389157986500527, + "grad_norm": 1.0553053897410791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441030 + }, + { + "epoch": 2.138964296842889, + "grad_norm": 1.3918007368829421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441040 + }, + { + "epoch": 2.139012795035725, + "grad_norm": 1.1768705121539824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441050 + }, + { + "epoch": 2.139061293228561, + "grad_norm": 1.3612108773486398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441060 + }, + { + "epoch": 2.1391097914213972, + "grad_norm": 1.2739899091229745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441070 + }, + { + "epoch": 2.139158289614233, + "grad_norm": 1.0828834007270416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441080 + }, + { + "epoch": 2.1392067878070695, + "grad_norm": 1.2277918415293243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441090 + }, + { + "epoch": 2.1392552859999054, + "grad_norm": 1.2710290775430622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441100 + }, + { + "epoch": 2.1393037841927414, + "grad_norm": 1.1482966044695786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441110 + }, + { + "epoch": 2.1393522823855777, + "grad_norm": 1.254242789627824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441120 + }, + { + "epoch": 2.1394007805784137, + "grad_norm": 1.0802978778201577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441130 + }, + { + "epoch": 2.1394492787712496, + "grad_norm": 1.263844069399056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441140 + }, + { + "epoch": 2.139497776964086, + "grad_norm": 1.163388390068576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441150 + }, + { + "epoch": 2.139546275156922, + "grad_norm": 1.1666080723671257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441160 + }, + { + "epoch": 2.1395947733497582, + "grad_norm": 1.2937125859480147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441170 + }, + { + "epoch": 2.139643271542594, + "grad_norm": 1.050847160399826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441180 + }, + { + "epoch": 2.13969176973543, + "grad_norm": 1.2176154484677681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441190 + }, + { + "epoch": 2.1397402679282664, + "grad_norm": 1.156001090407699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441200 + }, + { + "epoch": 2.1397887661211024, + "grad_norm": 1.2012051797682943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441210 + }, + { + "epoch": 2.1398372643139387, + "grad_norm": 1.2780479607954476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441220 + }, + { + "epoch": 2.1398857625067746, + "grad_norm": 1.0262498761903771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441230 + }, + { + "epoch": 2.1399342606996106, + "grad_norm": 1.0921656468099172e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441240 + }, + { + "epoch": 2.139982758892447, + "grad_norm": 1.207941977554583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441250 + }, + { + "epoch": 2.140031257085283, + "grad_norm": 1.1434185864800384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441260 + }, + { + "epoch": 2.140079755278119, + "grad_norm": 1.1115446341136703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441270 + }, + { + "epoch": 2.140128253470955, + "grad_norm": 9.355289165569047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441280 + }, + { + "epoch": 2.140176751663791, + "grad_norm": 1.1529185428571509e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441290 + }, + { + "epoch": 2.140225249856627, + "grad_norm": 1.1162445190393555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441300 + }, + { + "epoch": 2.1402737480494634, + "grad_norm": 1.1118028453438455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441310 + }, + { + "epoch": 2.1403222462422993, + "grad_norm": 1.1444686265349446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441320 + }, + { + "epoch": 2.1403707444351356, + "grad_norm": 9.882092655288943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441330 + }, + { + "epoch": 2.1404192426279716, + "grad_norm": 1.0888430779232294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441340 + }, + { + "epoch": 2.1404677408208075, + "grad_norm": 1.1934409371860966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441350 + }, + { + "epoch": 2.140516239013644, + "grad_norm": 1.1644522857068296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441360 + }, + { + "epoch": 2.1405647372064798, + "grad_norm": 1.1613012418365543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441370 + }, + { + "epoch": 2.140613235399316, + "grad_norm": 9.258854305471687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441380 + }, + { + "epoch": 2.140661733592152, + "grad_norm": 1.0281715390192403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441390 + }, + { + "epoch": 2.140710231784988, + "grad_norm": 1.1346538997258904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441400 + }, + { + "epoch": 2.1407587299778243, + "grad_norm": 1.259868440683931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441410 + }, + { + "epoch": 2.1408072281706603, + "grad_norm": 1.0807691097625138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441420 + }, + { + "epoch": 2.140855726363496, + "grad_norm": 9.198904393770135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441430 + }, + { + "epoch": 2.1409042245563326, + "grad_norm": 1.0293366159430661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441440 + }, + { + "epoch": 2.1409527227491685, + "grad_norm": 1.0351566004374035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441450 + }, + { + "epoch": 2.141001220942005, + "grad_norm": 1.0311947562513524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441460 + }, + { + "epoch": 2.1410497191348408, + "grad_norm": 1.1077637651624173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441470 + }, + { + "epoch": 2.1410982173276767, + "grad_norm": 8.645203308788041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441480 + }, + { + "epoch": 2.141146715520513, + "grad_norm": 9.834197101099562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441490 + }, + { + "epoch": 2.141195213713349, + "grad_norm": 1.0625765156646594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441500 + }, + { + "epoch": 2.141243711906185, + "grad_norm": 1.078157936262869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441510 + }, + { + "epoch": 2.1412922100990213, + "grad_norm": 1.0209679857098308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441520 + }, + { + "epoch": 2.141340708291857, + "grad_norm": 9.133322009802214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441530 + }, + { + "epoch": 2.1413892064846936, + "grad_norm": 9.887639862427022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441540 + }, + { + "epoch": 2.1414377046775295, + "grad_norm": 1.0020863783211098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441550 + }, + { + "epoch": 2.1414862028703654, + "grad_norm": 1.0454429144601818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441560 + }, + { + "epoch": 2.1415347010632018, + "grad_norm": 1.0736432898283965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441570 + }, + { + "epoch": 2.1415831992560377, + "grad_norm": 8.79307862078349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441580 + }, + { + "epoch": 2.1416316974488736, + "grad_norm": 9.888811547398291e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441590 + }, + { + "epoch": 2.14168019564171, + "grad_norm": 1.012183332704808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441600 + }, + { + "epoch": 2.141728693834546, + "grad_norm": 1.0103486403068018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441610 + }, + { + "epoch": 2.1417771920273823, + "grad_norm": 9.928412225690408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441620 + }, + { + "epoch": 2.141825690220218, + "grad_norm": 8.640991211450455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441630 + }, + { + "epoch": 2.141874188413054, + "grad_norm": 1.0495475066818472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441640 + }, + { + "epoch": 2.1419226866058905, + "grad_norm": 1.0743747935748615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441650 + }, + { + "epoch": 2.1419711847987264, + "grad_norm": 1.0712363263110092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441660 + }, + { + "epoch": 2.1420196829915623, + "grad_norm": 9.550315382966801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441670 + }, + { + "epoch": 2.1420681811843987, + "grad_norm": 8.782734539636294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441680 + }, + { + "epoch": 2.1421166793772346, + "grad_norm": 9.714742077449046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441690 + }, + { + "epoch": 2.142165177570071, + "grad_norm": 1.0636155423071614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441700 + }, + { + "epoch": 2.142213675762907, + "grad_norm": 1.0268269079460879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441710 + }, + { + "epoch": 2.142262173955743, + "grad_norm": 1.496485282359572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441720 + }, + { + "epoch": 2.142310672148579, + "grad_norm": 8.370367510224241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441730 + }, + { + "epoch": 2.142359170341415, + "grad_norm": 9.201522033208676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441740 + }, + { + "epoch": 2.1424076685342515, + "grad_norm": 1.0871600153450345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441750 + }, + { + "epoch": 2.1424561667270874, + "grad_norm": 9.895806130089113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441760 + }, + { + "epoch": 2.1425046649199233, + "grad_norm": 1.1298135405013454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441770 + }, + { + "epoch": 2.1425531631127597, + "grad_norm": 8.395628725565984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441780 + }, + { + "epoch": 2.1426016613055956, + "grad_norm": 1.0142341011487588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441790 + }, + { + "epoch": 2.1426501594984315, + "grad_norm": 9.664307754064794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441800 + }, + { + "epoch": 2.142698657691268, + "grad_norm": 9.547759560746272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441810 + }, + { + "epoch": 2.142747155884104, + "grad_norm": 1.0313068088407817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441820 + }, + { + "epoch": 2.1427956540769397, + "grad_norm": 8.377649862723047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441830 + }, + { + "epoch": 2.142844152269776, + "grad_norm": 9.176120840947988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441840 + }, + { + "epoch": 2.142892650462612, + "grad_norm": 8.701563558588532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441850 + }, + { + "epoch": 2.1429411486554484, + "grad_norm": 1.1329611737664891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441860 + }, + { + "epoch": 2.1429896468482843, + "grad_norm": 1.0113015491697297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441870 + }, + { + "epoch": 2.1430381450411202, + "grad_norm": 8.274859908397048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441880 + }, + { + "epoch": 2.1430866432339566, + "grad_norm": 1.2088106871033233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441890 + }, + { + "epoch": 2.1431351414267925, + "grad_norm": 9.102697617890954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441900 + }, + { + "epoch": 2.143183639619629, + "grad_norm": 9.31602528453368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441910 + }, + { + "epoch": 2.143232137812465, + "grad_norm": 9.905430431444984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441920 + }, + { + "epoch": 2.1432806360053007, + "grad_norm": 8.049515543007146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441930 + }, + { + "epoch": 2.143329134198137, + "grad_norm": 8.972844511845324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441940 + }, + { + "epoch": 2.143377632390973, + "grad_norm": 9.842641901514071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441950 + }, + { + "epoch": 2.143426130583809, + "grad_norm": 9.304604731141808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441960 + }, + { + "epoch": 2.1434746287766453, + "grad_norm": 9.011867518893268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441970 + }, + { + "epoch": 2.143523126969481, + "grad_norm": 8.346665936187492e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441980 + }, + { + "epoch": 2.1435716251623176, + "grad_norm": 1.1063941229849661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 441990 + }, + { + "epoch": 2.1436201233551535, + "grad_norm": 1.0267317662737696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442000 + }, + { + "epoch": 2.1436686215479894, + "grad_norm": 1.0318819931853795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442010 + }, + { + "epoch": 2.143717119740826, + "grad_norm": 8.7674230542234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442020 + }, + { + "epoch": 2.1437656179336617, + "grad_norm": 8.200461110163815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442030 + }, + { + "epoch": 2.1438141161264976, + "grad_norm": 9.184473270806848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442040 + }, + { + "epoch": 2.143862614319334, + "grad_norm": 9.522216259938432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442050 + }, + { + "epoch": 2.14391111251217, + "grad_norm": 8.75642101050289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442060 + }, + { + "epoch": 2.1439596107050063, + "grad_norm": 8.859991140752754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442070 + }, + { + "epoch": 2.144008108897842, + "grad_norm": 8.40404723589927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442080 + }, + { + "epoch": 2.144056607090678, + "grad_norm": 9.537707512663474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442090 + }, + { + "epoch": 2.1441051052835145, + "grad_norm": 1.0157428675938718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442100 + }, + { + "epoch": 2.1441536034763504, + "grad_norm": 8.710622978469473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442110 + }, + { + "epoch": 2.1442021016691863, + "grad_norm": 8.99292516010064e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442120 + }, + { + "epoch": 2.1442505998620227, + "grad_norm": 8.3456825450412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442130 + }, + { + "epoch": 2.1442990980548586, + "grad_norm": 1.0267586247891813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442140 + }, + { + "epoch": 2.144347596247695, + "grad_norm": 8.543698726271032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442150 + }, + { + "epoch": 2.144396094440531, + "grad_norm": 8.83124613437758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442160 + }, + { + "epoch": 2.144444592633367, + "grad_norm": 9.627234476283775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442170 + }, + { + "epoch": 2.144493090826203, + "grad_norm": 8.394094663799478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442180 + }, + { + "epoch": 2.144541589019039, + "grad_norm": 8.850882693423046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442190 + }, + { + "epoch": 2.144590087211875, + "grad_norm": 8.563277020812166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442200 + }, + { + "epoch": 2.1446385854047114, + "grad_norm": 8.913686855294145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442210 + }, + { + "epoch": 2.1446870835975473, + "grad_norm": 8.438476584160526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442220 + }, + { + "epoch": 2.1447355817903837, + "grad_norm": 1.6819711845528218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442230 + }, + { + "epoch": 2.1447840799832196, + "grad_norm": 8.766153314354597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442240 + }, + { + "epoch": 2.1448325781760555, + "grad_norm": 9.198649308927997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442250 + }, + { + "epoch": 2.144881076368892, + "grad_norm": 9.000356726573955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442260 + }, + { + "epoch": 2.144929574561728, + "grad_norm": 8.288157005154062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442270 + }, + { + "epoch": 2.144978072754564, + "grad_norm": 7.87414577985146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442280 + }, + { + "epoch": 2.1450265709474, + "grad_norm": 2.450262286401994e-07, + "learning_rate": 0.0002, + "loss": 0.0013, + "step": 442290 + }, + { + "epoch": 2.145075069140236, + "grad_norm": 0.0003141309425700456, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442300 + }, + { + "epoch": 2.1451235673330724, + "grad_norm": 1.7094020222430117e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442310 + }, + { + "epoch": 2.1451720655259083, + "grad_norm": 7.580298188258894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442320 + }, + { + "epoch": 2.1452205637187443, + "grad_norm": 5.23691096532275e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442330 + }, + { + "epoch": 2.1452690619115806, + "grad_norm": 4.604913556249812e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442340 + }, + { + "epoch": 2.1453175601044165, + "grad_norm": 1.889443228719756e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 442350 + }, + { + "epoch": 2.1453660582972525, + "grad_norm": 0.00010729242785600945, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442360 + }, + { + "epoch": 2.145414556490089, + "grad_norm": 2.8222611945238896e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442370 + }, + { + "epoch": 2.1454630546829248, + "grad_norm": 1.9589489966165274e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442380 + }, + { + "epoch": 2.145511552875761, + "grad_norm": 1.592396802152507e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442390 + }, + { + "epoch": 2.145560051068597, + "grad_norm": 0.2432490885257721, + "learning_rate": 0.0002, + "loss": 0.0138, + "step": 442400 + }, + { + "epoch": 2.145608549261433, + "grad_norm": 0.362238347530365, + "learning_rate": 0.0002, + "loss": 0.4333, + "step": 442410 + }, + { + "epoch": 2.1456570474542693, + "grad_norm": 0.11471877992153168, + "learning_rate": 0.0002, + "loss": 0.1783, + "step": 442420 + }, + { + "epoch": 2.1457055456471053, + "grad_norm": 0.001868835068307817, + "learning_rate": 0.0002, + "loss": 0.0605, + "step": 442430 + }, + { + "epoch": 2.1457540438399416, + "grad_norm": 0.5038465261459351, + "learning_rate": 0.0002, + "loss": 0.0652, + "step": 442440 + }, + { + "epoch": 2.1458025420327775, + "grad_norm": 0.32083019614219666, + "learning_rate": 0.0002, + "loss": 0.144, + "step": 442450 + }, + { + "epoch": 2.1458510402256135, + "grad_norm": 0.002085835672914982, + "learning_rate": 0.0002, + "loss": 0.003, + "step": 442460 + }, + { + "epoch": 2.14589953841845, + "grad_norm": 0.00103688333183527, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 442470 + }, + { + "epoch": 2.1459480366112857, + "grad_norm": 0.0012184374500066042, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 442480 + }, + { + "epoch": 2.1459965348041217, + "grad_norm": 3.9081602153601125e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 442490 + }, + { + "epoch": 2.146045032996958, + "grad_norm": 1.633764804864768e-05, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 442500 + }, + { + "epoch": 2.146093531189794, + "grad_norm": 0.00026253212126903236, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442510 + }, + { + "epoch": 2.1461420293826303, + "grad_norm": 0.00035089609446004033, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442520 + }, + { + "epoch": 2.1461905275754662, + "grad_norm": 3.534814095473848e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442530 + }, + { + "epoch": 2.146239025768302, + "grad_norm": 0.0002264279464725405, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 442540 + }, + { + "epoch": 2.1462875239611385, + "grad_norm": 0.00025535980239510536, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442550 + }, + { + "epoch": 2.1463360221539745, + "grad_norm": 7.822228508302942e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 442560 + }, + { + "epoch": 2.1463845203468104, + "grad_norm": 0.0004999894881621003, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 442570 + }, + { + "epoch": 2.1464330185396467, + "grad_norm": 0.0004296363622415811, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 442580 + }, + { + "epoch": 2.1464815167324827, + "grad_norm": 8.15341918496415e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 442590 + }, + { + "epoch": 2.146530014925319, + "grad_norm": 0.03309599682688713, + "learning_rate": 0.0002, + "loss": 0.0017, + "step": 442600 + }, + { + "epoch": 2.146578513118155, + "grad_norm": 0.008063745684921741, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 442610 + }, + { + "epoch": 2.146627011310991, + "grad_norm": 0.0030403730925172567, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 442620 + }, + { + "epoch": 2.1466755095038272, + "grad_norm": 0.0005306792445480824, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 442630 + }, + { + "epoch": 2.146724007696663, + "grad_norm": 0.09662589430809021, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 442640 + }, + { + "epoch": 2.1467725058894995, + "grad_norm": 0.00014084346184972674, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442650 + }, + { + "epoch": 2.1468210040823354, + "grad_norm": 0.00012510974192991853, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442660 + }, + { + "epoch": 2.1468695022751714, + "grad_norm": 0.00011264477507211268, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442670 + }, + { + "epoch": 2.1469180004680077, + "grad_norm": 0.00012838112888857722, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442680 + }, + { + "epoch": 2.1469664986608437, + "grad_norm": 6.714645132888108e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442690 + }, + { + "epoch": 2.1470149968536796, + "grad_norm": 5.492793570738286e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442700 + }, + { + "epoch": 2.147063495046516, + "grad_norm": 5.208575021242723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442710 + }, + { + "epoch": 2.147111993239352, + "grad_norm": 4.496691326494329e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442720 + }, + { + "epoch": 2.147160491432188, + "grad_norm": 6.190739804878831e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442730 + }, + { + "epoch": 2.147208989625024, + "grad_norm": 3.818530240096152e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442740 + }, + { + "epoch": 2.14725748781786, + "grad_norm": 4.441651617526077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442750 + }, + { + "epoch": 2.1473059860106964, + "grad_norm": 3.2324274798156694e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442760 + }, + { + "epoch": 2.1473544842035324, + "grad_norm": 3.131205085082911e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442770 + }, + { + "epoch": 2.1474029823963683, + "grad_norm": 3.4698045055847615e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442780 + }, + { + "epoch": 2.1474514805892047, + "grad_norm": 2.4242004656116478e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442790 + }, + { + "epoch": 2.1474999787820406, + "grad_norm": 2.66855931840837e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442800 + }, + { + "epoch": 2.147548476974877, + "grad_norm": 2.1790852770209312e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442810 + }, + { + "epoch": 2.147596975167713, + "grad_norm": 2.399400545982644e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442820 + }, + { + "epoch": 2.147645473360549, + "grad_norm": 3.132519486825913e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442830 + }, + { + "epoch": 2.147693971553385, + "grad_norm": 2.0620194845832884e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442840 + }, + { + "epoch": 2.147742469746221, + "grad_norm": 1.9754543245653622e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442850 + }, + { + "epoch": 2.147790967939057, + "grad_norm": 2.2652468032902107e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442860 + }, + { + "epoch": 2.1478394661318934, + "grad_norm": 1.9853370758937672e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442870 + }, + { + "epoch": 2.1478879643247293, + "grad_norm": 2.3962078557815403e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442880 + }, + { + "epoch": 2.147936462517565, + "grad_norm": 2.4836979719111696e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442890 + }, + { + "epoch": 2.1479849607104016, + "grad_norm": 1.5899166101007722e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442900 + }, + { + "epoch": 2.1480334589032375, + "grad_norm": 1.694188358669635e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442910 + }, + { + "epoch": 2.148081957096074, + "grad_norm": 1.8296097550773993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442920 + }, + { + "epoch": 2.14813045528891, + "grad_norm": 1.8692613593884744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442930 + }, + { + "epoch": 2.1481789534817457, + "grad_norm": 1.502314717072295e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442940 + }, + { + "epoch": 2.148227451674582, + "grad_norm": 1.4519690012093633e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442950 + }, + { + "epoch": 2.148275949867418, + "grad_norm": 1.4297394955065101e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442960 + }, + { + "epoch": 2.1483244480602544, + "grad_norm": 1.3599718840850983e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442970 + }, + { + "epoch": 2.1483729462530903, + "grad_norm": 1.7588476111995988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442980 + }, + { + "epoch": 2.148421444445926, + "grad_norm": 1.3696920177608263e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 442990 + }, + { + "epoch": 2.1484699426387626, + "grad_norm": 1.3766176380158868e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443000 + }, + { + "epoch": 2.1485184408315985, + "grad_norm": 1.2097731087123975e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443010 + }, + { + "epoch": 2.1485669390244344, + "grad_norm": 1.6905270967981778e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443020 + }, + { + "epoch": 2.1486154372172708, + "grad_norm": 1.54663957800949e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443030 + }, + { + "epoch": 2.1486639354101067, + "grad_norm": 1.1614398317760788e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443040 + }, + { + "epoch": 2.148712433602943, + "grad_norm": 1.2174057701486163e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443050 + }, + { + "epoch": 2.148760931795779, + "grad_norm": 2.3472963221138343e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443060 + }, + { + "epoch": 2.148809429988615, + "grad_norm": 1.2774065908160992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443070 + }, + { + "epoch": 2.1488579281814513, + "grad_norm": 1.4697689039167017e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443080 + }, + { + "epoch": 2.148906426374287, + "grad_norm": 1.1707514204317704e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443090 + }, + { + "epoch": 2.148954924567123, + "grad_norm": 1.3594362826552242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443100 + }, + { + "epoch": 2.1490034227599595, + "grad_norm": 1.0584231858956628e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443110 + }, + { + "epoch": 2.1490519209527954, + "grad_norm": 1.029167469823733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443120 + }, + { + "epoch": 2.1491004191456318, + "grad_norm": 1.2721123312076088e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443130 + }, + { + "epoch": 2.1491489173384677, + "grad_norm": 1.1408085811126512e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443140 + }, + { + "epoch": 2.1491974155313036, + "grad_norm": 1.0051552635559347e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443150 + }, + { + "epoch": 2.14924591372414, + "grad_norm": 1.0173637747357134e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443160 + }, + { + "epoch": 2.149294411916976, + "grad_norm": 1.0409425158286467e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443170 + }, + { + "epoch": 2.1493429101098123, + "grad_norm": 1.1873024050146341e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443180 + }, + { + "epoch": 2.149391408302648, + "grad_norm": 9.897512427414767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443190 + }, + { + "epoch": 2.149439906495484, + "grad_norm": 1.0413341442472301e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443200 + }, + { + "epoch": 2.1494884046883205, + "grad_norm": 9.29941325011896e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443210 + }, + { + "epoch": 2.1495369028811564, + "grad_norm": 9.016988769872114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443220 + }, + { + "epoch": 2.1495854010739923, + "grad_norm": 1.0075204045278952e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443230 + }, + { + "epoch": 2.1496338992668287, + "grad_norm": 1.0406607543700375e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443240 + }, + { + "epoch": 2.1496823974596646, + "grad_norm": 8.717354830878321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443250 + }, + { + "epoch": 2.1497308956525005, + "grad_norm": 9.626081009628251e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443260 + }, + { + "epoch": 2.149779393845337, + "grad_norm": 9.694773325463757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443270 + }, + { + "epoch": 2.149827892038173, + "grad_norm": 0.22762750089168549, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 443280 + }, + { + "epoch": 2.149876390231009, + "grad_norm": 0.000180402013938874, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443290 + }, + { + "epoch": 2.149924888423845, + "grad_norm": 1.2798474017472472e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 443300 + }, + { + "epoch": 2.149973386616681, + "grad_norm": 1.5537216313532554e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443310 + }, + { + "epoch": 2.1500218848095174, + "grad_norm": 1.8500772057450376e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443320 + }, + { + "epoch": 2.1500703830023533, + "grad_norm": 3.269612352596596e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443330 + }, + { + "epoch": 2.1501188811951897, + "grad_norm": 1.5074367183842696e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443340 + }, + { + "epoch": 2.1501673793880256, + "grad_norm": 1.4582926269213203e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443350 + }, + { + "epoch": 2.1502158775808615, + "grad_norm": 1.3468937140714843e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443360 + }, + { + "epoch": 2.150264375773698, + "grad_norm": 1.7856544218375348e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443370 + }, + { + "epoch": 2.150312873966534, + "grad_norm": 2.508751640561968e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443380 + }, + { + "epoch": 2.1503613721593697, + "grad_norm": 2.007382681767922e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443390 + }, + { + "epoch": 2.150409870352206, + "grad_norm": 1.3976803529658355e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443400 + }, + { + "epoch": 2.150458368545042, + "grad_norm": 1.4738520803803112e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443410 + }, + { + "epoch": 2.1505068667378784, + "grad_norm": 1.4223176549421623e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443420 + }, + { + "epoch": 2.1505553649307143, + "grad_norm": 2.0279585442040116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443430 + }, + { + "epoch": 2.1506038631235502, + "grad_norm": 1.19869891932467e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443440 + }, + { + "epoch": 2.1506523613163866, + "grad_norm": 9.981630682887044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443450 + }, + { + "epoch": 2.1507008595092225, + "grad_norm": 1.2546526704682037e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443460 + }, + { + "epoch": 2.1507493577020584, + "grad_norm": 9.678404239821248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443470 + }, + { + "epoch": 2.150797855894895, + "grad_norm": 1.7943844795809127e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443480 + }, + { + "epoch": 2.1508463540877307, + "grad_norm": 1.0603786904539447e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443490 + }, + { + "epoch": 2.150894852280567, + "grad_norm": 1.038519803842064e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443500 + }, + { + "epoch": 2.150943350473403, + "grad_norm": 1.0171900612476747e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443510 + }, + { + "epoch": 2.150991848666239, + "grad_norm": 9.228966519003734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443520 + }, + { + "epoch": 2.1510403468590753, + "grad_norm": 1.5136041838559322e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443530 + }, + { + "epoch": 2.1510888450519112, + "grad_norm": 9.036106348503381e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443540 + }, + { + "epoch": 2.151137343244747, + "grad_norm": 1.1678327609843109e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443550 + }, + { + "epoch": 2.1511858414375835, + "grad_norm": 9.264082109439187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443560 + }, + { + "epoch": 2.1512343396304194, + "grad_norm": 1.1364939382474404e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443570 + }, + { + "epoch": 2.151282837823256, + "grad_norm": 1.1735653970390558e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443580 + }, + { + "epoch": 2.1513313360160917, + "grad_norm": 8.265895303338766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443590 + }, + { + "epoch": 2.1513798342089276, + "grad_norm": 2.2292737412499264e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443600 + }, + { + "epoch": 2.151428332401764, + "grad_norm": 1.1202993846382014e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443610 + }, + { + "epoch": 2.1514768305946, + "grad_norm": 8.395236363867298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443620 + }, + { + "epoch": 2.151525328787436, + "grad_norm": 1.158467148343334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443630 + }, + { + "epoch": 2.151573826980272, + "grad_norm": 8.773464287514798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443640 + }, + { + "epoch": 2.151622325173108, + "grad_norm": 7.808344889781438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443650 + }, + { + "epoch": 2.1516708233659445, + "grad_norm": 7.623425517522264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443660 + }, + { + "epoch": 2.1517193215587804, + "grad_norm": 8.155001523846295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443670 + }, + { + "epoch": 2.1517678197516164, + "grad_norm": 1.1043534868804272e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443680 + }, + { + "epoch": 2.1518163179444527, + "grad_norm": 7.65567983762594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443690 + }, + { + "epoch": 2.1518648161372886, + "grad_norm": 7.709221790719312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443700 + }, + { + "epoch": 2.151913314330125, + "grad_norm": 7.168338015617337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443710 + }, + { + "epoch": 2.151961812522961, + "grad_norm": 0.0012322543188929558, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443720 + }, + { + "epoch": 2.152010310715797, + "grad_norm": 1.0638824278430548e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443730 + }, + { + "epoch": 2.152058808908633, + "grad_norm": 1.3965287507744506e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443740 + }, + { + "epoch": 2.152107307101469, + "grad_norm": 6.647369900747435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443750 + }, + { + "epoch": 2.152155805294305, + "grad_norm": 7.394391559500946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443760 + }, + { + "epoch": 2.1522043034871414, + "grad_norm": 6.727914751536446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443770 + }, + { + "epoch": 2.1522528016799773, + "grad_norm": 8.366297151951585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443780 + }, + { + "epoch": 2.1523012998728133, + "grad_norm": 6.905046120664338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443790 + }, + { + "epoch": 2.1523497980656496, + "grad_norm": 6.52901007924811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443800 + }, + { + "epoch": 2.1523982962584856, + "grad_norm": 6.498679795186035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443810 + }, + { + "epoch": 2.152446794451322, + "grad_norm": 7.412307695631171e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443820 + }, + { + "epoch": 2.152495292644158, + "grad_norm": 8.311452802445274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443830 + }, + { + "epoch": 2.1525437908369938, + "grad_norm": 6.6135758061136585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443840 + }, + { + "epoch": 2.15259228902983, + "grad_norm": 6.238271907932358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443850 + }, + { + "epoch": 2.152640787222666, + "grad_norm": 6.792402928113006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443860 + }, + { + "epoch": 2.1526892854155024, + "grad_norm": 6.7022451730736066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443870 + }, + { + "epoch": 2.1527377836083383, + "grad_norm": 8.69435734784929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443880 + }, + { + "epoch": 2.1527862818011743, + "grad_norm": 6.6693178268906195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443890 + }, + { + "epoch": 2.1528347799940106, + "grad_norm": 6.318658961390611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443900 + }, + { + "epoch": 2.1528832781868465, + "grad_norm": 7.069647381285904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443910 + }, + { + "epoch": 2.1529317763796825, + "grad_norm": 5.937376499787206e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443920 + }, + { + "epoch": 2.152980274572519, + "grad_norm": 8.053777492023073e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443930 + }, + { + "epoch": 2.1530287727653548, + "grad_norm": 5.659801900037564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443940 + }, + { + "epoch": 2.153077270958191, + "grad_norm": 5.938701178820338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443950 + }, + { + "epoch": 2.153125769151027, + "grad_norm": 6.141689482319634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443960 + }, + { + "epoch": 2.153174267343863, + "grad_norm": 6.450441560446052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443970 + }, + { + "epoch": 2.1532227655366993, + "grad_norm": 6.36337608739268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443980 + }, + { + "epoch": 2.1532712637295353, + "grad_norm": 5.712804522772785e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 443990 + }, + { + "epoch": 2.153319761922371, + "grad_norm": 6.492722604889423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444000 + }, + { + "epoch": 2.1533682601152075, + "grad_norm": 5.482847882376518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444010 + }, + { + "epoch": 2.1534167583080435, + "grad_norm": 5.862609214091208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444020 + }, + { + "epoch": 2.15346525650088, + "grad_norm": 8.21283902041614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444030 + }, + { + "epoch": 2.1535137546937158, + "grad_norm": 5.487938778969692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444040 + }, + { + "epoch": 2.1535622528865517, + "grad_norm": 5.7948964240495116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444050 + }, + { + "epoch": 2.153610751079388, + "grad_norm": 6.5091971919173375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444060 + }, + { + "epoch": 2.153659249272224, + "grad_norm": 5.5742716540407855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444070 + }, + { + "epoch": 2.15370774746506, + "grad_norm": 5.9029762269346975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444080 + }, + { + "epoch": 2.1537562456578963, + "grad_norm": 5.516586043086136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444090 + }, + { + "epoch": 2.153804743850732, + "grad_norm": 6.968820798647357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444100 + }, + { + "epoch": 2.1538532420435685, + "grad_norm": 5.43208352610236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444110 + }, + { + "epoch": 2.1539017402364045, + "grad_norm": 5.196018264541635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444120 + }, + { + "epoch": 2.1539502384292404, + "grad_norm": 6.578680313396035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444130 + }, + { + "epoch": 2.1539987366220767, + "grad_norm": 5.315995622368064e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444140 + }, + { + "epoch": 2.1540472348149127, + "grad_norm": 5.079030870547285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444150 + }, + { + "epoch": 2.1540957330077486, + "grad_norm": 6.615814072574722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444160 + }, + { + "epoch": 2.154144231200585, + "grad_norm": 5.9284698181727435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444170 + }, + { + "epoch": 2.154192729393421, + "grad_norm": 5.38746326128603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444180 + }, + { + "epoch": 2.1542412275862572, + "grad_norm": 5.346844318410149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444190 + }, + { + "epoch": 2.154289725779093, + "grad_norm": 4.8884407988225576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444200 + }, + { + "epoch": 2.154338223971929, + "grad_norm": 5.05818024976179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444210 + }, + { + "epoch": 2.1543867221647655, + "grad_norm": 4.86666067445185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444220 + }, + { + "epoch": 2.1544352203576014, + "grad_norm": 5.1863203225366306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444230 + }, + { + "epoch": 2.1544837185504377, + "grad_norm": 4.879022981185699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444240 + }, + { + "epoch": 2.1545322167432737, + "grad_norm": 4.7495846047240775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444250 + }, + { + "epoch": 2.1545807149361096, + "grad_norm": 5.213073109189281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444260 + }, + { + "epoch": 2.154629213128946, + "grad_norm": 4.858582087763352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444270 + }, + { + "epoch": 2.154677711321782, + "grad_norm": 9.533705451758578e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444280 + }, + { + "epoch": 2.154726209514618, + "grad_norm": 4.7268745220208075e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444290 + }, + { + "epoch": 2.154774707707454, + "grad_norm": 5.050920208304888e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444300 + }, + { + "epoch": 2.15482320590029, + "grad_norm": 4.492291736823972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444310 + }, + { + "epoch": 2.154871704093126, + "grad_norm": 4.7477424232056364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444320 + }, + { + "epoch": 2.1549202022859624, + "grad_norm": 5.662453531840583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444330 + }, + { + "epoch": 2.1549687004787983, + "grad_norm": 4.461067874217406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444340 + }, + { + "epoch": 2.1550171986716347, + "grad_norm": 4.404811534186592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444350 + }, + { + "epoch": 2.1550656968644706, + "grad_norm": 4.7805656322452705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444360 + }, + { + "epoch": 2.1551141950573065, + "grad_norm": 0.0005933003267273307, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444370 + }, + { + "epoch": 2.155162693250143, + "grad_norm": 4.6484301492455415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444380 + }, + { + "epoch": 2.155211191442979, + "grad_norm": 4.4415710362955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444390 + }, + { + "epoch": 2.155259689635815, + "grad_norm": 4.795716904482106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444400 + }, + { + "epoch": 2.155308187828651, + "grad_norm": 4.484703822527081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444410 + }, + { + "epoch": 2.155356686021487, + "grad_norm": 4.442782483238261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444420 + }, + { + "epoch": 2.1554051842143234, + "grad_norm": 7.318300049519166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444430 + }, + { + "epoch": 2.1554536824071593, + "grad_norm": 4.214536147628678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444440 + }, + { + "epoch": 2.155502180599995, + "grad_norm": 6.6299135141889565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444450 + }, + { + "epoch": 2.1555506787928316, + "grad_norm": 4.122482550883433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444460 + }, + { + "epoch": 2.1555991769856675, + "grad_norm": 4.40738540419261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444470 + }, + { + "epoch": 2.155647675178504, + "grad_norm": 5.514643362403149e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 444480 + }, + { + "epoch": 2.15569617337134, + "grad_norm": 4.09189669881016e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444490 + }, + { + "epoch": 2.1557446715641757, + "grad_norm": 4.232044375385158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444500 + }, + { + "epoch": 2.155793169757012, + "grad_norm": 4.003194590040948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444510 + }, + { + "epoch": 2.155841667949848, + "grad_norm": 4.276278559700586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444520 + }, + { + "epoch": 2.155890166142684, + "grad_norm": 5.0974663281522226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444530 + }, + { + "epoch": 2.1559386643355203, + "grad_norm": 3.6666310734290164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444540 + }, + { + "epoch": 2.155987162528356, + "grad_norm": 3.6409703625395196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444550 + }, + { + "epoch": 2.1560356607211926, + "grad_norm": 3.855025624943664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444560 + }, + { + "epoch": 2.1560841589140285, + "grad_norm": 3.727340754267061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444570 + }, + { + "epoch": 2.1561326571068644, + "grad_norm": 4.194536359136691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444580 + }, + { + "epoch": 2.156181155299701, + "grad_norm": 3.635331268014852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444590 + }, + { + "epoch": 2.1562296534925367, + "grad_norm": 4.0531799641030375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444600 + }, + { + "epoch": 2.1562781516853726, + "grad_norm": 3.794917802224518e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444610 + }, + { + "epoch": 2.156326649878209, + "grad_norm": 3.5582261261879466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444620 + }, + { + "epoch": 2.156375148071045, + "grad_norm": 4.711369001597632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444630 + }, + { + "epoch": 2.1564236462638813, + "grad_norm": 3.5315083550813142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444640 + }, + { + "epoch": 2.156472144456717, + "grad_norm": 3.502446816128213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444650 + }, + { + "epoch": 2.156520642649553, + "grad_norm": 3.7128233998373616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444660 + }, + { + "epoch": 2.1565691408423895, + "grad_norm": 3.6097380871069618e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444670 + }, + { + "epoch": 2.1566176390352254, + "grad_norm": 3.926301815226907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444680 + }, + { + "epoch": 2.1566661372280613, + "grad_norm": 3.3250469186896225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444690 + }, + { + "epoch": 2.1567146354208977, + "grad_norm": 3.5801153899228666e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444700 + }, + { + "epoch": 2.1567631336137336, + "grad_norm": 0.008622108958661556, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 444710 + }, + { + "epoch": 2.15681163180657, + "grad_norm": 5.2064633564441465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444720 + }, + { + "epoch": 2.156860129999406, + "grad_norm": 5.838764082000125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444730 + }, + { + "epoch": 2.156908628192242, + "grad_norm": 7.864197868912015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444740 + }, + { + "epoch": 2.156957126385078, + "grad_norm": 8.354099918506108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444750 + }, + { + "epoch": 2.157005624577914, + "grad_norm": 7.939905117382295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444760 + }, + { + "epoch": 2.1570541227707505, + "grad_norm": 7.949389328132384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444770 + }, + { + "epoch": 2.1571026209635864, + "grad_norm": 6.078304068068974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444780 + }, + { + "epoch": 2.1571511191564223, + "grad_norm": 8.201468517654575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444790 + }, + { + "epoch": 2.1571996173492587, + "grad_norm": 7.79146375862183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444800 + }, + { + "epoch": 2.1572481155420946, + "grad_norm": 7.586404080939246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444810 + }, + { + "epoch": 2.1572966137349305, + "grad_norm": 7.686312528676353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444820 + }, + { + "epoch": 2.157345111927767, + "grad_norm": 5.9163407968299e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444830 + }, + { + "epoch": 2.157393610120603, + "grad_norm": 6.991243026277516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444840 + }, + { + "epoch": 2.1574421083134387, + "grad_norm": 7.534937140007969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444850 + }, + { + "epoch": 2.157490606506275, + "grad_norm": 7.276002179423813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444860 + }, + { + "epoch": 2.157539104699111, + "grad_norm": 7.248213933053194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444870 + }, + { + "epoch": 2.1575876028919474, + "grad_norm": 5.553647497436032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444880 + }, + { + "epoch": 2.1576361010847833, + "grad_norm": 6.89786565999384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444890 + }, + { + "epoch": 2.1576845992776192, + "grad_norm": 6.886162736918777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444900 + }, + { + "epoch": 2.1577330974704556, + "grad_norm": 6.424302227969747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444910 + }, + { + "epoch": 2.1577815956632915, + "grad_norm": 6.3928423514880706e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444920 + }, + { + "epoch": 2.157830093856128, + "grad_norm": 5.713503014703747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444930 + }, + { + "epoch": 2.157878592048964, + "grad_norm": 6.73414479024359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444940 + }, + { + "epoch": 2.1579270902417997, + "grad_norm": 8.236348548962269e-06, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 444950 + }, + { + "epoch": 2.157975588434636, + "grad_norm": 1.7286969523411244e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444960 + }, + { + "epoch": 2.158024086627472, + "grad_norm": 2.2789534341427498e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444970 + }, + { + "epoch": 2.158072584820308, + "grad_norm": 9.746553587319795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444980 + }, + { + "epoch": 2.1581210830131443, + "grad_norm": 2.454676359775476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 444990 + }, + { + "epoch": 2.1581695812059802, + "grad_norm": 2.302072971360758e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445000 + }, + { + "epoch": 2.1582180793988166, + "grad_norm": 2.1867028408451006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445010 + }, + { + "epoch": 2.1582665775916525, + "grad_norm": 2.3846027033869177e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445020 + }, + { + "epoch": 2.1583150757844884, + "grad_norm": 8.846613127388991e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445030 + }, + { + "epoch": 2.158363573977325, + "grad_norm": 1.9174429326085374e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445040 + }, + { + "epoch": 2.1584120721701607, + "grad_norm": 1.8656419342732988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445050 + }, + { + "epoch": 2.1584605703629967, + "grad_norm": 1.9023467757506296e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445060 + }, + { + "epoch": 2.158509068555833, + "grad_norm": 0.0006017689593136311, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 445070 + }, + { + "epoch": 2.158557566748669, + "grad_norm": 4.267316035111435e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445080 + }, + { + "epoch": 2.1586060649415053, + "grad_norm": 1.7354403098579496e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445090 + }, + { + "epoch": 2.1586545631343412, + "grad_norm": 1.6409581803600304e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445100 + }, + { + "epoch": 2.158703061327177, + "grad_norm": 1.7557418686919846e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445110 + }, + { + "epoch": 2.1587515595200135, + "grad_norm": 1.6060894267866388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445120 + }, + { + "epoch": 2.1588000577128494, + "grad_norm": 1.053349842550233e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445130 + }, + { + "epoch": 2.1588485559056854, + "grad_norm": 1.489337046223227e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445140 + }, + { + "epoch": 2.1588970540985217, + "grad_norm": 1.4782598555029836e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445150 + }, + { + "epoch": 2.1589455522913577, + "grad_norm": 1.4743917745363433e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445160 + }, + { + "epoch": 2.158994050484194, + "grad_norm": 1.35559184855083e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445170 + }, + { + "epoch": 2.15904254867703, + "grad_norm": 7.998620276339352e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445180 + }, + { + "epoch": 2.159091046869866, + "grad_norm": 1.2358301319181919e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445190 + }, + { + "epoch": 2.1591395450627022, + "grad_norm": 1.3150300219422206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445200 + }, + { + "epoch": 2.159188043255538, + "grad_norm": 1.3054166629444808e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445210 + }, + { + "epoch": 2.159236541448374, + "grad_norm": 1.2170630725449882e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445220 + }, + { + "epoch": 2.1592850396412104, + "grad_norm": 6.9095085564185865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445230 + }, + { + "epoch": 2.1593335378340464, + "grad_norm": 1.1158324923599139e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445240 + }, + { + "epoch": 2.1593820360268827, + "grad_norm": 1.1498221283545718e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445250 + }, + { + "epoch": 2.1594305342197186, + "grad_norm": 1.073193834599806e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445260 + }, + { + "epoch": 2.1594790324125546, + "grad_norm": 1.0760582881630398e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445270 + }, + { + "epoch": 2.159527530605391, + "grad_norm": 6.608260264329147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445280 + }, + { + "epoch": 2.159576028798227, + "grad_norm": 1.0352966455684509e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445290 + }, + { + "epoch": 2.159624526991063, + "grad_norm": 9.397850590175949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445300 + }, + { + "epoch": 2.159673025183899, + "grad_norm": 9.25544009078294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445310 + }, + { + "epoch": 2.159721523376735, + "grad_norm": 9.927768587658647e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445320 + }, + { + "epoch": 2.1597700215695714, + "grad_norm": 5.788654107163893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445330 + }, + { + "epoch": 2.1598185197624074, + "grad_norm": 9.996437256631907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445340 + }, + { + "epoch": 2.1598670179552433, + "grad_norm": 8.97380323294783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445350 + }, + { + "epoch": 2.1599155161480796, + "grad_norm": 1.3143349860911258e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445360 + }, + { + "epoch": 2.1599640143409156, + "grad_norm": 8.69655286805937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445370 + }, + { + "epoch": 2.1600125125337515, + "grad_norm": 4.955725216859719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445380 + }, + { + "epoch": 2.160061010726588, + "grad_norm": 9.101298928726465e-06, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 445390 + }, + { + "epoch": 2.1601095089194238, + "grad_norm": 1.0861681403184775e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445400 + }, + { + "epoch": 2.16015800711226, + "grad_norm": 2.4981489332276396e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445410 + }, + { + "epoch": 2.160206505305096, + "grad_norm": 1.2573426829476375e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445420 + }, + { + "epoch": 2.160255003497932, + "grad_norm": 1.4764642401132733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445430 + }, + { + "epoch": 2.1603035016907683, + "grad_norm": 1.302077180298511e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445440 + }, + { + "epoch": 2.1603519998836043, + "grad_norm": 1.1766143870772794e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445450 + }, + { + "epoch": 2.1604004980764406, + "grad_norm": 1.1401483789086342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445460 + }, + { + "epoch": 2.1604489962692766, + "grad_norm": 1.1651737622742075e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445470 + }, + { + "epoch": 2.1604974944621125, + "grad_norm": 1.3644823411596008e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445480 + }, + { + "epoch": 2.160545992654949, + "grad_norm": 0.005734777078032494, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 445490 + }, + { + "epoch": 2.1605944908477848, + "grad_norm": 6.722725083818659e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445500 + }, + { + "epoch": 2.1606429890406207, + "grad_norm": 5.92286087339744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445510 + }, + { + "epoch": 2.160691487233457, + "grad_norm": 5.140200300957076e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445520 + }, + { + "epoch": 2.160739985426293, + "grad_norm": 5.3555224440060556e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445530 + }, + { + "epoch": 2.1607884836191293, + "grad_norm": 1.3492826838046312e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 445540 + }, + { + "epoch": 2.1608369818119653, + "grad_norm": 1.5775183783262037e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445550 + }, + { + "epoch": 2.160885480004801, + "grad_norm": 1.926635559357237e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445560 + }, + { + "epoch": 2.1609339781976376, + "grad_norm": 1.723832610878162e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445570 + }, + { + "epoch": 2.1609824763904735, + "grad_norm": 2.0337465684860945e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445580 + }, + { + "epoch": 2.1610309745833094, + "grad_norm": 1.4868443031446077e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445590 + }, + { + "epoch": 2.1610794727761458, + "grad_norm": 1.4863508113194257e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445600 + }, + { + "epoch": 2.1611279709689817, + "grad_norm": 1.59847731993068e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445610 + }, + { + "epoch": 2.161176469161818, + "grad_norm": 1.2509975931607187e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445620 + }, + { + "epoch": 2.161224967354654, + "grad_norm": 1.7821828805608675e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445630 + }, + { + "epoch": 2.16127346554749, + "grad_norm": 1.3409219718596432e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445640 + }, + { + "epoch": 2.1613219637403263, + "grad_norm": 1.4318804460344836e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445650 + }, + { + "epoch": 2.161370461933162, + "grad_norm": 1.1341177923895884e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445660 + }, + { + "epoch": 2.161418960125998, + "grad_norm": 1.1135186468891334e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445670 + }, + { + "epoch": 2.1614674583188345, + "grad_norm": 1.1556252502487041e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445680 + }, + { + "epoch": 2.1615159565116704, + "grad_norm": 1.0484658560017124e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445690 + }, + { + "epoch": 2.1615644547045068, + "grad_norm": 1.0332259989809245e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445700 + }, + { + "epoch": 2.1616129528973427, + "grad_norm": 9.768754352990072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445710 + }, + { + "epoch": 2.1616614510901786, + "grad_norm": 9.772486919246148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445720 + }, + { + "epoch": 2.161709949283015, + "grad_norm": 1.05270191852469e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445730 + }, + { + "epoch": 2.161758447475851, + "grad_norm": 1.0288558769389056e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445740 + }, + { + "epoch": 2.161806945668687, + "grad_norm": 9.383656106365379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445750 + }, + { + "epoch": 2.161855443861523, + "grad_norm": 9.0258563432144e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445760 + }, + { + "epoch": 2.161903942054359, + "grad_norm": 0.007263267412781715, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445770 + }, + { + "epoch": 2.1619524402471955, + "grad_norm": 9.280482117901556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445780 + }, + { + "epoch": 2.1620009384400314, + "grad_norm": 8.887572221283335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445790 + }, + { + "epoch": 2.1620494366328673, + "grad_norm": 8.449080269201659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445800 + }, + { + "epoch": 2.1620979348257037, + "grad_norm": 8.10698293207679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445810 + }, + { + "epoch": 2.1621464330185396, + "grad_norm": 8.148156666720752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445820 + }, + { + "epoch": 2.162194931211376, + "grad_norm": 8.234956112573855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445830 + }, + { + "epoch": 2.162243429404212, + "grad_norm": 7.684962838538922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445840 + }, + { + "epoch": 2.162291927597048, + "grad_norm": 7.911042303021532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445850 + }, + { + "epoch": 2.162340425789884, + "grad_norm": 7.864034159865696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445860 + }, + { + "epoch": 2.16238892398272, + "grad_norm": 7.627038485225057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445870 + }, + { + "epoch": 2.162437422175556, + "grad_norm": 8.424900443060324e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445880 + }, + { + "epoch": 2.1624859203683924, + "grad_norm": 7.2808338700269815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445890 + }, + { + "epoch": 2.1625344185612283, + "grad_norm": 7.197413651738316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445900 + }, + { + "epoch": 2.162582916754064, + "grad_norm": 7.270785772561794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445910 + }, + { + "epoch": 2.1626314149469006, + "grad_norm": 7.1386530180461705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445920 + }, + { + "epoch": 2.1626799131397365, + "grad_norm": 7.1044714786694385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445930 + }, + { + "epoch": 2.162728411332573, + "grad_norm": 6.863072940177517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445940 + }, + { + "epoch": 2.162776909525409, + "grad_norm": 6.781016963941511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445950 + }, + { + "epoch": 2.1628254077182447, + "grad_norm": 6.807362751715118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445960 + }, + { + "epoch": 2.162873905911081, + "grad_norm": 6.6194797909702174e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445970 + }, + { + "epoch": 2.162922404103917, + "grad_norm": 7.297724550880957e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445980 + }, + { + "epoch": 2.1629709022967534, + "grad_norm": 6.16525403529522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 445990 + }, + { + "epoch": 2.1630194004895893, + "grad_norm": 6.115469659562223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446000 + }, + { + "epoch": 2.163067898682425, + "grad_norm": 5.976286047371104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446010 + }, + { + "epoch": 2.1631163968752616, + "grad_norm": 6.228347046999261e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446020 + }, + { + "epoch": 2.1631648950680975, + "grad_norm": 6.369201855704887e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446030 + }, + { + "epoch": 2.1632133932609334, + "grad_norm": 6.184081030369271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446040 + }, + { + "epoch": 2.16326189145377, + "grad_norm": 5.8091472965315916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446050 + }, + { + "epoch": 2.1633103896466057, + "grad_norm": 6.166735147417057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446060 + }, + { + "epoch": 2.163358887839442, + "grad_norm": 5.8438777159608435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446070 + }, + { + "epoch": 2.163407386032278, + "grad_norm": 5.740405413234839e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446080 + }, + { + "epoch": 2.163455884225114, + "grad_norm": 5.55660153622739e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446090 + }, + { + "epoch": 2.1635043824179503, + "grad_norm": 5.6058775044220965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446100 + }, + { + "epoch": 2.163552880610786, + "grad_norm": 5.7067877605732065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446110 + }, + { + "epoch": 2.163601378803622, + "grad_norm": 5.616138423647499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446120 + }, + { + "epoch": 2.1636498769964585, + "grad_norm": 5.584762675425736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446130 + }, + { + "epoch": 2.1636983751892944, + "grad_norm": 5.300104476191336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446140 + }, + { + "epoch": 2.163746873382131, + "grad_norm": 5.360489467420848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446150 + }, + { + "epoch": 2.1637953715749667, + "grad_norm": 5.361414423532551e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446160 + }, + { + "epoch": 2.1638438697678026, + "grad_norm": 5.055361725680996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446170 + }, + { + "epoch": 2.163892367960639, + "grad_norm": 5.297158622852294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446180 + }, + { + "epoch": 2.163940866153475, + "grad_norm": 5.188855539017823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446190 + }, + { + "epoch": 2.163989364346311, + "grad_norm": 5.0496873882366344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446200 + }, + { + "epoch": 2.164037862539147, + "grad_norm": 4.903547505819006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446210 + }, + { + "epoch": 2.164086360731983, + "grad_norm": 4.8041733862191904e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446220 + }, + { + "epoch": 2.1641348589248195, + "grad_norm": 4.941013685311191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446230 + }, + { + "epoch": 2.1641833571176554, + "grad_norm": 4.875262675341219e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446240 + }, + { + "epoch": 2.1642318553104913, + "grad_norm": 4.873323177889688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446250 + }, + { + "epoch": 2.1642803535033277, + "grad_norm": 4.724945938505698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446260 + }, + { + "epoch": 2.1643288516961636, + "grad_norm": 4.699563305621268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446270 + }, + { + "epoch": 2.1643773498889995, + "grad_norm": 4.759360763273435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446280 + }, + { + "epoch": 2.164425848081836, + "grad_norm": 4.681553946284112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446290 + }, + { + "epoch": 2.164474346274672, + "grad_norm": 4.4431135393097065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446300 + }, + { + "epoch": 2.164522844467508, + "grad_norm": 4.355659257271327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446310 + }, + { + "epoch": 2.164571342660344, + "grad_norm": 4.532410912361229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446320 + }, + { + "epoch": 2.16461984085318, + "grad_norm": 4.641728992282879e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446330 + }, + { + "epoch": 2.1646683390460164, + "grad_norm": 4.398449618747691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446340 + }, + { + "epoch": 2.1647168372388523, + "grad_norm": 4.146689661865821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446350 + }, + { + "epoch": 2.1647653354316887, + "grad_norm": 4.473200988286408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446360 + }, + { + "epoch": 2.1648138336245246, + "grad_norm": 4.286096555006225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446370 + }, + { + "epoch": 2.1648623318173605, + "grad_norm": 4.293020083423471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446380 + }, + { + "epoch": 2.164910830010197, + "grad_norm": 4.378954145067837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446390 + }, + { + "epoch": 2.164959328203033, + "grad_norm": 4.103967967239441e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446400 + }, + { + "epoch": 2.1650078263958688, + "grad_norm": 4.1702996895764954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446410 + }, + { + "epoch": 2.165056324588705, + "grad_norm": 4.108962457394227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446420 + }, + { + "epoch": 2.165104822781541, + "grad_norm": 4.091292339580832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446430 + }, + { + "epoch": 2.165153320974377, + "grad_norm": 3.97009216612787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446440 + }, + { + "epoch": 2.1652018191672133, + "grad_norm": 4.000072749477113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446450 + }, + { + "epoch": 2.1652503173600492, + "grad_norm": 3.88362786907237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446460 + }, + { + "epoch": 2.1652988155528856, + "grad_norm": 3.8664325074933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446470 + }, + { + "epoch": 2.1653473137457215, + "grad_norm": 3.882724286086159e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446480 + }, + { + "epoch": 2.1653958119385575, + "grad_norm": 4.0800077840685844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446490 + }, + { + "epoch": 2.165444310131394, + "grad_norm": 3.875272341247182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446500 + }, + { + "epoch": 2.1654928083242297, + "grad_norm": 3.7691420402552467e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446510 + }, + { + "epoch": 2.165541306517066, + "grad_norm": 3.7338375022955006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446520 + }, + { + "epoch": 2.165589804709902, + "grad_norm": 3.623684278863948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446530 + }, + { + "epoch": 2.165638302902738, + "grad_norm": 3.594687541408348e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446540 + }, + { + "epoch": 2.1656868010955743, + "grad_norm": 3.6114872727921465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446550 + }, + { + "epoch": 2.1657352992884102, + "grad_norm": 3.6103033380641136e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446560 + }, + { + "epoch": 2.165783797481246, + "grad_norm": 3.5551056498661637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446570 + }, + { + "epoch": 2.1658322956740825, + "grad_norm": 3.444459025558899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446580 + }, + { + "epoch": 2.1658807938669185, + "grad_norm": 3.5338098314241506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446590 + }, + { + "epoch": 2.165929292059755, + "grad_norm": 3.4766612770908978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446600 + }, + { + "epoch": 2.1659777902525907, + "grad_norm": 3.5850980566465296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446610 + }, + { + "epoch": 2.1660262884454267, + "grad_norm": 3.4315237371629337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446620 + }, + { + "epoch": 2.166074786638263, + "grad_norm": 3.3239439289900474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446630 + }, + { + "epoch": 2.166123284831099, + "grad_norm": 3.398687340450124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446640 + }, + { + "epoch": 2.166171783023935, + "grad_norm": 3.355383796588285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446650 + }, + { + "epoch": 2.1662202812167712, + "grad_norm": 3.2771124551800312e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446660 + }, + { + "epoch": 2.166268779409607, + "grad_norm": 3.2302750696544535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446670 + }, + { + "epoch": 2.1663172776024435, + "grad_norm": 3.244824711146066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446680 + }, + { + "epoch": 2.1663657757952794, + "grad_norm": 3.3253184028581018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446690 + }, + { + "epoch": 2.1664142739881154, + "grad_norm": 3.232670906072599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446700 + }, + { + "epoch": 2.1664627721809517, + "grad_norm": 3.247227141400799e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446710 + }, + { + "epoch": 2.1665112703737877, + "grad_norm": 3.0682228953082813e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446720 + }, + { + "epoch": 2.1665597685666236, + "grad_norm": 3.009829015354626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446730 + }, + { + "epoch": 2.16660826675946, + "grad_norm": 3.0029855224711355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446740 + }, + { + "epoch": 2.166656764952296, + "grad_norm": 3.194643113602069e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446750 + }, + { + "epoch": 2.1667052631451322, + "grad_norm": 2.93017660624173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446760 + }, + { + "epoch": 2.166753761337968, + "grad_norm": 3.0120786504994612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446770 + }, + { + "epoch": 2.166802259530804, + "grad_norm": 2.9471846119122347e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446780 + }, + { + "epoch": 2.1668507577236404, + "grad_norm": 2.85351529782929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446790 + }, + { + "epoch": 2.1668992559164764, + "grad_norm": 3.05638764075411e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446800 + }, + { + "epoch": 2.1669477541093123, + "grad_norm": 2.8519582428998547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446810 + }, + { + "epoch": 2.1669962523021487, + "grad_norm": 2.8089243642170914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446820 + }, + { + "epoch": 2.1670447504949846, + "grad_norm": 2.7104472337668994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446830 + }, + { + "epoch": 2.167093248687821, + "grad_norm": 2.7063920242653694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446840 + }, + { + "epoch": 2.167141746880657, + "grad_norm": 2.726306092881714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446850 + }, + { + "epoch": 2.167190245073493, + "grad_norm": 2.7308365133649204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446860 + }, + { + "epoch": 2.167238743266329, + "grad_norm": 2.607291889944463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446870 + }, + { + "epoch": 2.167287241459165, + "grad_norm": 2.620810619191616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446880 + }, + { + "epoch": 2.1673357396520014, + "grad_norm": 2.574059863036382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446890 + }, + { + "epoch": 2.1673842378448374, + "grad_norm": 2.7792411856353283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446900 + }, + { + "epoch": 2.1674327360376733, + "grad_norm": 2.606196176202502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446910 + }, + { + "epoch": 2.1674812342305096, + "grad_norm": 2.6442678517923923e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446920 + }, + { + "epoch": 2.1675297324233456, + "grad_norm": 2.5750107397470856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446930 + }, + { + "epoch": 2.1675782306161815, + "grad_norm": 2.6079719646077137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446940 + }, + { + "epoch": 2.167626728809018, + "grad_norm": 2.409448597973096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446950 + }, + { + "epoch": 2.1676752270018538, + "grad_norm": 2.5696074317238526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446960 + }, + { + "epoch": 2.1677237251946897, + "grad_norm": 2.4487987957400037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446970 + }, + { + "epoch": 2.167772223387526, + "grad_norm": 2.296964566994575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446980 + }, + { + "epoch": 2.167820721580362, + "grad_norm": 2.3620516458322527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 446990 + }, + { + "epoch": 2.1678692197731984, + "grad_norm": 2.3929915187181905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447000 + }, + { + "epoch": 2.1679177179660343, + "grad_norm": 2.3930726911203237e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447010 + }, + { + "epoch": 2.16796621615887, + "grad_norm": 2.1943287720205262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447020 + }, + { + "epoch": 2.1680147143517066, + "grad_norm": 2.2011013243172783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447030 + }, + { + "epoch": 2.1680632125445425, + "grad_norm": 2.1915570869168732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447040 + }, + { + "epoch": 2.168111710737379, + "grad_norm": 2.2310237000056077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447050 + }, + { + "epoch": 2.1681602089302148, + "grad_norm": 2.325123659829842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447060 + }, + { + "epoch": 2.1682087071230507, + "grad_norm": 2.1375310552684823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447070 + }, + { + "epoch": 2.168257205315887, + "grad_norm": 2.167526417906629e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447080 + }, + { + "epoch": 2.168305703508723, + "grad_norm": 2.1980815745337168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447090 + }, + { + "epoch": 2.168354201701559, + "grad_norm": 2.1072478375572246e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447100 + }, + { + "epoch": 2.1684026998943953, + "grad_norm": 2.2117317257652758e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447110 + }, + { + "epoch": 2.168451198087231, + "grad_norm": 2.033185637628776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447120 + }, + { + "epoch": 2.1684996962800676, + "grad_norm": 1.9135959519189782e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447130 + }, + { + "epoch": 2.1685481944729035, + "grad_norm": 2.1370237845985685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447140 + }, + { + "epoch": 2.1685966926657394, + "grad_norm": 2.092828935928992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447150 + }, + { + "epoch": 2.1686451908585758, + "grad_norm": 2.0233737814123742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447160 + }, + { + "epoch": 2.1686936890514117, + "grad_norm": 1.9989806787634734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447170 + }, + { + "epoch": 2.1687421872442476, + "grad_norm": 1.7521675772513845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447180 + }, + { + "epoch": 2.168790685437084, + "grad_norm": 1.9614201391959796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447190 + }, + { + "epoch": 2.16883918362992, + "grad_norm": 1.9365354546607705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447200 + }, + { + "epoch": 2.1688876818227563, + "grad_norm": 1.8303912838746328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447210 + }, + { + "epoch": 2.168936180015592, + "grad_norm": 2.0154343474132475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447220 + }, + { + "epoch": 2.168984678208428, + "grad_norm": 1.7075494724849705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447230 + }, + { + "epoch": 2.1690331764012645, + "grad_norm": 1.947654709510971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447240 + }, + { + "epoch": 2.1690816745941004, + "grad_norm": 1.8457413943906431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447250 + }, + { + "epoch": 2.1691301727869368, + "grad_norm": 1.8894373852162971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447260 + }, + { + "epoch": 2.1691786709797727, + "grad_norm": 1.8817881937138736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447270 + }, + { + "epoch": 2.1692271691726086, + "grad_norm": 1.6565794567213743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447280 + }, + { + "epoch": 2.169275667365445, + "grad_norm": 1.784875053090218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447290 + }, + { + "epoch": 2.169324165558281, + "grad_norm": 1.6489915424244828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447300 + }, + { + "epoch": 2.169372663751117, + "grad_norm": 1.6876081190275727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447310 + }, + { + "epoch": 2.169421161943953, + "grad_norm": 1.7042613080775482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447320 + }, + { + "epoch": 2.169469660136789, + "grad_norm": 1.619249815121293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447330 + }, + { + "epoch": 2.169518158329625, + "grad_norm": 1.6881473356988863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447340 + }, + { + "epoch": 2.1695666565224614, + "grad_norm": 1.7417112303519389e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447350 + }, + { + "epoch": 2.1696151547152973, + "grad_norm": 1.6838877172631328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447360 + }, + { + "epoch": 2.1696636529081337, + "grad_norm": 1.6610508737358032e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447370 + }, + { + "epoch": 2.1697121511009696, + "grad_norm": 1.4395769767361344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447380 + }, + { + "epoch": 2.1697606492938055, + "grad_norm": 1.7485684793427936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447390 + }, + { + "epoch": 2.169809147486642, + "grad_norm": 1.5653051832487108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447400 + }, + { + "epoch": 2.169857645679478, + "grad_norm": 1.5782781019879621e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447410 + }, + { + "epoch": 2.169906143872314, + "grad_norm": 1.567595745655126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447420 + }, + { + "epoch": 2.16995464206515, + "grad_norm": 1.4670197288069176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447430 + }, + { + "epoch": 2.170003140257986, + "grad_norm": 1.4761210422875592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447440 + }, + { + "epoch": 2.1700516384508224, + "grad_norm": 1.630566885069129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447450 + }, + { + "epoch": 2.1701001366436583, + "grad_norm": 1.4434618833547574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447460 + }, + { + "epoch": 2.1701486348364942, + "grad_norm": 1.4941539348001243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447470 + }, + { + "epoch": 2.1701971330293306, + "grad_norm": 1.3311807833815692e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447480 + }, + { + "epoch": 2.1702456312221665, + "grad_norm": 1.3930397244621417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447490 + }, + { + "epoch": 2.1702941294150024, + "grad_norm": 1.4521846196657862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447500 + }, + { + "epoch": 2.170342627607839, + "grad_norm": 1.5693824479967589e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447510 + }, + { + "epoch": 2.1703911258006747, + "grad_norm": 1.527239874121733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447520 + }, + { + "epoch": 2.170439623993511, + "grad_norm": 1.406156798111624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447530 + }, + { + "epoch": 2.170488122186347, + "grad_norm": 1.5552685681541334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447540 + }, + { + "epoch": 2.170536620379183, + "grad_norm": 1.4494086144622997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447550 + }, + { + "epoch": 2.1705851185720193, + "grad_norm": 1.3995437484481954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447560 + }, + { + "epoch": 2.1706336167648552, + "grad_norm": 1.3950683523944463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447570 + }, + { + "epoch": 2.1706821149576916, + "grad_norm": 1.1891071380887297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447580 + }, + { + "epoch": 2.1707306131505275, + "grad_norm": 1.3893578625356895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447590 + }, + { + "epoch": 2.1707791113433634, + "grad_norm": 1.3639378266816493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447600 + }, + { + "epoch": 2.1708276095362, + "grad_norm": 1.367054323964112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447610 + }, + { + "epoch": 2.1708761077290357, + "grad_norm": 1.306430476688547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447620 + }, + { + "epoch": 2.1709246059218716, + "grad_norm": 1.1953461580560543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447630 + }, + { + "epoch": 2.170973104114708, + "grad_norm": 1.4026952612766763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447640 + }, + { + "epoch": 2.171021602307544, + "grad_norm": 1.3067532336208387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447650 + }, + { + "epoch": 2.1710701005003803, + "grad_norm": 1.315151052949659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447660 + }, + { + "epoch": 2.171118598693216, + "grad_norm": 1.3110438885632902e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447670 + }, + { + "epoch": 2.171167096886052, + "grad_norm": 1.100153781408153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447680 + }, + { + "epoch": 2.1712155950788885, + "grad_norm": 1.2539532008304377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447690 + }, + { + "epoch": 2.1712640932717244, + "grad_norm": 1.1889720781255164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447700 + }, + { + "epoch": 2.1713125914645603, + "grad_norm": 1.2330642675806303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447710 + }, + { + "epoch": 2.1713610896573967, + "grad_norm": 1.2089830079275998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447720 + }, + { + "epoch": 2.1714095878502326, + "grad_norm": 1.0908319154623314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447730 + }, + { + "epoch": 2.171458086043069, + "grad_norm": 1.1506102737257606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447740 + }, + { + "epoch": 2.171506584235905, + "grad_norm": 1.16188675747253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447750 + }, + { + "epoch": 2.171555082428741, + "grad_norm": 1.1711936167557724e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447760 + }, + { + "epoch": 2.171603580621577, + "grad_norm": 1.171723056359042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447770 + }, + { + "epoch": 2.171652078814413, + "grad_norm": 1.0162282251258148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447780 + }, + { + "epoch": 2.1717005770072495, + "grad_norm": 1.1904398888873402e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447790 + }, + { + "epoch": 2.1717490752000854, + "grad_norm": 1.147578700511076e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447800 + }, + { + "epoch": 2.1717975733929213, + "grad_norm": 1.1907668522326276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447810 + }, + { + "epoch": 2.1718460715857577, + "grad_norm": 1.1458221251814393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447820 + }, + { + "epoch": 2.1718945697785936, + "grad_norm": 9.72195607573667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447830 + }, + { + "epoch": 2.1719430679714296, + "grad_norm": 1.1171997584824567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447840 + }, + { + "epoch": 2.171991566164266, + "grad_norm": 1.0685884035410709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447850 + }, + { + "epoch": 2.172040064357102, + "grad_norm": 1.140048539127747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447860 + }, + { + "epoch": 2.1720885625499378, + "grad_norm": 1.0937931165244663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447870 + }, + { + "epoch": 2.172137060742774, + "grad_norm": 9.448344258089492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447880 + }, + { + "epoch": 2.17218555893561, + "grad_norm": 1.0471633231645683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447890 + }, + { + "epoch": 2.1722340571284464, + "grad_norm": 1.0960562804029905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447900 + }, + { + "epoch": 2.1722825553212823, + "grad_norm": 1.0611796597004286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447910 + }, + { + "epoch": 2.1723310535141183, + "grad_norm": 1.0608943057377473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447920 + }, + { + "epoch": 2.1723795517069546, + "grad_norm": 9.490739785178448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447930 + }, + { + "epoch": 2.1724280498997905, + "grad_norm": 1.0038103255283204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447940 + }, + { + "epoch": 2.172476548092627, + "grad_norm": 1.080074525816599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447950 + }, + { + "epoch": 2.172525046285463, + "grad_norm": 1.2150801467214478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447960 + }, + { + "epoch": 2.1725735444782988, + "grad_norm": 1.0422244258734281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447970 + }, + { + "epoch": 2.172622042671135, + "grad_norm": 8.915498597161786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447980 + }, + { + "epoch": 2.172670540863971, + "grad_norm": 9.713587587611983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 447990 + }, + { + "epoch": 2.172719039056807, + "grad_norm": 9.593835557097918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448000 + }, + { + "epoch": 2.1727675372496433, + "grad_norm": 9.934373110809247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448010 + }, + { + "epoch": 2.1728160354424793, + "grad_norm": 1.0135604497918393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448020 + }, + { + "epoch": 2.1728645336353156, + "grad_norm": 8.208481858673622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448030 + }, + { + "epoch": 2.1729130318281515, + "grad_norm": 1.0006586990130018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448040 + }, + { + "epoch": 2.1729615300209875, + "grad_norm": 9.58095824898919e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448050 + }, + { + "epoch": 2.173010028213824, + "grad_norm": 8.980945267467177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448060 + }, + { + "epoch": 2.1730585264066598, + "grad_norm": 9.658590442995774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448070 + }, + { + "epoch": 2.1731070245994957, + "grad_norm": 7.762646418996155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448080 + }, + { + "epoch": 2.173155522792332, + "grad_norm": 9.844625310506672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448090 + }, + { + "epoch": 2.173204020985168, + "grad_norm": 9.130615126196062e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448100 + }, + { + "epoch": 2.1732525191780043, + "grad_norm": 9.480918947701866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448110 + }, + { + "epoch": 2.1733010173708402, + "grad_norm": 9.024238920574135e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448120 + }, + { + "epoch": 2.173349515563676, + "grad_norm": 7.907369194981584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448130 + }, + { + "epoch": 2.1733980137565125, + "grad_norm": 9.23408776998258e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448140 + }, + { + "epoch": 2.1734465119493485, + "grad_norm": 8.954264103522291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448150 + }, + { + "epoch": 2.1734950101421844, + "grad_norm": 9.211232736561215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448160 + }, + { + "epoch": 2.1735435083350207, + "grad_norm": 8.81470839431131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448170 + }, + { + "epoch": 2.1735920065278567, + "grad_norm": 8.115372907013807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448180 + }, + { + "epoch": 2.173640504720693, + "grad_norm": 9.002610568131786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448190 + }, + { + "epoch": 2.173689002913529, + "grad_norm": 8.416351988671522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448200 + }, + { + "epoch": 2.173737501106365, + "grad_norm": 8.077130360106821e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448210 + }, + { + "epoch": 2.1737859992992012, + "grad_norm": 7.992071004991885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448220 + }, + { + "epoch": 2.173834497492037, + "grad_norm": 8.137594704749063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448230 + }, + { + "epoch": 2.173882995684873, + "grad_norm": 8.110887392831501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448240 + }, + { + "epoch": 2.1739314938777095, + "grad_norm": 8.398291697631066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448250 + }, + { + "epoch": 2.1739799920705454, + "grad_norm": 8.78783282587392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448260 + }, + { + "epoch": 2.1740284902633817, + "grad_norm": 8.601170407018799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448270 + }, + { + "epoch": 2.1740769884562177, + "grad_norm": 7.413784146592661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448280 + }, + { + "epoch": 2.1741254866490536, + "grad_norm": 8.055243938542844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448290 + }, + { + "epoch": 2.17417398484189, + "grad_norm": 8.537505209460505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448300 + }, + { + "epoch": 2.174222483034726, + "grad_norm": 7.873614435993659e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448310 + }, + { + "epoch": 2.1742709812275622, + "grad_norm": 8.176634196388477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448320 + }, + { + "epoch": 2.174319479420398, + "grad_norm": 6.817738267272944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448330 + }, + { + "epoch": 2.174367977613234, + "grad_norm": 8.076940503087826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448340 + }, + { + "epoch": 2.1744164758060704, + "grad_norm": 7.568541491309588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448350 + }, + { + "epoch": 2.1744649739989064, + "grad_norm": 7.621994768669538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448360 + }, + { + "epoch": 2.1745134721917423, + "grad_norm": 7.659733114451228e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448370 + }, + { + "epoch": 2.1745619703845787, + "grad_norm": 6.46664602754754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448380 + }, + { + "epoch": 2.1746104685774146, + "grad_norm": 8.129370030474092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448390 + }, + { + "epoch": 2.1746589667702505, + "grad_norm": 7.623211786267348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448400 + }, + { + "epoch": 2.174707464963087, + "grad_norm": 7.34840284621896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448410 + }, + { + "epoch": 2.174755963155923, + "grad_norm": 7.741448939668771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448420 + }, + { + "epoch": 2.174804461348759, + "grad_norm": 6.341767857520608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448430 + }, + { + "epoch": 2.174852959541595, + "grad_norm": 7.4734902000273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448440 + }, + { + "epoch": 2.174901457734431, + "grad_norm": 7.716357117715233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448450 + }, + { + "epoch": 2.1749499559272674, + "grad_norm": 6.945064683350211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448460 + }, + { + "epoch": 2.1749984541201033, + "grad_norm": 7.113073934306158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448470 + }, + { + "epoch": 2.1750469523129397, + "grad_norm": 6.343076961456973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448480 + }, + { + "epoch": 2.1750954505057756, + "grad_norm": 7.194595923465386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448490 + }, + { + "epoch": 2.1751439486986115, + "grad_norm": 7.145400218178111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448500 + }, + { + "epoch": 2.175192446891448, + "grad_norm": 7.703916367063357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448510 + }, + { + "epoch": 2.175240945084284, + "grad_norm": 7.054578645693255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448520 + }, + { + "epoch": 2.1752894432771197, + "grad_norm": 6.658215738752915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448530 + }, + { + "epoch": 2.175337941469956, + "grad_norm": 7.340927368204575e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448540 + }, + { + "epoch": 2.175386439662792, + "grad_norm": 6.636343528043653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448550 + }, + { + "epoch": 2.1754349378556284, + "grad_norm": 6.801479912610375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448560 + }, + { + "epoch": 2.1754834360484643, + "grad_norm": 7.111008244464756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448570 + }, + { + "epoch": 2.1755319342413, + "grad_norm": 6.051919854144217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448580 + }, + { + "epoch": 2.1755804324341366, + "grad_norm": 6.637454248448194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448590 + }, + { + "epoch": 2.1756289306269725, + "grad_norm": 6.783961339351663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448600 + }, + { + "epoch": 2.1756774288198084, + "grad_norm": 7.470897571693058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448610 + }, + { + "epoch": 2.175725927012645, + "grad_norm": 6.784381980651233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448620 + }, + { + "epoch": 2.1757744252054807, + "grad_norm": 5.632113015963114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448630 + }, + { + "epoch": 2.175822923398317, + "grad_norm": 6.521338491438655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448640 + }, + { + "epoch": 2.175871421591153, + "grad_norm": 6.487343284788949e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448650 + }, + { + "epoch": 2.175919919783989, + "grad_norm": 6.91067100433429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448660 + }, + { + "epoch": 2.1759684179768253, + "grad_norm": 6.461609700636473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448670 + }, + { + "epoch": 2.176016916169661, + "grad_norm": 5.466451398206118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448680 + }, + { + "epoch": 2.176065414362497, + "grad_norm": 6.244986821002385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448690 + }, + { + "epoch": 2.1761139125553335, + "grad_norm": 6.195732566993684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448700 + }, + { + "epoch": 2.1761624107481694, + "grad_norm": 6.394153615474352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448710 + }, + { + "epoch": 2.1762109089410058, + "grad_norm": 6.077996772546612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448720 + }, + { + "epoch": 2.1762594071338417, + "grad_norm": 5.150738502379681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448730 + }, + { + "epoch": 2.1763079053266776, + "grad_norm": 5.942320058238693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448740 + }, + { + "epoch": 2.176356403519514, + "grad_norm": 6.186668315422139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448750 + }, + { + "epoch": 2.17640490171235, + "grad_norm": 6.079105787648587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448760 + }, + { + "epoch": 2.176453399905186, + "grad_norm": 6.096956894907635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448770 + }, + { + "epoch": 2.176501898098022, + "grad_norm": 4.796949610863521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448780 + }, + { + "epoch": 2.176550396290858, + "grad_norm": 5.92911419516895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448790 + }, + { + "epoch": 2.1765988944836945, + "grad_norm": 6.212958965079451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448800 + }, + { + "epoch": 2.1766473926765304, + "grad_norm": 6.137482841950259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448810 + }, + { + "epoch": 2.1766958908693663, + "grad_norm": 5.817476562697266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448820 + }, + { + "epoch": 2.1767443890622027, + "grad_norm": 5.087026693217922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448830 + }, + { + "epoch": 2.1767928872550386, + "grad_norm": 5.544039254345989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448840 + }, + { + "epoch": 2.176841385447875, + "grad_norm": 6.276692943174567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448850 + }, + { + "epoch": 2.176889883640711, + "grad_norm": 5.486741088134295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448860 + }, + { + "epoch": 2.176938381833547, + "grad_norm": 5.971198220322549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448870 + }, + { + "epoch": 2.176986880026383, + "grad_norm": 4.683107590608415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448880 + }, + { + "epoch": 2.177035378219219, + "grad_norm": 5.749411684519146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448890 + }, + { + "epoch": 2.177083876412055, + "grad_norm": 5.514526151273458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448900 + }, + { + "epoch": 2.1771323746048914, + "grad_norm": 5.620851197818411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448910 + }, + { + "epoch": 2.1771808727977273, + "grad_norm": 5.614383553620428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448920 + }, + { + "epoch": 2.1772293709905632, + "grad_norm": 4.380851521545992e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448930 + }, + { + "epoch": 2.1772778691833996, + "grad_norm": 5.387756232266838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448940 + }, + { + "epoch": 2.1773263673762355, + "grad_norm": 5.31425087046955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448950 + }, + { + "epoch": 2.177374865569072, + "grad_norm": 5.531541091841063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448960 + }, + { + "epoch": 2.177423363761908, + "grad_norm": 5.472356292557379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448970 + }, + { + "epoch": 2.1774718619547437, + "grad_norm": 4.6284395693874103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448980 + }, + { + "epoch": 2.17752036014758, + "grad_norm": 5.408385277405614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 448990 + }, + { + "epoch": 2.177568858340416, + "grad_norm": 5.281263497636246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449000 + }, + { + "epoch": 2.1776173565332524, + "grad_norm": 5.524231028175564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449010 + }, + { + "epoch": 2.1776658547260883, + "grad_norm": 5.040747623752395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449020 + }, + { + "epoch": 2.1777143529189242, + "grad_norm": 4.3385148273955565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449030 + }, + { + "epoch": 2.1777628511117606, + "grad_norm": 5.001846261620813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449040 + }, + { + "epoch": 2.1778113493045965, + "grad_norm": 5.132668547958019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449050 + }, + { + "epoch": 2.1778598474974324, + "grad_norm": 5.168313919057255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449060 + }, + { + "epoch": 2.177908345690269, + "grad_norm": 4.992334083908645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449070 + }, + { + "epoch": 2.1779568438831047, + "grad_norm": 4.4736779614140687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449080 + }, + { + "epoch": 2.178005342075941, + "grad_norm": 5.165134098206181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449090 + }, + { + "epoch": 2.178053840268777, + "grad_norm": 5.040994324190251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449100 + }, + { + "epoch": 2.178102338461613, + "grad_norm": 5.329548571353371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449110 + }, + { + "epoch": 2.1781508366544493, + "grad_norm": 4.902786940874648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449120 + }, + { + "epoch": 2.1781993348472852, + "grad_norm": 4.370924955310329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449130 + }, + { + "epoch": 2.178247833040121, + "grad_norm": 4.78851688967552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449140 + }, + { + "epoch": 2.1782963312329575, + "grad_norm": 4.6112609197734855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449150 + }, + { + "epoch": 2.1783448294257934, + "grad_norm": 4.808533731193165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449160 + }, + { + "epoch": 2.17839332761863, + "grad_norm": 4.680615006691369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449170 + }, + { + "epoch": 2.1784418258114657, + "grad_norm": 3.9695115106042067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449180 + }, + { + "epoch": 2.1784903240043016, + "grad_norm": 4.958716885994363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449190 + }, + { + "epoch": 2.178538822197138, + "grad_norm": 4.831205728805799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449200 + }, + { + "epoch": 2.178587320389974, + "grad_norm": 4.926733367938141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449210 + }, + { + "epoch": 2.17863581858281, + "grad_norm": 4.988652904103219e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449220 + }, + { + "epoch": 2.1786843167756462, + "grad_norm": 3.8652424905194493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449230 + }, + { + "epoch": 2.178732814968482, + "grad_norm": 4.7536423153360374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449240 + }, + { + "epoch": 2.1787813131613185, + "grad_norm": 4.511538804763404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449250 + }, + { + "epoch": 2.1788298113541544, + "grad_norm": 4.936653681397729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449260 + }, + { + "epoch": 2.1788783095469904, + "grad_norm": 4.5316213004298334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449270 + }, + { + "epoch": 2.1789268077398267, + "grad_norm": 3.482365968920931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449280 + }, + { + "epoch": 2.1789753059326626, + "grad_norm": 4.722997459793987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449290 + }, + { + "epoch": 2.1790238041254986, + "grad_norm": 4.540517579698644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449300 + }, + { + "epoch": 2.179072302318335, + "grad_norm": 4.524782184489595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449310 + }, + { + "epoch": 2.179120800511171, + "grad_norm": 4.5314203589441604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449320 + }, + { + "epoch": 2.179169298704007, + "grad_norm": 3.5716942647923133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449330 + }, + { + "epoch": 2.179217796896843, + "grad_norm": 4.6462039904326957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449340 + }, + { + "epoch": 2.179266295089679, + "grad_norm": 4.285131467440806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449350 + }, + { + "epoch": 2.1793147932825154, + "grad_norm": 4.3525648152353824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449360 + }, + { + "epoch": 2.1793632914753513, + "grad_norm": 4.333859067173762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449370 + }, + { + "epoch": 2.1794117896681877, + "grad_norm": 3.6262878211346106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449380 + }, + { + "epoch": 2.1794602878610236, + "grad_norm": 4.50615800673404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449390 + }, + { + "epoch": 2.1795087860538596, + "grad_norm": 4.5110840574125177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449400 + }, + { + "epoch": 2.179557284246696, + "grad_norm": 4.1724041466295603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449410 + }, + { + "epoch": 2.179605782439532, + "grad_norm": 4.4272985633142525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449420 + }, + { + "epoch": 2.1796542806323678, + "grad_norm": 3.369763703631179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449430 + }, + { + "epoch": 2.179702778825204, + "grad_norm": 4.1229833414035966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449440 + }, + { + "epoch": 2.17975127701804, + "grad_norm": 4.1006703099810693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449450 + }, + { + "epoch": 2.179799775210876, + "grad_norm": 4.000674493909173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449460 + }, + { + "epoch": 2.1798482734037123, + "grad_norm": 4.236331108131708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449470 + }, + { + "epoch": 2.1798967715965483, + "grad_norm": 3.3361516216245946e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449480 + }, + { + "epoch": 2.1799452697893846, + "grad_norm": 4.255825558630022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449490 + }, + { + "epoch": 2.1799937679822206, + "grad_norm": 4.064002609993622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449500 + }, + { + "epoch": 2.1800422661750565, + "grad_norm": 3.91916074704568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449510 + }, + { + "epoch": 2.180090764367893, + "grad_norm": 4.0793224798107985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449520 + }, + { + "epoch": 2.1801392625607288, + "grad_norm": 3.4283527838852024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449530 + }, + { + "epoch": 2.180187760753565, + "grad_norm": 3.8052610307204304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449540 + }, + { + "epoch": 2.180236258946401, + "grad_norm": 4.0662155242898734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449550 + }, + { + "epoch": 2.180284757139237, + "grad_norm": 3.9908528037813085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449560 + }, + { + "epoch": 2.1803332553320733, + "grad_norm": 3.997982958026114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449570 + }, + { + "epoch": 2.1803817535249093, + "grad_norm": 3.192742212831945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449580 + }, + { + "epoch": 2.180430251717745, + "grad_norm": 4.0113894783644355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449590 + }, + { + "epoch": 2.1804787499105815, + "grad_norm": 3.8997313822619617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449600 + }, + { + "epoch": 2.1805272481034175, + "grad_norm": 3.7855917867091193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449610 + }, + { + "epoch": 2.180575746296254, + "grad_norm": 3.9056567402440123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449620 + }, + { + "epoch": 2.1806242444890898, + "grad_norm": 2.9936009582343104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449630 + }, + { + "epoch": 2.1806727426819257, + "grad_norm": 3.951224982756685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449640 + }, + { + "epoch": 2.180721240874762, + "grad_norm": 4.122370569348277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449650 + }, + { + "epoch": 2.180769739067598, + "grad_norm": 3.7516016959671106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449660 + }, + { + "epoch": 2.180818237260434, + "grad_norm": 3.6320855656413187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449670 + }, + { + "epoch": 2.1808667354532703, + "grad_norm": 3.138721069717576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449680 + }, + { + "epoch": 2.180915233646106, + "grad_norm": 3.761385585221433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449690 + }, + { + "epoch": 2.1809637318389425, + "grad_norm": 3.61291768058436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449700 + }, + { + "epoch": 2.1810122300317785, + "grad_norm": 3.814898548171186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449710 + }, + { + "epoch": 2.1810607282246144, + "grad_norm": 3.7251905382618133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449720 + }, + { + "epoch": 2.1811092264174508, + "grad_norm": 2.8367702498144354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449730 + }, + { + "epoch": 2.1811577246102867, + "grad_norm": 3.719087828812917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449740 + }, + { + "epoch": 2.1812062228031226, + "grad_norm": 3.9181600186566357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449750 + }, + { + "epoch": 2.181254720995959, + "grad_norm": 3.5899967087971163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449760 + }, + { + "epoch": 2.181303219188795, + "grad_norm": 3.612620673720812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449770 + }, + { + "epoch": 2.1813517173816312, + "grad_norm": 3.456210606600507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449780 + }, + { + "epoch": 2.181400215574467, + "grad_norm": 3.531025640768348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449790 + }, + { + "epoch": 2.181448713767303, + "grad_norm": 3.514334707688249e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449800 + }, + { + "epoch": 2.1814972119601395, + "grad_norm": 3.652672830867232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449810 + }, + { + "epoch": 2.1815457101529754, + "grad_norm": 3.8595598539359344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449820 + }, + { + "epoch": 2.1815942083458113, + "grad_norm": 2.808405099585798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449830 + }, + { + "epoch": 2.1816427065386477, + "grad_norm": 3.631158449479699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449840 + }, + { + "epoch": 2.1816912047314836, + "grad_norm": 3.476760355169972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449850 + }, + { + "epoch": 2.18173970292432, + "grad_norm": 3.3455845027674513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449860 + }, + { + "epoch": 2.181788201117156, + "grad_norm": 3.37653432325169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449870 + }, + { + "epoch": 2.181836699309992, + "grad_norm": 2.6878967673837906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449880 + }, + { + "epoch": 2.181885197502828, + "grad_norm": 3.410762303701631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449890 + }, + { + "epoch": 2.181933695695664, + "grad_norm": 3.4476997257115727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449900 + }, + { + "epoch": 2.1819821938885005, + "grad_norm": 3.1900214025881724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449910 + }, + { + "epoch": 2.1820306920813364, + "grad_norm": 3.416007245959918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449920 + }, + { + "epoch": 2.1820791902741723, + "grad_norm": 2.67780393414796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449930 + }, + { + "epoch": 2.1821276884670087, + "grad_norm": 3.319737800211442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449940 + }, + { + "epoch": 2.1821761866598446, + "grad_norm": 3.230383924801572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449950 + }, + { + "epoch": 2.1822246848526805, + "grad_norm": 3.185499508617795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449960 + }, + { + "epoch": 2.182273183045517, + "grad_norm": 3.4260963843735226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449970 + }, + { + "epoch": 2.182321681238353, + "grad_norm": 2.4587265556874627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449980 + }, + { + "epoch": 2.1823701794311887, + "grad_norm": 3.393390102246485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 449990 + }, + { + "epoch": 2.182418677624025, + "grad_norm": 3.321999031413725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450000 + }, + { + "epoch": 2.182467175816861, + "grad_norm": 3.3378583452758903e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450010 + }, + { + "epoch": 2.1825156740096974, + "grad_norm": 3.2933581906036125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450020 + }, + { + "epoch": 2.1825641722025333, + "grad_norm": 2.4377501972594473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450030 + }, + { + "epoch": 2.182612670395369, + "grad_norm": 3.450841745689104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450040 + }, + { + "epoch": 2.1826611685882056, + "grad_norm": 3.202512743882835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450050 + }, + { + "epoch": 2.1827096667810415, + "grad_norm": 3.383798627965007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450060 + }, + { + "epoch": 2.182758164973878, + "grad_norm": 3.2672511451892206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450070 + }, + { + "epoch": 2.182806663166714, + "grad_norm": 2.477881082540989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450080 + }, + { + "epoch": 2.1828551613595497, + "grad_norm": 3.070158811624424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450090 + }, + { + "epoch": 2.182903659552386, + "grad_norm": 3.181704357757553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450100 + }, + { + "epoch": 2.182952157745222, + "grad_norm": 3.0147273832881183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450110 + }, + { + "epoch": 2.183000655938058, + "grad_norm": 3.18869723514581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450120 + }, + { + "epoch": 2.1830491541308943, + "grad_norm": 2.478245733072981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450130 + }, + { + "epoch": 2.18309765232373, + "grad_norm": 3.02223895687348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450140 + }, + { + "epoch": 2.1831461505165666, + "grad_norm": 3.0714252829966426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450150 + }, + { + "epoch": 2.1831946487094025, + "grad_norm": 3.047888128548948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450160 + }, + { + "epoch": 2.1832431469022384, + "grad_norm": 3.0511586146531045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450170 + }, + { + "epoch": 2.183291645095075, + "grad_norm": 2.4236490503426467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450180 + }, + { + "epoch": 2.1833401432879107, + "grad_norm": 3.141329329992004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450190 + }, + { + "epoch": 2.1833886414807466, + "grad_norm": 3.0727787247997185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450200 + }, + { + "epoch": 2.183437139673583, + "grad_norm": 3.1818214552004065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450210 + }, + { + "epoch": 2.183485637866419, + "grad_norm": 3.074913195177942e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450220 + }, + { + "epoch": 2.1835341360592553, + "grad_norm": 2.3819538341740554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450230 + }, + { + "epoch": 2.183582634252091, + "grad_norm": 3.0197026035239105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450240 + }, + { + "epoch": 2.183631132444927, + "grad_norm": 2.9566905368483276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450250 + }, + { + "epoch": 2.1836796306377635, + "grad_norm": 2.896831006182765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450260 + }, + { + "epoch": 2.1837281288305994, + "grad_norm": 2.945427297618153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450270 + }, + { + "epoch": 2.1837766270234353, + "grad_norm": 2.186149004046456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450280 + }, + { + "epoch": 2.1838251252162717, + "grad_norm": 3.0172159881658445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450290 + }, + { + "epoch": 2.1838736234091076, + "grad_norm": 2.8775988880624936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450300 + }, + { + "epoch": 2.183922121601944, + "grad_norm": 2.955196123366477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450310 + }, + { + "epoch": 2.18397061979478, + "grad_norm": 2.9780514410049364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450320 + }, + { + "epoch": 2.184019117987616, + "grad_norm": 2.2579868641514622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450330 + }, + { + "epoch": 2.184067616180452, + "grad_norm": 3.0354689783962385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450340 + }, + { + "epoch": 2.184116114373288, + "grad_norm": 2.923396209553175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450350 + }, + { + "epoch": 2.184164612566124, + "grad_norm": 2.7920717116103333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450360 + }, + { + "epoch": 2.1842131107589604, + "grad_norm": 2.9186887218202173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450370 + }, + { + "epoch": 2.1842616089517963, + "grad_norm": 2.1990074117184122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450380 + }, + { + "epoch": 2.1843101071446327, + "grad_norm": 2.75005874073031e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450390 + }, + { + "epoch": 2.1843586053374686, + "grad_norm": 2.8861168743787857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450400 + }, + { + "epoch": 2.1844071035303045, + "grad_norm": 2.88766699441112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450410 + }, + { + "epoch": 2.184455601723141, + "grad_norm": 2.895398836244567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450420 + }, + { + "epoch": 2.184504099915977, + "grad_norm": 2.2117087894457654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450430 + }, + { + "epoch": 2.184552598108813, + "grad_norm": 2.70299892690673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450440 + }, + { + "epoch": 2.184601096301649, + "grad_norm": 2.90536632974181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450450 + }, + { + "epoch": 2.184649594494485, + "grad_norm": 2.897678825775074e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450460 + }, + { + "epoch": 2.1846980926873214, + "grad_norm": 2.6968709221364406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450470 + }, + { + "epoch": 2.1847465908801573, + "grad_norm": 2.1767948510387214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450480 + }, + { + "epoch": 2.1847950890729932, + "grad_norm": 2.7019140702577715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450490 + }, + { + "epoch": 2.1848435872658296, + "grad_norm": 2.913329240072926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450500 + }, + { + "epoch": 2.1848920854586655, + "grad_norm": 2.81189585393804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450510 + }, + { + "epoch": 2.1849405836515015, + "grad_norm": 2.9073319751660165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450520 + }, + { + "epoch": 2.184989081844338, + "grad_norm": 2.1512740033813316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450530 + }, + { + "epoch": 2.1850375800371737, + "grad_norm": 2.8017930731039087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450540 + }, + { + "epoch": 2.18508607823001, + "grad_norm": 2.7766813559537695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450550 + }, + { + "epoch": 2.185134576422846, + "grad_norm": 2.6463206381777127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450560 + }, + { + "epoch": 2.185183074615682, + "grad_norm": 2.760489223874174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450570 + }, + { + "epoch": 2.1852315728085183, + "grad_norm": 2.0700507263882173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450580 + }, + { + "epoch": 2.1852800710013542, + "grad_norm": 2.617120742343104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450590 + }, + { + "epoch": 2.1853285691941906, + "grad_norm": 2.7388620083002024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450600 + }, + { + "epoch": 2.1853770673870265, + "grad_norm": 2.641605760800303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450610 + }, + { + "epoch": 2.1854255655798625, + "grad_norm": 2.6587906631903024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450620 + }, + { + "epoch": 2.185474063772699, + "grad_norm": 2.001825833986004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450630 + }, + { + "epoch": 2.1855225619655347, + "grad_norm": 2.73789510174538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450640 + }, + { + "epoch": 2.1855710601583707, + "grad_norm": 2.675044754596456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450650 + }, + { + "epoch": 2.185619558351207, + "grad_norm": 2.6053518809021625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450660 + }, + { + "epoch": 2.185668056544043, + "grad_norm": 2.5472931497461104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450670 + }, + { + "epoch": 2.1857165547368793, + "grad_norm": 2.0785110166343657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450680 + }, + { + "epoch": 2.1857650529297152, + "grad_norm": 2.6335388270126714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450690 + }, + { + "epoch": 2.185813551122551, + "grad_norm": 2.5372665390932525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450700 + }, + { + "epoch": 2.1858620493153875, + "grad_norm": 2.5623296551202657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450710 + }, + { + "epoch": 2.1859105475082234, + "grad_norm": 2.668049603471445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450720 + }, + { + "epoch": 2.1859590457010594, + "grad_norm": 2.1361719859669392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450730 + }, + { + "epoch": 2.1860075438938957, + "grad_norm": 2.620835459765658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450740 + }, + { + "epoch": 2.1860560420867317, + "grad_norm": 2.631208246839378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450750 + }, + { + "epoch": 2.186104540279568, + "grad_norm": 2.5142216486528923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450760 + }, + { + "epoch": 2.186153038472404, + "grad_norm": 2.4345467863895465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450770 + }, + { + "epoch": 2.18620153666524, + "grad_norm": 1.9644350857106474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450780 + }, + { + "epoch": 2.1862500348580762, + "grad_norm": 2.5235820544367016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450790 + }, + { + "epoch": 2.186298533050912, + "grad_norm": 2.679310284747771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450800 + }, + { + "epoch": 2.186347031243748, + "grad_norm": 2.521134376820555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450810 + }, + { + "epoch": 2.1863955294365844, + "grad_norm": 2.6309712097827287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450820 + }, + { + "epoch": 2.1864440276294204, + "grad_norm": 2.0417662938143621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450830 + }, + { + "epoch": 2.1864925258222567, + "grad_norm": 2.5217738652827393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450840 + }, + { + "epoch": 2.1865410240150926, + "grad_norm": 2.4713142465770943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450850 + }, + { + "epoch": 2.1865895222079286, + "grad_norm": 2.4307834678438667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450860 + }, + { + "epoch": 2.186638020400765, + "grad_norm": 2.5495842237432953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450870 + }, + { + "epoch": 2.186686518593601, + "grad_norm": 1.9568382469969947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450880 + }, + { + "epoch": 2.186735016786437, + "grad_norm": 2.391021780567826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450890 + }, + { + "epoch": 2.186783514979273, + "grad_norm": 2.4619123450975167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450900 + }, + { + "epoch": 2.186832013172109, + "grad_norm": 2.429480616683577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450910 + }, + { + "epoch": 2.1868805113649454, + "grad_norm": 2.490530732757179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450920 + }, + { + "epoch": 2.1869290095577814, + "grad_norm": 2.086021595459897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450930 + }, + { + "epoch": 2.1869775077506173, + "grad_norm": 2.3301544160858612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450940 + }, + { + "epoch": 2.1870260059434536, + "grad_norm": 2.5512363777124847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450950 + }, + { + "epoch": 2.1870745041362896, + "grad_norm": 2.41687473589991e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450960 + }, + { + "epoch": 2.187123002329126, + "grad_norm": 2.507757130842947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450970 + }, + { + "epoch": 2.187171500521962, + "grad_norm": 1.8657712530512072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450980 + }, + { + "epoch": 2.1872199987147978, + "grad_norm": 2.488025643287983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 450990 + }, + { + "epoch": 2.187268496907634, + "grad_norm": 2.4527074060642917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451000 + }, + { + "epoch": 2.18731699510047, + "grad_norm": 2.3781994684668462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451010 + }, + { + "epoch": 2.187365493293306, + "grad_norm": 2.33768957969005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451020 + }, + { + "epoch": 2.1874139914861424, + "grad_norm": 1.9431273301506735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451030 + }, + { + "epoch": 2.1874624896789783, + "grad_norm": 2.502079325950035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451040 + }, + { + "epoch": 2.187510987871814, + "grad_norm": 2.3761262468724453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451050 + }, + { + "epoch": 2.1875594860646506, + "grad_norm": 2.3741014842926234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451060 + }, + { + "epoch": 2.1876079842574865, + "grad_norm": 2.386215953720239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451070 + }, + { + "epoch": 2.187656482450323, + "grad_norm": 1.8942321844406251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451080 + }, + { + "epoch": 2.1877049806431588, + "grad_norm": 2.510165302282985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451090 + }, + { + "epoch": 2.1877534788359947, + "grad_norm": 2.3644273028367024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451100 + }, + { + "epoch": 2.187801977028831, + "grad_norm": 2.3203675425520487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451110 + }, + { + "epoch": 2.187850475221667, + "grad_norm": 2.3040529129048082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451120 + }, + { + "epoch": 2.1878989734145033, + "grad_norm": 1.8614134944527905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451130 + }, + { + "epoch": 2.1879474716073393, + "grad_norm": 2.4908490559027996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451140 + }, + { + "epoch": 2.187995969800175, + "grad_norm": 2.2815760303274146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451150 + }, + { + "epoch": 2.1880444679930116, + "grad_norm": 2.2550827338818635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451160 + }, + { + "epoch": 2.1880929661858475, + "grad_norm": 2.2966702317717136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451170 + }, + { + "epoch": 2.1881414643786834, + "grad_norm": 1.8520476885441894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451180 + }, + { + "epoch": 2.1881899625715198, + "grad_norm": 2.2533423305048927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451190 + }, + { + "epoch": 2.1882384607643557, + "grad_norm": 2.2226596740893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451200 + }, + { + "epoch": 2.188286958957192, + "grad_norm": 2.2611101258007693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451210 + }, + { + "epoch": 2.188335457150028, + "grad_norm": 2.294285081916314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451220 + }, + { + "epoch": 2.188383955342864, + "grad_norm": 1.849845006063333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451230 + }, + { + "epoch": 2.1884324535357003, + "grad_norm": 2.2899314444657648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451240 + }, + { + "epoch": 2.188480951728536, + "grad_norm": 2.405885481948644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451250 + }, + { + "epoch": 2.188529449921372, + "grad_norm": 2.1942757655324385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451260 + }, + { + "epoch": 2.1885779481142085, + "grad_norm": 2.2224456586172892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451270 + }, + { + "epoch": 2.1886264463070444, + "grad_norm": 1.8253594191719458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451280 + }, + { + "epoch": 2.1886749444998808, + "grad_norm": 2.241508809674997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451290 + }, + { + "epoch": 2.1887234426927167, + "grad_norm": 2.2123620624370233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451300 + }, + { + "epoch": 2.1887719408855526, + "grad_norm": 2.2054146597838553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451310 + }, + { + "epoch": 2.188820439078389, + "grad_norm": 2.1976774178256164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451320 + }, + { + "epoch": 2.188868937271225, + "grad_norm": 1.7754743453224364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451330 + }, + { + "epoch": 2.1889174354640613, + "grad_norm": 2.3068669463555125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451340 + }, + { + "epoch": 2.188965933656897, + "grad_norm": 2.0812113632473483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451350 + }, + { + "epoch": 2.189014431849733, + "grad_norm": 2.161269350153816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451360 + }, + { + "epoch": 2.1890629300425695, + "grad_norm": 2.200749662506496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451370 + }, + { + "epoch": 2.1891114282354054, + "grad_norm": 2.0165842329333827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451380 + }, + { + "epoch": 2.1891599264282413, + "grad_norm": 2.0442763570827083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451390 + }, + { + "epoch": 2.1892084246210777, + "grad_norm": 2.17570914173848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451400 + }, + { + "epoch": 2.1892569228139136, + "grad_norm": 2.1233657321317878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451410 + }, + { + "epoch": 2.1893054210067495, + "grad_norm": 2.168041106642704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451420 + }, + { + "epoch": 2.189353919199586, + "grad_norm": 1.7252739326067967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451430 + }, + { + "epoch": 2.189402417392422, + "grad_norm": 2.1120605708802032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451440 + }, + { + "epoch": 2.189450915585258, + "grad_norm": 2.115898638521685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451450 + }, + { + "epoch": 2.189499413778094, + "grad_norm": 2.03539840981648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451460 + }, + { + "epoch": 2.18954791197093, + "grad_norm": 2.1133557481789467e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451470 + }, + { + "epoch": 2.1895964101637664, + "grad_norm": 1.607846229489951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451480 + }, + { + "epoch": 2.1896449083566023, + "grad_norm": 2.0527149047211424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451490 + }, + { + "epoch": 2.1896934065494387, + "grad_norm": 2.110576957647936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451500 + }, + { + "epoch": 2.1897419047422746, + "grad_norm": 2.193000199213202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451510 + }, + { + "epoch": 2.1897904029351105, + "grad_norm": 2.0373187226141454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451520 + }, + { + "epoch": 2.189838901127947, + "grad_norm": 1.5134541797579004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451530 + }, + { + "epoch": 2.189887399320783, + "grad_norm": 2.0593363103671436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451540 + }, + { + "epoch": 2.1899358975136187, + "grad_norm": 2.0234168118804519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451550 + }, + { + "epoch": 2.189984395706455, + "grad_norm": 1.9651655236430088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451560 + }, + { + "epoch": 2.190032893899291, + "grad_norm": 1.931754098905003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451570 + }, + { + "epoch": 2.190081392092127, + "grad_norm": 1.4267008907609124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451580 + }, + { + "epoch": 2.1901298902849633, + "grad_norm": 1.9325838707118237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451590 + }, + { + "epoch": 2.190178388477799, + "grad_norm": 1.9490978786507185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451600 + }, + { + "epoch": 2.1902268866706356, + "grad_norm": 2.0478758244735218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451610 + }, + { + "epoch": 2.1902753848634715, + "grad_norm": 1.89971174791026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451620 + }, + { + "epoch": 2.1903238830563074, + "grad_norm": 1.3783819952095655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451630 + }, + { + "epoch": 2.190372381249144, + "grad_norm": 2.0444838355615502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451640 + }, + { + "epoch": 2.1904208794419797, + "grad_norm": 1.892507413003841e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451650 + }, + { + "epoch": 2.190469377634816, + "grad_norm": 1.83441869694434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451660 + }, + { + "epoch": 2.190517875827652, + "grad_norm": 1.92408393218102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451670 + }, + { + "epoch": 2.190566374020488, + "grad_norm": 1.3410983967787615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451680 + }, + { + "epoch": 2.1906148722133243, + "grad_norm": 1.9036153275919787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451690 + }, + { + "epoch": 2.19066337040616, + "grad_norm": 1.845386918830627e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451700 + }, + { + "epoch": 2.190711868598996, + "grad_norm": 1.8690015224365197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451710 + }, + { + "epoch": 2.1907603667918325, + "grad_norm": 1.8639411791809835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451720 + }, + { + "epoch": 2.1908088649846684, + "grad_norm": 1.3155873546111252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451730 + }, + { + "epoch": 2.190857363177505, + "grad_norm": 1.753401477344596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451740 + }, + { + "epoch": 2.1909058613703407, + "grad_norm": 1.721994777881264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451750 + }, + { + "epoch": 2.1909543595631766, + "grad_norm": 1.8675437729598343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451760 + }, + { + "epoch": 2.191002857756013, + "grad_norm": 1.708767456420901e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451770 + }, + { + "epoch": 2.191051355948849, + "grad_norm": 1.1919567555196409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451780 + }, + { + "epoch": 2.191099854141685, + "grad_norm": 1.763429082757284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451790 + }, + { + "epoch": 2.191148352334521, + "grad_norm": 1.799073885422331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451800 + }, + { + "epoch": 2.191196850527357, + "grad_norm": 1.6425235571659869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451810 + }, + { + "epoch": 2.1912453487201935, + "grad_norm": 1.6088854692952737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451820 + }, + { + "epoch": 2.1912938469130294, + "grad_norm": 1.2225001455590245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451830 + }, + { + "epoch": 2.1913423451058653, + "grad_norm": 1.6368007038636279e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451840 + }, + { + "epoch": 2.1913908432987017, + "grad_norm": 1.5799371055891243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451850 + }, + { + "epoch": 2.1914393414915376, + "grad_norm": 1.6579873829414282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451860 + }, + { + "epoch": 2.191487839684374, + "grad_norm": 1.620770007093597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451870 + }, + { + "epoch": 2.19153633787721, + "grad_norm": 1.2135085114550748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451880 + }, + { + "epoch": 2.191584836070046, + "grad_norm": 1.634143274031885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451890 + }, + { + "epoch": 2.191633334262882, + "grad_norm": 1.5915232154384285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451900 + }, + { + "epoch": 2.191681832455718, + "grad_norm": 1.5137592868086358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451910 + }, + { + "epoch": 2.191730330648554, + "grad_norm": 1.6235810562648112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451920 + }, + { + "epoch": 2.1917788288413904, + "grad_norm": 1.0510683523534681e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451930 + }, + { + "epoch": 2.1918273270342263, + "grad_norm": 1.5501947814300365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451940 + }, + { + "epoch": 2.1918758252270623, + "grad_norm": 1.480175768620029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451950 + }, + { + "epoch": 2.1919243234198986, + "grad_norm": 1.4916372492734808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451960 + }, + { + "epoch": 2.1919728216127345, + "grad_norm": 1.635714568237745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451970 + }, + { + "epoch": 2.192021319805571, + "grad_norm": 1.1140674160969866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451980 + }, + { + "epoch": 2.192069817998407, + "grad_norm": 1.5501571226650412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 451990 + }, + { + "epoch": 2.1921183161912428, + "grad_norm": 1.5150492060911347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452000 + }, + { + "epoch": 2.192166814384079, + "grad_norm": 1.4493129185666476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452010 + }, + { + "epoch": 2.192215312576915, + "grad_norm": 1.5001566566752444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452020 + }, + { + "epoch": 2.1922638107697514, + "grad_norm": 1.0627660884665602e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452030 + }, + { + "epoch": 2.1923123089625873, + "grad_norm": 1.546029722021558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452040 + }, + { + "epoch": 2.1923608071554233, + "grad_norm": 1.4778159140860225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452050 + }, + { + "epoch": 2.1924093053482596, + "grad_norm": 1.5237745287777216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452060 + }, + { + "epoch": 2.1924578035410955, + "grad_norm": 1.4226837663500191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452070 + }, + { + "epoch": 2.1925063017339315, + "grad_norm": 1.0489708301975043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452080 + }, + { + "epoch": 2.192554799926768, + "grad_norm": 1.4646383306171629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452090 + }, + { + "epoch": 2.1926032981196037, + "grad_norm": 1.5404604880586703e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452100 + }, + { + "epoch": 2.19265179631244, + "grad_norm": 1.39172300350765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452110 + }, + { + "epoch": 2.192700294505276, + "grad_norm": 1.4548197668773355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452120 + }, + { + "epoch": 2.192748792698112, + "grad_norm": 1.0030922936721254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452130 + }, + { + "epoch": 2.1927972908909483, + "grad_norm": 1.4527989833368338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452140 + }, + { + "epoch": 2.1928457890837842, + "grad_norm": 1.4320328034500562e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452150 + }, + { + "epoch": 2.19289428727662, + "grad_norm": 1.3535769483041804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452160 + }, + { + "epoch": 2.1929427854694565, + "grad_norm": 1.441394630319337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452170 + }, + { + "epoch": 2.1929912836622925, + "grad_norm": 9.610877071963841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452180 + }, + { + "epoch": 2.193039781855129, + "grad_norm": 1.335754689080204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452190 + }, + { + "epoch": 2.1930882800479647, + "grad_norm": 1.3419408162462787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452200 + }, + { + "epoch": 2.1931367782408007, + "grad_norm": 1.3880715243885788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452210 + }, + { + "epoch": 2.193185276433637, + "grad_norm": 1.43481642567167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452220 + }, + { + "epoch": 2.193233774626473, + "grad_norm": 9.840792358772887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452230 + }, + { + "epoch": 2.193282272819309, + "grad_norm": 1.3399773024502792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452240 + }, + { + "epoch": 2.1933307710121452, + "grad_norm": 1.1843593483717996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452250 + }, + { + "epoch": 2.193379269204981, + "grad_norm": 1.356963252874266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452260 + }, + { + "epoch": 2.1934277673978175, + "grad_norm": 1.3408377697032847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452270 + }, + { + "epoch": 2.1934762655906535, + "grad_norm": 9.498390340922924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452280 + }, + { + "epoch": 2.1935247637834894, + "grad_norm": 1.2837931251397094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452290 + }, + { + "epoch": 2.1935732619763257, + "grad_norm": 1.3318145875018672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452300 + }, + { + "epoch": 2.1936217601691617, + "grad_norm": 1.3260677178550395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452310 + }, + { + "epoch": 2.1936702583619976, + "grad_norm": 1.264881603901813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452320 + }, + { + "epoch": 2.193718756554834, + "grad_norm": 9.423423819043819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452330 + }, + { + "epoch": 2.19376725474767, + "grad_norm": 1.2532787252439448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452340 + }, + { + "epoch": 2.1938157529405062, + "grad_norm": 1.3105517382427934e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452350 + }, + { + "epoch": 2.193864251133342, + "grad_norm": 1.214307872032805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452360 + }, + { + "epoch": 2.193912749326178, + "grad_norm": 1.2407980420903186e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452370 + }, + { + "epoch": 2.1939612475190144, + "grad_norm": 9.389042787688595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452380 + }, + { + "epoch": 2.1940097457118504, + "grad_norm": 1.246008878297289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452390 + }, + { + "epoch": 2.1940582439046867, + "grad_norm": 1.2701636364909064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452400 + }, + { + "epoch": 2.1941067420975227, + "grad_norm": 1.266160438717634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452410 + }, + { + "epoch": 2.1941552402903586, + "grad_norm": 1.2092566237242863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452420 + }, + { + "epoch": 2.194203738483195, + "grad_norm": 9.367082043354458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452430 + }, + { + "epoch": 2.194252236676031, + "grad_norm": 1.1844883829326136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452440 + }, + { + "epoch": 2.194300734868867, + "grad_norm": 1.2302244556394726e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452450 + }, + { + "epoch": 2.194349233061703, + "grad_norm": 1.2156600348589564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452460 + }, + { + "epoch": 2.194397731254539, + "grad_norm": 1.2656396108923218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452470 + }, + { + "epoch": 2.194446229447375, + "grad_norm": 9.110535614809123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452480 + }, + { + "epoch": 2.1944947276402114, + "grad_norm": 1.1822707790543063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452490 + }, + { + "epoch": 2.1945432258330473, + "grad_norm": 1.1319263393261281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452500 + }, + { + "epoch": 2.1945917240258837, + "grad_norm": 1.2179897623809666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452510 + }, + { + "epoch": 2.1946402222187196, + "grad_norm": 1.1698420365746642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452520 + }, + { + "epoch": 2.1946887204115555, + "grad_norm": 9.194747008223203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452530 + }, + { + "epoch": 2.194737218604392, + "grad_norm": 1.3029166723299568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452540 + }, + { + "epoch": 2.194785716797228, + "grad_norm": 1.220559937564758e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452550 + }, + { + "epoch": 2.194834214990064, + "grad_norm": 1.1710758229810381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452560 + }, + { + "epoch": 2.1948827131829, + "grad_norm": 1.1849003556108073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452570 + }, + { + "epoch": 2.194931211375736, + "grad_norm": 9.018388880122075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452580 + }, + { + "epoch": 2.1949797095685724, + "grad_norm": 1.2267855709069408e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452590 + }, + { + "epoch": 2.1950282077614083, + "grad_norm": 1.1964152690779883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452600 + }, + { + "epoch": 2.195076705954244, + "grad_norm": 1.1081541373414439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452610 + }, + { + "epoch": 2.1951252041470806, + "grad_norm": 1.2109403257909435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452620 + }, + { + "epoch": 2.1951737023399165, + "grad_norm": 8.830906494949886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452630 + }, + { + "epoch": 2.195222200532753, + "grad_norm": 1.1446499570411106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452640 + }, + { + "epoch": 2.1952706987255888, + "grad_norm": 1.163343483767676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452650 + }, + { + "epoch": 2.1953191969184247, + "grad_norm": 1.1208211958546599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452660 + }, + { + "epoch": 2.195367695111261, + "grad_norm": 1.1257631626904185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452670 + }, + { + "epoch": 2.195416193304097, + "grad_norm": 8.910556914543122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452680 + }, + { + "epoch": 2.195464691496933, + "grad_norm": 1.1352221918059513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452690 + }, + { + "epoch": 2.1955131896897693, + "grad_norm": 1.195139844867299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452700 + }, + { + "epoch": 2.195561687882605, + "grad_norm": 1.1197482052693886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452710 + }, + { + "epoch": 2.1956101860754416, + "grad_norm": 1.1814868372539422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452720 + }, + { + "epoch": 2.1956586842682775, + "grad_norm": 8.823352004583285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452730 + }, + { + "epoch": 2.1957071824611134, + "grad_norm": 1.1499590613084365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452740 + }, + { + "epoch": 2.1957556806539498, + "grad_norm": 1.170402654793179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452750 + }, + { + "epoch": 2.1958041788467857, + "grad_norm": 1.149275732359456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452760 + }, + { + "epoch": 2.1958526770396216, + "grad_norm": 1.0119163107447093e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452770 + }, + { + "epoch": 2.195901175232458, + "grad_norm": 8.527977257699604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452780 + }, + { + "epoch": 2.195949673425294, + "grad_norm": 1.0857434062927496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452790 + }, + { + "epoch": 2.1959981716181303, + "grad_norm": 1.1138004651911615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452800 + }, + { + "epoch": 2.196046669810966, + "grad_norm": 1.1051535864226025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452810 + }, + { + "epoch": 2.196095168003802, + "grad_norm": 1.1365100505145165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452820 + }, + { + "epoch": 2.1961436661966385, + "grad_norm": 8.931427686320603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452830 + }, + { + "epoch": 2.1961921643894744, + "grad_norm": 1.1952741374443576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452840 + }, + { + "epoch": 2.1962406625823103, + "grad_norm": 1.1398728361200483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452850 + }, + { + "epoch": 2.1962891607751467, + "grad_norm": 1.1122245524575192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452860 + }, + { + "epoch": 2.1963376589679826, + "grad_norm": 1.0938090611034568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452870 + }, + { + "epoch": 2.196386157160819, + "grad_norm": 8.829275088828581e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452880 + }, + { + "epoch": 2.196434655353655, + "grad_norm": 1.1321016302190401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452890 + }, + { + "epoch": 2.196483153546491, + "grad_norm": 1.0962421725935201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452900 + }, + { + "epoch": 2.196531651739327, + "grad_norm": 1.107495677388215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452910 + }, + { + "epoch": 2.196580149932163, + "grad_norm": 1.0781311488017309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452920 + }, + { + "epoch": 2.1966286481249995, + "grad_norm": 8.433470810587096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452930 + }, + { + "epoch": 2.1966771463178354, + "grad_norm": 1.0669553773823282e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452940 + }, + { + "epoch": 2.1967256445106713, + "grad_norm": 1.106463685118797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452950 + }, + { + "epoch": 2.1967741427035077, + "grad_norm": 1.0606792955059063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452960 + }, + { + "epoch": 2.1968226408963436, + "grad_norm": 1.0619061896477433e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452970 + }, + { + "epoch": 2.1968711390891795, + "grad_norm": 9.044202897712239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452980 + }, + { + "epoch": 2.196919637282016, + "grad_norm": 1.0419859819421617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 452990 + }, + { + "epoch": 2.196968135474852, + "grad_norm": 1.051966123100101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453000 + }, + { + "epoch": 2.1970166336676877, + "grad_norm": 1.0914357773117445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453010 + }, + { + "epoch": 2.197065131860524, + "grad_norm": 1.1670024946397461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453020 + }, + { + "epoch": 2.19711363005336, + "grad_norm": 8.297750753172295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453030 + }, + { + "epoch": 2.1971621282461964, + "grad_norm": 1.1210654093929406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453040 + }, + { + "epoch": 2.1972106264390323, + "grad_norm": 1.0415246975981063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453050 + }, + { + "epoch": 2.1972591246318682, + "grad_norm": 1.0422954943578588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453060 + }, + { + "epoch": 2.1973076228247046, + "grad_norm": 1.0591523391667579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453070 + }, + { + "epoch": 2.1973561210175405, + "grad_norm": 8.446215815638425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453080 + }, + { + "epoch": 2.197404619210377, + "grad_norm": 1.0780542680777216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453090 + }, + { + "epoch": 2.197453117403213, + "grad_norm": 1.0514905568470567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453100 + }, + { + "epoch": 2.1975016155960487, + "grad_norm": 1.048725692953667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453110 + }, + { + "epoch": 2.197550113788885, + "grad_norm": 1.0843417186379156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453120 + }, + { + "epoch": 2.197598611981721, + "grad_norm": 8.546999197278637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453130 + }, + { + "epoch": 2.197647110174557, + "grad_norm": 1.0967168861952814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453140 + }, + { + "epoch": 2.1976956083673933, + "grad_norm": 1.053625879876563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453150 + }, + { + "epoch": 2.1977441065602292, + "grad_norm": 1.0593447541396017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453160 + }, + { + "epoch": 2.1977926047530656, + "grad_norm": 1.0632651736841581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453170 + }, + { + "epoch": 2.1978411029459015, + "grad_norm": 8.570255261020066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453180 + }, + { + "epoch": 2.1978896011387374, + "grad_norm": 1.0707796604947362e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453190 + }, + { + "epoch": 2.197938099331574, + "grad_norm": 1.0649687709474165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453200 + }, + { + "epoch": 2.1979865975244097, + "grad_norm": 1.083324576711675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453210 + }, + { + "epoch": 2.1980350957172456, + "grad_norm": 1.0361363678157431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453220 + }, + { + "epoch": 2.198083593910082, + "grad_norm": 8.483876712261917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453230 + }, + { + "epoch": 2.198132092102918, + "grad_norm": 1.080878178072453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453240 + }, + { + "epoch": 2.1981805902957543, + "grad_norm": 1.0052918497649443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453250 + }, + { + "epoch": 2.19822908848859, + "grad_norm": 1.0287887874937951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453260 + }, + { + "epoch": 2.198277586681426, + "grad_norm": 1.0290771967902401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453270 + }, + { + "epoch": 2.1983260848742625, + "grad_norm": 8.392994743644522e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453280 + }, + { + "epoch": 2.1983745830670984, + "grad_norm": 1.0670553507452496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453290 + }, + { + "epoch": 2.1984230812599344, + "grad_norm": 1.021200901618613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453300 + }, + { + "epoch": 2.1984715794527707, + "grad_norm": 9.794010935593178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453310 + }, + { + "epoch": 2.1985200776456066, + "grad_norm": 9.981890514154657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453320 + }, + { + "epoch": 2.198568575838443, + "grad_norm": 8.130025008767916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453330 + }, + { + "epoch": 2.198617074031279, + "grad_norm": 1.0731201882663299e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453340 + }, + { + "epoch": 2.198665572224115, + "grad_norm": 1.0371263670094777e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453350 + }, + { + "epoch": 2.198714070416951, + "grad_norm": 1.0213191359298435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453360 + }, + { + "epoch": 2.198762568609787, + "grad_norm": 9.998548478051816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453370 + }, + { + "epoch": 2.198811066802623, + "grad_norm": 8.141158502894541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453380 + }, + { + "epoch": 2.1988595649954594, + "grad_norm": 1.1014343925808134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453390 + }, + { + "epoch": 2.1989080631882953, + "grad_norm": 1.0165631891823068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453400 + }, + { + "epoch": 2.1989565613811317, + "grad_norm": 1.0356970392422227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453410 + }, + { + "epoch": 2.1990050595739676, + "grad_norm": 1.0065760136512836e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453420 + }, + { + "epoch": 2.1990535577668036, + "grad_norm": 8.060234080176087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453430 + }, + { + "epoch": 2.19910205595964, + "grad_norm": 1.0201013367350242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453440 + }, + { + "epoch": 2.199150554152476, + "grad_norm": 1.0026062824408655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453450 + }, + { + "epoch": 2.199199052345312, + "grad_norm": 9.983900639554122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453460 + }, + { + "epoch": 2.199247550538148, + "grad_norm": 1.0187399368533079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453470 + }, + { + "epoch": 2.199296048730984, + "grad_norm": 8.077377344761771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453480 + }, + { + "epoch": 2.1993445469238204, + "grad_norm": 1.0735384137205983e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453490 + }, + { + "epoch": 2.1993930451166563, + "grad_norm": 9.835322600793006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453500 + }, + { + "epoch": 2.1994415433094923, + "grad_norm": 1.0075393674924271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453510 + }, + { + "epoch": 2.1994900415023286, + "grad_norm": 1.0000587025160712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453520 + }, + { + "epoch": 2.1995385396951646, + "grad_norm": 7.95068686443301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453530 + }, + { + "epoch": 2.1995870378880005, + "grad_norm": 1.0002069927850243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453540 + }, + { + "epoch": 2.199635536080837, + "grad_norm": 9.855764915300824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453550 + }, + { + "epoch": 2.1996840342736728, + "grad_norm": 9.823826729871143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453560 + }, + { + "epoch": 2.199732532466509, + "grad_norm": 1.0311292442111153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453570 + }, + { + "epoch": 2.199781030659345, + "grad_norm": 7.936472457004129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453580 + }, + { + "epoch": 2.199829528852181, + "grad_norm": 9.739702022670826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453590 + }, + { + "epoch": 2.1998780270450173, + "grad_norm": 9.70098099628558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453600 + }, + { + "epoch": 2.1999265252378533, + "grad_norm": 9.584825022557197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453610 + }, + { + "epoch": 2.1999750234306896, + "grad_norm": 9.65561568477824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453620 + }, + { + "epoch": 2.2000235216235255, + "grad_norm": 8.447031518699077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453630 + }, + { + "epoch": 2.2000720198163615, + "grad_norm": 9.688795188367294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453640 + }, + { + "epoch": 2.200120518009198, + "grad_norm": 9.483986929126331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453650 + }, + { + "epoch": 2.2001690162020338, + "grad_norm": 9.756374197422701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453660 + }, + { + "epoch": 2.2002175143948697, + "grad_norm": 1.0423777752066599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453670 + }, + { + "epoch": 2.200266012587706, + "grad_norm": 7.911081922884478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453680 + }, + { + "epoch": 2.200314510780542, + "grad_norm": 1.007897694194071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453690 + }, + { + "epoch": 2.2003630089733783, + "grad_norm": 1.0336864164628423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453700 + }, + { + "epoch": 2.2004115071662143, + "grad_norm": 9.490359076380628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453710 + }, + { + "epoch": 2.20046000535905, + "grad_norm": 9.44064808550138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453720 + }, + { + "epoch": 2.2005085035518865, + "grad_norm": 7.839996385428094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453730 + }, + { + "epoch": 2.2005570017447225, + "grad_norm": 9.533914635539986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453740 + }, + { + "epoch": 2.2006054999375584, + "grad_norm": 1.0564881591790254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453750 + }, + { + "epoch": 2.2006539981303948, + "grad_norm": 9.506661768909908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453760 + }, + { + "epoch": 2.2007024963232307, + "grad_norm": 9.599502703849794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453770 + }, + { + "epoch": 2.200750994516067, + "grad_norm": 7.85320608542861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453780 + }, + { + "epoch": 2.200799492708903, + "grad_norm": 9.900109887439612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453790 + }, + { + "epoch": 2.200847990901739, + "grad_norm": 9.557060565157371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453800 + }, + { + "epoch": 2.2008964890945752, + "grad_norm": 9.818732138455744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453810 + }, + { + "epoch": 2.200944987287411, + "grad_norm": 9.591845184786507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453820 + }, + { + "epoch": 2.200993485480247, + "grad_norm": 7.79919346882707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453830 + }, + { + "epoch": 2.2010419836730835, + "grad_norm": 9.864582750651607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453840 + }, + { + "epoch": 2.2010904818659194, + "grad_norm": 9.588999461129788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453850 + }, + { + "epoch": 2.2011389800587557, + "grad_norm": 9.463884964588942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453860 + }, + { + "epoch": 2.2011874782515917, + "grad_norm": 9.694486635680732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453870 + }, + { + "epoch": 2.2012359764444276, + "grad_norm": 7.880397845383413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453880 + }, + { + "epoch": 2.201284474637264, + "grad_norm": 9.409749424094116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453890 + }, + { + "epoch": 2.2013329728301, + "grad_norm": 9.478544882313145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453900 + }, + { + "epoch": 2.201381471022936, + "grad_norm": 9.645406606750839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453910 + }, + { + "epoch": 2.201429969215772, + "grad_norm": 9.630485919842613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453920 + }, + { + "epoch": 2.201478467408608, + "grad_norm": 7.728753814717493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453930 + }, + { + "epoch": 2.2015269656014445, + "grad_norm": 9.426768343701042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453940 + }, + { + "epoch": 2.2015754637942804, + "grad_norm": 9.709134474178427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453950 + }, + { + "epoch": 2.2016239619871163, + "grad_norm": 9.155727553888937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453960 + }, + { + "epoch": 2.2016724601799527, + "grad_norm": 9.611836304657118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453970 + }, + { + "epoch": 2.2017209583727886, + "grad_norm": 7.768544918462794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453980 + }, + { + "epoch": 2.201769456565625, + "grad_norm": 9.68517781529954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 453990 + }, + { + "epoch": 2.201817954758461, + "grad_norm": 9.453207638898675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454000 + }, + { + "epoch": 2.201866452951297, + "grad_norm": 9.3843596005172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454010 + }, + { + "epoch": 2.201914951144133, + "grad_norm": 9.220327967796038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454020 + }, + { + "epoch": 2.201963449336969, + "grad_norm": 7.353749964522649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454030 + }, + { + "epoch": 2.202011947529805, + "grad_norm": 9.341635376358681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454040 + }, + { + "epoch": 2.2020604457226414, + "grad_norm": 9.196869399374918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454050 + }, + { + "epoch": 2.2021089439154773, + "grad_norm": 9.064392969548862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454060 + }, + { + "epoch": 2.202157442108313, + "grad_norm": 9.342920748167671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454070 + }, + { + "epoch": 2.2022059403011496, + "grad_norm": 7.49117603504601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454080 + }, + { + "epoch": 2.2022544384939855, + "grad_norm": 9.319602867208232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454090 + }, + { + "epoch": 2.202302936686822, + "grad_norm": 8.708178711458459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454100 + }, + { + "epoch": 2.202351434879658, + "grad_norm": 9.554469215800054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454110 + }, + { + "epoch": 2.2023999330724937, + "grad_norm": 9.03645371863604e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454120 + }, + { + "epoch": 2.20244843126533, + "grad_norm": 7.779529909157645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454130 + }, + { + "epoch": 2.202496929458166, + "grad_norm": 9.001232115224411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454140 + }, + { + "epoch": 2.2025454276510024, + "grad_norm": 9.177673376825624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454150 + }, + { + "epoch": 2.2025939258438383, + "grad_norm": 9.18534937000004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454160 + }, + { + "epoch": 2.202642424036674, + "grad_norm": 8.739052503869971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454170 + }, + { + "epoch": 2.2026909222295106, + "grad_norm": 7.443086502689766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454180 + }, + { + "epoch": 2.2027394204223465, + "grad_norm": 9.245002274838043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454190 + }, + { + "epoch": 2.2027879186151824, + "grad_norm": 9.036939729867299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454200 + }, + { + "epoch": 2.202836416808019, + "grad_norm": 8.874822299276275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454210 + }, + { + "epoch": 2.2028849150008547, + "grad_norm": 9.541869872009556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454220 + }, + { + "epoch": 2.202933413193691, + "grad_norm": 7.62932614861711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454230 + }, + { + "epoch": 2.202981911386527, + "grad_norm": 9.169704640044074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454240 + }, + { + "epoch": 2.203030409579363, + "grad_norm": 9.412727308699687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454250 + }, + { + "epoch": 2.2030789077721993, + "grad_norm": 8.993842470772506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454260 + }, + { + "epoch": 2.203127405965035, + "grad_norm": 8.816105889764003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454270 + }, + { + "epoch": 2.203175904157871, + "grad_norm": 7.22981070566675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454280 + }, + { + "epoch": 2.2032244023507075, + "grad_norm": 8.728711975436454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454290 + }, + { + "epoch": 2.2032729005435434, + "grad_norm": 9.082955187977859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454300 + }, + { + "epoch": 2.2033213987363798, + "grad_norm": 9.008182644265617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454310 + }, + { + "epoch": 2.2033698969292157, + "grad_norm": 8.943865736910084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454320 + }, + { + "epoch": 2.2034183951220516, + "grad_norm": 7.506814370117354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454330 + }, + { + "epoch": 2.203466893314888, + "grad_norm": 8.757321978691834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454340 + }, + { + "epoch": 2.203515391507724, + "grad_norm": 8.990500077743491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454350 + }, + { + "epoch": 2.20356388970056, + "grad_norm": 8.501984893882764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454360 + }, + { + "epoch": 2.203612387893396, + "grad_norm": 8.961120556705282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454370 + }, + { + "epoch": 2.203660886086232, + "grad_norm": 7.307048122129345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454380 + }, + { + "epoch": 2.2037093842790685, + "grad_norm": 8.940335050056092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454390 + }, + { + "epoch": 2.2037578824719044, + "grad_norm": 8.706725651563829e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454400 + }, + { + "epoch": 2.2038063806647403, + "grad_norm": 8.799392503533454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454410 + }, + { + "epoch": 2.2038548788575767, + "grad_norm": 8.755996105946906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454420 + }, + { + "epoch": 2.2039033770504126, + "grad_norm": 7.098031318264475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454430 + }, + { + "epoch": 2.2039518752432485, + "grad_norm": 8.270808393717743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454440 + }, + { + "epoch": 2.204000373436085, + "grad_norm": 8.207189949871463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454450 + }, + { + "epoch": 2.204048871628921, + "grad_norm": 8.480977697900016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454460 + }, + { + "epoch": 2.204097369821757, + "grad_norm": 8.577038812518367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454470 + }, + { + "epoch": 2.204145868014593, + "grad_norm": 7.211424701836222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454480 + }, + { + "epoch": 2.204194366207429, + "grad_norm": 8.893568548273834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454490 + }, + { + "epoch": 2.2042428644002654, + "grad_norm": 8.175403820587235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454500 + }, + { + "epoch": 2.2042913625931013, + "grad_norm": 8.684804697622894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454510 + }, + { + "epoch": 2.2043398607859377, + "grad_norm": 8.215771174491238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454520 + }, + { + "epoch": 2.2043883589787736, + "grad_norm": 7.206428875861093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454530 + }, + { + "epoch": 2.2044368571716095, + "grad_norm": 8.45281107331175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454540 + }, + { + "epoch": 2.204485355364446, + "grad_norm": 8.50416412845334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454550 + }, + { + "epoch": 2.204533853557282, + "grad_norm": 8.886789970574682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454560 + }, + { + "epoch": 2.2045823517501177, + "grad_norm": 8.834904008381272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454570 + }, + { + "epoch": 2.204630849942954, + "grad_norm": 7.375127353270727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454580 + }, + { + "epoch": 2.20467934813579, + "grad_norm": 8.64566089830987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454590 + }, + { + "epoch": 2.204727846328626, + "grad_norm": 8.882472002369468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454600 + }, + { + "epoch": 2.2047763445214623, + "grad_norm": 8.06448028356499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454610 + }, + { + "epoch": 2.2048248427142982, + "grad_norm": 8.342762214397226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454620 + }, + { + "epoch": 2.2048733409071346, + "grad_norm": 7.221998998829804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454630 + }, + { + "epoch": 2.2049218390999705, + "grad_norm": 8.321457301008195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454640 + }, + { + "epoch": 2.2049703372928064, + "grad_norm": 8.159594244716573e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454650 + }, + { + "epoch": 2.205018835485643, + "grad_norm": 8.288569830483539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454660 + }, + { + "epoch": 2.2050673336784787, + "grad_norm": 8.30586941447109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454670 + }, + { + "epoch": 2.205115831871315, + "grad_norm": 7.143192704006651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454680 + }, + { + "epoch": 2.205164330064151, + "grad_norm": 8.244797555789773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454690 + }, + { + "epoch": 2.205212828256987, + "grad_norm": 8.584600408312326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454700 + }, + { + "epoch": 2.2052613264498233, + "grad_norm": 8.426199826772063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454710 + }, + { + "epoch": 2.2053098246426592, + "grad_norm": 8.174298216090392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454720 + }, + { + "epoch": 2.205358322835495, + "grad_norm": 7.090135767384709e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454730 + }, + { + "epoch": 2.2054068210283315, + "grad_norm": 8.200152734616495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454740 + }, + { + "epoch": 2.2054553192211674, + "grad_norm": 8.665476514124748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454750 + }, + { + "epoch": 2.205503817414004, + "grad_norm": 7.886738728757337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454760 + }, + { + "epoch": 2.2055523156068397, + "grad_norm": 8.271032925222244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454770 + }, + { + "epoch": 2.2056008137996757, + "grad_norm": 6.889436576784647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454780 + }, + { + "epoch": 2.205649311992512, + "grad_norm": 8.004941065564708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454790 + }, + { + "epoch": 2.205697810185348, + "grad_norm": 8.165367404444623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454800 + }, + { + "epoch": 2.205746308378184, + "grad_norm": 7.994301398639436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454810 + }, + { + "epoch": 2.2057948065710202, + "grad_norm": 8.289332242839009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454820 + }, + { + "epoch": 2.205843304763856, + "grad_norm": 6.85239811559768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454830 + }, + { + "epoch": 2.2058918029566925, + "grad_norm": 8.612548185737978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454840 + }, + { + "epoch": 2.2059403011495284, + "grad_norm": 8.226963643664931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454850 + }, + { + "epoch": 2.2059887993423644, + "grad_norm": 8.402737705637264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454860 + }, + { + "epoch": 2.2060372975352007, + "grad_norm": 7.732661089221438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454870 + }, + { + "epoch": 2.2060857957280366, + "grad_norm": 6.495518789506605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454880 + }, + { + "epoch": 2.2061342939208726, + "grad_norm": 8.1054061240593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454890 + }, + { + "epoch": 2.206182792113709, + "grad_norm": 7.986302819062985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454900 + }, + { + "epoch": 2.206231290306545, + "grad_norm": 7.842135829605468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454910 + }, + { + "epoch": 2.2062797884993812, + "grad_norm": 8.089230618679721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454920 + }, + { + "epoch": 2.206328286692217, + "grad_norm": 6.816937769826836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454930 + }, + { + "epoch": 2.206376784885053, + "grad_norm": 7.657663303461959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454940 + }, + { + "epoch": 2.2064252830778894, + "grad_norm": 7.983582861470495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454950 + }, + { + "epoch": 2.2064737812707254, + "grad_norm": 7.947357971715974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454960 + }, + { + "epoch": 2.2065222794635613, + "grad_norm": 7.886993813599474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454970 + }, + { + "epoch": 2.2065707776563976, + "grad_norm": 6.382418860084726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454980 + }, + { + "epoch": 2.2066192758492336, + "grad_norm": 7.651336630942751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 454990 + }, + { + "epoch": 2.20666777404207, + "grad_norm": 7.951091163249657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455000 + }, + { + "epoch": 2.206716272234906, + "grad_norm": 7.90360061841966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455010 + }, + { + "epoch": 2.2067647704277418, + "grad_norm": 8.224479586260713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455020 + }, + { + "epoch": 2.206813268620578, + "grad_norm": 6.62325447819967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455030 + }, + { + "epoch": 2.206861766813414, + "grad_norm": 7.68962493680192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455040 + }, + { + "epoch": 2.2069102650062504, + "grad_norm": 7.576258553854132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455050 + }, + { + "epoch": 2.2069587631990863, + "grad_norm": 7.590922024292013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455060 + }, + { + "epoch": 2.2070072613919223, + "grad_norm": 7.949862634859528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455070 + }, + { + "epoch": 2.2070557595847586, + "grad_norm": 6.280435371763815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455080 + }, + { + "epoch": 2.2071042577775946, + "grad_norm": 7.827653547565205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455090 + }, + { + "epoch": 2.2071527559704305, + "grad_norm": 7.447678029848248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455100 + }, + { + "epoch": 2.207201254163267, + "grad_norm": 8.269132223404085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455110 + }, + { + "epoch": 2.2072497523561028, + "grad_norm": 7.559292214409652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455120 + }, + { + "epoch": 2.2072982505489387, + "grad_norm": 6.31997281175245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455130 + }, + { + "epoch": 2.207346748741775, + "grad_norm": 7.592173290049686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455140 + }, + { + "epoch": 2.207395246934611, + "grad_norm": 7.786753997152118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455150 + }, + { + "epoch": 2.2074437451274473, + "grad_norm": 7.512350919114397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455160 + }, + { + "epoch": 2.2074922433202833, + "grad_norm": 7.71185995063206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455170 + }, + { + "epoch": 2.207540741513119, + "grad_norm": 6.155867282586769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455180 + }, + { + "epoch": 2.2075892397059556, + "grad_norm": 7.884100483579459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455190 + }, + { + "epoch": 2.2076377378987915, + "grad_norm": 7.6938547977079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455200 + }, + { + "epoch": 2.207686236091628, + "grad_norm": 7.57620526314895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455210 + }, + { + "epoch": 2.2077347342844638, + "grad_norm": 7.506829291514805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455220 + }, + { + "epoch": 2.2077832324772997, + "grad_norm": 5.990031581859512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455230 + }, + { + "epoch": 2.207831730670136, + "grad_norm": 7.411375690935529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455240 + }, + { + "epoch": 2.207880228862972, + "grad_norm": 7.16883334916929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455250 + }, + { + "epoch": 2.207928727055808, + "grad_norm": 7.560947068441237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455260 + }, + { + "epoch": 2.2079772252486443, + "grad_norm": 7.33962082222206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455270 + }, + { + "epoch": 2.20802572344148, + "grad_norm": 6.620813053359598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455280 + }, + { + "epoch": 2.2080742216343165, + "grad_norm": 7.345338559616721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455290 + }, + { + "epoch": 2.2081227198271525, + "grad_norm": 7.127287204866661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455300 + }, + { + "epoch": 2.2081712180199884, + "grad_norm": 7.429442661077701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455310 + }, + { + "epoch": 2.2082197162128248, + "grad_norm": 7.723200212694792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455320 + }, + { + "epoch": 2.2082682144056607, + "grad_norm": 5.8606211439382605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455330 + }, + { + "epoch": 2.2083167125984966, + "grad_norm": 6.994549295313846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455340 + }, + { + "epoch": 2.208365210791333, + "grad_norm": 7.542503510649112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455350 + }, + { + "epoch": 2.208413708984169, + "grad_norm": 7.656487355234276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455360 + }, + { + "epoch": 2.2084622071770053, + "grad_norm": 7.09877454596608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455370 + }, + { + "epoch": 2.208510705369841, + "grad_norm": 6.185588574680878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455380 + }, + { + "epoch": 2.208559203562677, + "grad_norm": 7.23937318980461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455390 + }, + { + "epoch": 2.2086077017555135, + "grad_norm": 6.921217021726989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455400 + }, + { + "epoch": 2.2086561999483494, + "grad_norm": 7.269009927313164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455410 + }, + { + "epoch": 2.2087046981411853, + "grad_norm": 7.15652603844319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455420 + }, + { + "epoch": 2.2087531963340217, + "grad_norm": 5.96388858298269e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455430 + }, + { + "epoch": 2.2088016945268576, + "grad_norm": 7.426501014151654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455440 + }, + { + "epoch": 2.208850192719694, + "grad_norm": 7.002712010262258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455450 + }, + { + "epoch": 2.20889869091253, + "grad_norm": 7.126133994006523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455460 + }, + { + "epoch": 2.208947189105366, + "grad_norm": 7.169149540686703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455470 + }, + { + "epoch": 2.208995687298202, + "grad_norm": 5.683774162434929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455480 + }, + { + "epoch": 2.209044185491038, + "grad_norm": 7.011577451976336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455490 + }, + { + "epoch": 2.209092683683874, + "grad_norm": 7.17426473784144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455500 + }, + { + "epoch": 2.2091411818767104, + "grad_norm": 7.344723229607553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455510 + }, + { + "epoch": 2.2091896800695463, + "grad_norm": 7.230719489825788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455520 + }, + { + "epoch": 2.2092381782623827, + "grad_norm": 5.724645291138586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455530 + }, + { + "epoch": 2.2092866764552186, + "grad_norm": 6.891959714039331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455540 + }, + { + "epoch": 2.2093351746480545, + "grad_norm": 6.990061507394785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455550 + }, + { + "epoch": 2.209383672840891, + "grad_norm": 6.920243578178997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455560 + }, + { + "epoch": 2.209432171033727, + "grad_norm": 6.991038503656455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455570 + }, + { + "epoch": 2.209480669226563, + "grad_norm": 5.650945666957341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455580 + }, + { + "epoch": 2.209529167419399, + "grad_norm": 7.088871001315056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455590 + }, + { + "epoch": 2.209577665612235, + "grad_norm": 6.655000106547959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455600 + }, + { + "epoch": 2.2096261638050714, + "grad_norm": 7.063722762268299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455610 + }, + { + "epoch": 2.2096746619979073, + "grad_norm": 7.320417694245407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455620 + }, + { + "epoch": 2.209723160190743, + "grad_norm": 6.079002901060448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455630 + }, + { + "epoch": 2.2097716583835796, + "grad_norm": 6.89002419562712e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455640 + }, + { + "epoch": 2.2098201565764155, + "grad_norm": 6.948609154733276e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455650 + }, + { + "epoch": 2.2098686547692514, + "grad_norm": 6.588513201677415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455660 + }, + { + "epoch": 2.209917152962088, + "grad_norm": 6.494998672224028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455670 + }, + { + "epoch": 2.2099656511549237, + "grad_norm": 5.450542062135355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455680 + }, + { + "epoch": 2.21001414934776, + "grad_norm": 6.576029676352846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455690 + }, + { + "epoch": 2.210062647540596, + "grad_norm": 6.838521215968285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455700 + }, + { + "epoch": 2.210111145733432, + "grad_norm": 6.842100219728309e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455710 + }, + { + "epoch": 2.2101596439262683, + "grad_norm": 6.642445526949814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455720 + }, + { + "epoch": 2.210208142119104, + "grad_norm": 5.5706784962694655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455730 + }, + { + "epoch": 2.2102566403119406, + "grad_norm": 6.852557987713226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455740 + }, + { + "epoch": 2.2103051385047765, + "grad_norm": 6.601992197374784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455750 + }, + { + "epoch": 2.2103536366976124, + "grad_norm": 6.609026570458809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455760 + }, + { + "epoch": 2.210402134890449, + "grad_norm": 6.697835175373257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455770 + }, + { + "epoch": 2.2104506330832847, + "grad_norm": 5.2769475900049656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455780 + }, + { + "epoch": 2.2104991312761206, + "grad_norm": 6.463564972136737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455790 + }, + { + "epoch": 2.210547629468957, + "grad_norm": 6.690228104844209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455800 + }, + { + "epoch": 2.210596127661793, + "grad_norm": 6.616206604803665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455810 + }, + { + "epoch": 2.2106446258546293, + "grad_norm": 6.573474564675053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455820 + }, + { + "epoch": 2.210693124047465, + "grad_norm": 5.207838427168099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455830 + }, + { + "epoch": 2.210741622240301, + "grad_norm": 6.802203245115379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455840 + }, + { + "epoch": 2.2107901204331375, + "grad_norm": 6.669637286904617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455850 + }, + { + "epoch": 2.2108386186259734, + "grad_norm": 6.214470715804055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455860 + }, + { + "epoch": 2.2108871168188093, + "grad_norm": 6.66797603798841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455870 + }, + { + "epoch": 2.2109356150116457, + "grad_norm": 5.117658474773634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455880 + }, + { + "epoch": 2.2109841132044816, + "grad_norm": 6.588985712596696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455890 + }, + { + "epoch": 2.211032611397318, + "grad_norm": 6.387600137713889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455900 + }, + { + "epoch": 2.211081109590154, + "grad_norm": 6.392749440919943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455910 + }, + { + "epoch": 2.21112960778299, + "grad_norm": 6.45106439378651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455920 + }, + { + "epoch": 2.211178105975826, + "grad_norm": 5.0446338661913614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455930 + }, + { + "epoch": 2.211226604168662, + "grad_norm": 6.493882409586149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455940 + }, + { + "epoch": 2.2112751023614985, + "grad_norm": 6.488740922350189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455950 + }, + { + "epoch": 2.2113236005543344, + "grad_norm": 6.409103292526197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455960 + }, + { + "epoch": 2.2113720987471703, + "grad_norm": 6.299557497868591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455970 + }, + { + "epoch": 2.2114205969400067, + "grad_norm": 5.0335177803617626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455980 + }, + { + "epoch": 2.2114690951328426, + "grad_norm": 6.586586209778034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 455990 + }, + { + "epoch": 2.2115175933256785, + "grad_norm": 6.028185595141622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456000 + }, + { + "epoch": 2.211566091518515, + "grad_norm": 6.481098324684353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456010 + }, + { + "epoch": 2.211614589711351, + "grad_norm": 6.553507603257458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456020 + }, + { + "epoch": 2.2116630879041868, + "grad_norm": 4.819413845780218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456030 + }, + { + "epoch": 2.211711586097023, + "grad_norm": 6.06664656288558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456040 + }, + { + "epoch": 2.211760084289859, + "grad_norm": 6.452468426232372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456050 + }, + { + "epoch": 2.2118085824826954, + "grad_norm": 6.037446809159519e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456060 + }, + { + "epoch": 2.2118570806755313, + "grad_norm": 6.178881051255303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456070 + }, + { + "epoch": 2.2119055788683673, + "grad_norm": 5.440460526529023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456080 + }, + { + "epoch": 2.2119540770612036, + "grad_norm": 6.106327532506839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456090 + }, + { + "epoch": 2.2120025752540395, + "grad_norm": 5.959242344033555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456100 + }, + { + "epoch": 2.212051073446876, + "grad_norm": 6.228248849993179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456110 + }, + { + "epoch": 2.212099571639712, + "grad_norm": 6.299378441099179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456120 + }, + { + "epoch": 2.2121480698325477, + "grad_norm": 4.861665203748089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456130 + }, + { + "epoch": 2.212196568025384, + "grad_norm": 6.047466882819208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456140 + }, + { + "epoch": 2.21224506621822, + "grad_norm": 6.450652278999769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456150 + }, + { + "epoch": 2.212293564411056, + "grad_norm": 6.406609287523679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456160 + }, + { + "epoch": 2.2123420626038923, + "grad_norm": 6.109129913056677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456170 + }, + { + "epoch": 2.2123905607967282, + "grad_norm": 4.9510433086652483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456180 + }, + { + "epoch": 2.212439058989564, + "grad_norm": 6.170029820395939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456190 + }, + { + "epoch": 2.2124875571824005, + "grad_norm": 6.019629239517599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456200 + }, + { + "epoch": 2.2125360553752365, + "grad_norm": 6.181713274600042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456210 + }, + { + "epoch": 2.212584553568073, + "grad_norm": 6.123298845750469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456220 + }, + { + "epoch": 2.2126330517609087, + "grad_norm": 4.664061492576366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456230 + }, + { + "epoch": 2.2126815499537447, + "grad_norm": 6.083158154979174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456240 + }, + { + "epoch": 2.212730048146581, + "grad_norm": 5.7722729707165854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456250 + }, + { + "epoch": 2.212778546339417, + "grad_norm": 5.935056535122385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456260 + }, + { + "epoch": 2.2128270445322533, + "grad_norm": 6.034476740524042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456270 + }, + { + "epoch": 2.2128755427250892, + "grad_norm": 4.5912095458788826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456280 + }, + { + "epoch": 2.212924040917925, + "grad_norm": 5.831552130075579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456290 + }, + { + "epoch": 2.2129725391107615, + "grad_norm": 5.908743361260349e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456300 + }, + { + "epoch": 2.2130210373035974, + "grad_norm": 5.631537547401422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456310 + }, + { + "epoch": 2.2130695354964334, + "grad_norm": 6.013016928818615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456320 + }, + { + "epoch": 2.2131180336892697, + "grad_norm": 5.12252924522727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456330 + }, + { + "epoch": 2.2131665318821057, + "grad_norm": 5.7555535448727824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456340 + }, + { + "epoch": 2.213215030074942, + "grad_norm": 5.9408279184935964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456350 + }, + { + "epoch": 2.213263528267778, + "grad_norm": 6.135748265023722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456360 + }, + { + "epoch": 2.213312026460614, + "grad_norm": 5.91474780264889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456370 + }, + { + "epoch": 2.2133605246534502, + "grad_norm": 4.509705320288049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456380 + }, + { + "epoch": 2.213409022846286, + "grad_norm": 6.30300647230797e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456390 + }, + { + "epoch": 2.213457521039122, + "grad_norm": 5.582560902439582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456400 + }, + { + "epoch": 2.2135060192319584, + "grad_norm": 5.6106305379444166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456410 + }, + { + "epoch": 2.2135545174247944, + "grad_norm": 5.9039614086486836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456420 + }, + { + "epoch": 2.2136030156176307, + "grad_norm": 4.5306109086595825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456430 + }, + { + "epoch": 2.2136515138104667, + "grad_norm": 5.449750517527718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456440 + }, + { + "epoch": 2.2137000120033026, + "grad_norm": 5.7608605885661746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456450 + }, + { + "epoch": 2.213748510196139, + "grad_norm": 5.781761558409926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456460 + }, + { + "epoch": 2.213797008388975, + "grad_norm": 5.833859972881328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456470 + }, + { + "epoch": 2.2138455065818112, + "grad_norm": 4.2773535824380815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456480 + }, + { + "epoch": 2.213894004774647, + "grad_norm": 5.608790942801534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456490 + }, + { + "epoch": 2.213942502967483, + "grad_norm": 5.6825260941195666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456500 + }, + { + "epoch": 2.2139910011603194, + "grad_norm": 5.845556572126043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456510 + }, + { + "epoch": 2.2140394993531554, + "grad_norm": 5.5022866263243486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456520 + }, + { + "epoch": 2.2140879975459913, + "grad_norm": 4.396968122932776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456530 + }, + { + "epoch": 2.2141364957388276, + "grad_norm": 5.708434258622219e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456540 + }, + { + "epoch": 2.2141849939316636, + "grad_norm": 5.6143893090165875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456550 + }, + { + "epoch": 2.2142334921244995, + "grad_norm": 5.6897832223512523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456560 + }, + { + "epoch": 2.214281990317336, + "grad_norm": 5.4811721383885015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456570 + }, + { + "epoch": 2.214330488510172, + "grad_norm": 4.475496240274879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456580 + }, + { + "epoch": 2.214378986703008, + "grad_norm": 5.36290443164944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456590 + }, + { + "epoch": 2.214427484895844, + "grad_norm": 5.58225785596278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456600 + }, + { + "epoch": 2.21447598308868, + "grad_norm": 5.444947603905348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456610 + }, + { + "epoch": 2.2145244812815164, + "grad_norm": 5.520148249615886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456620 + }, + { + "epoch": 2.2145729794743523, + "grad_norm": 4.280815346646705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456630 + }, + { + "epoch": 2.2146214776671886, + "grad_norm": 5.384907453276355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456640 + }, + { + "epoch": 2.2146699758600246, + "grad_norm": 5.4130769200355644e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456650 + }, + { + "epoch": 2.2147184740528605, + "grad_norm": 5.3392000154417474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456660 + }, + { + "epoch": 2.214766972245697, + "grad_norm": 5.6898898037616163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456670 + }, + { + "epoch": 2.2148154704385328, + "grad_norm": 4.3779579073088826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456680 + }, + { + "epoch": 2.2148639686313687, + "grad_norm": 5.375904876814275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456690 + }, + { + "epoch": 2.214912466824205, + "grad_norm": 5.490668542051935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456700 + }, + { + "epoch": 2.214960965017041, + "grad_norm": 5.611920883552557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456710 + }, + { + "epoch": 2.2150094632098773, + "grad_norm": 5.2906759862025865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456720 + }, + { + "epoch": 2.2150579614027133, + "grad_norm": 4.1709345310891877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456730 + }, + { + "epoch": 2.215106459595549, + "grad_norm": 5.41820242005997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456740 + }, + { + "epoch": 2.2151549577883856, + "grad_norm": 5.5174133706259454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456750 + }, + { + "epoch": 2.2152034559812215, + "grad_norm": 5.3584667369932504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456760 + }, + { + "epoch": 2.2152519541740574, + "grad_norm": 4.9896712539521104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456770 + }, + { + "epoch": 2.2153004523668938, + "grad_norm": 4.1090089553108555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456780 + }, + { + "epoch": 2.2153489505597297, + "grad_norm": 5.253577839425816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456790 + }, + { + "epoch": 2.215397448752566, + "grad_norm": 5.5373607921183066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456800 + }, + { + "epoch": 2.215445946945402, + "grad_norm": 5.144680059743223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456810 + }, + { + "epoch": 2.215494445138238, + "grad_norm": 5.4341324329243434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456820 + }, + { + "epoch": 2.2155429433310743, + "grad_norm": 4.2234482577896415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456830 + }, + { + "epoch": 2.21559144152391, + "grad_norm": 5.013234627426755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456840 + }, + { + "epoch": 2.215639939716746, + "grad_norm": 5.272459802085905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456850 + }, + { + "epoch": 2.2156884379095825, + "grad_norm": 5.314150186563893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456860 + }, + { + "epoch": 2.2157369361024184, + "grad_norm": 5.0773351745192485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456870 + }, + { + "epoch": 2.2157854342952548, + "grad_norm": 4.052735036452759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456880 + }, + { + "epoch": 2.2158339324880907, + "grad_norm": 5.3715961456646255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456890 + }, + { + "epoch": 2.2158824306809266, + "grad_norm": 5.1671147360821124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456900 + }, + { + "epoch": 2.215930928873763, + "grad_norm": 5.2281251328167855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456910 + }, + { + "epoch": 2.215979427066599, + "grad_norm": 5.1012168711395134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456920 + }, + { + "epoch": 2.216027925259435, + "grad_norm": 4.096522943086711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456930 + }, + { + "epoch": 2.216076423452271, + "grad_norm": 5.221691168344478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456940 + }, + { + "epoch": 2.216124921645107, + "grad_norm": 5.117224333162085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456950 + }, + { + "epoch": 2.2161734198379435, + "grad_norm": 5.177813378054452e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456960 + }, + { + "epoch": 2.2162219180307794, + "grad_norm": 4.999751723744339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456970 + }, + { + "epoch": 2.2162704162236153, + "grad_norm": 3.98890520614259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456980 + }, + { + "epoch": 2.2163189144164517, + "grad_norm": 5.102991806893442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 456990 + }, + { + "epoch": 2.2163674126092876, + "grad_norm": 4.859354163500029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457000 + }, + { + "epoch": 2.216415910802124, + "grad_norm": 4.906090467216018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457010 + }, + { + "epoch": 2.21646440899496, + "grad_norm": 4.862841862518508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457020 + }, + { + "epoch": 2.216512907187796, + "grad_norm": 3.8631476684258814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457030 + }, + { + "epoch": 2.216561405380632, + "grad_norm": 4.740211423381879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457040 + }, + { + "epoch": 2.216609903573468, + "grad_norm": 4.9237698362958326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457050 + }, + { + "epoch": 2.216658401766304, + "grad_norm": 4.992267932379946e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457060 + }, + { + "epoch": 2.2167068999591404, + "grad_norm": 4.870502934295473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457070 + }, + { + "epoch": 2.2167553981519763, + "grad_norm": 4.4035694202193554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457080 + }, + { + "epoch": 2.2168038963448122, + "grad_norm": 4.99442904811076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457090 + }, + { + "epoch": 2.2168523945376486, + "grad_norm": 4.997095004455332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457100 + }, + { + "epoch": 2.2169008927304845, + "grad_norm": 5.1383576504804296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457110 + }, + { + "epoch": 2.216949390923321, + "grad_norm": 5.143101589055732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457120 + }, + { + "epoch": 2.216997889116157, + "grad_norm": 4.1452679511166934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457130 + }, + { + "epoch": 2.2170463873089927, + "grad_norm": 4.9161613446813135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457140 + }, + { + "epoch": 2.217094885501829, + "grad_norm": 4.985540869029137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457150 + }, + { + "epoch": 2.217143383694665, + "grad_norm": 4.949942677967556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457160 + }, + { + "epoch": 2.2171918818875014, + "grad_norm": 4.632578409768939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457170 + }, + { + "epoch": 2.2172403800803373, + "grad_norm": 3.970574624645451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457180 + }, + { + "epoch": 2.2172888782731732, + "grad_norm": 4.766219774410274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457190 + }, + { + "epoch": 2.2173373764660096, + "grad_norm": 4.677892206927936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457200 + }, + { + "epoch": 2.2173858746588455, + "grad_norm": 4.845210099801989e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457210 + }, + { + "epoch": 2.2174343728516814, + "grad_norm": 4.828089927855217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457220 + }, + { + "epoch": 2.217482871044518, + "grad_norm": 3.729104847138842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457230 + }, + { + "epoch": 2.2175313692373537, + "grad_norm": 4.4998962778208806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457240 + }, + { + "epoch": 2.21757986743019, + "grad_norm": 5.0426162800931706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457250 + }, + { + "epoch": 2.217628365623026, + "grad_norm": 4.700768840848468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457260 + }, + { + "epoch": 2.217676863815862, + "grad_norm": 4.596892111408124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457270 + }, + { + "epoch": 2.2177253620086983, + "grad_norm": 4.158286870392658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457280 + }, + { + "epoch": 2.217773860201534, + "grad_norm": 4.58402062974983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457290 + }, + { + "epoch": 2.21782235839437, + "grad_norm": 5.045387041491267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457300 + }, + { + "epoch": 2.2178708565872065, + "grad_norm": 4.8237875915901895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457310 + }, + { + "epoch": 2.2179193547800424, + "grad_norm": 4.6551910770631366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457320 + }, + { + "epoch": 2.217967852972879, + "grad_norm": 3.96871797647691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457330 + }, + { + "epoch": 2.2180163511657147, + "grad_norm": 4.7255376500743296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457340 + }, + { + "epoch": 2.2180648493585506, + "grad_norm": 4.770635442241655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457350 + }, + { + "epoch": 2.218113347551387, + "grad_norm": 4.6736644776501635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457360 + }, + { + "epoch": 2.218161845744223, + "grad_norm": 4.6546436038852335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457370 + }, + { + "epoch": 2.218210343937059, + "grad_norm": 3.4432254381044913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457380 + }, + { + "epoch": 2.218258842129895, + "grad_norm": 4.4665551257594416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457390 + }, + { + "epoch": 2.218307340322731, + "grad_norm": 4.4484618655360464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457400 + }, + { + "epoch": 2.2183558385155675, + "grad_norm": 4.706506473439731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457410 + }, + { + "epoch": 2.2184043367084034, + "grad_norm": 4.52441568654649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457420 + }, + { + "epoch": 2.2184528349012393, + "grad_norm": 3.6364696143209585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457430 + }, + { + "epoch": 2.2185013330940757, + "grad_norm": 4.463045044644787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457440 + }, + { + "epoch": 2.2185498312869116, + "grad_norm": 4.467608860636574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457450 + }, + { + "epoch": 2.2185983294797476, + "grad_norm": 4.7114042445173254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457460 + }, + { + "epoch": 2.218646827672584, + "grad_norm": 4.9089489806419806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457470 + }, + { + "epoch": 2.21869532586542, + "grad_norm": 3.53100659822303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457480 + }, + { + "epoch": 2.218743824058256, + "grad_norm": 4.9212911079621335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457490 + }, + { + "epoch": 2.218792322251092, + "grad_norm": 4.479988646721722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457500 + }, + { + "epoch": 2.218840820443928, + "grad_norm": 4.4427181933315296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457510 + }, + { + "epoch": 2.2188893186367644, + "grad_norm": 4.4170597846004966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457520 + }, + { + "epoch": 2.2189378168296003, + "grad_norm": 3.581461172075251e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457530 + }, + { + "epoch": 2.2189863150224367, + "grad_norm": 4.268428810405567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457540 + }, + { + "epoch": 2.2190348132152726, + "grad_norm": 4.531343122948783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457550 + }, + { + "epoch": 2.2190833114081085, + "grad_norm": 4.500732941892238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457560 + }, + { + "epoch": 2.219131809600945, + "grad_norm": 4.5605759169120574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457570 + }, + { + "epoch": 2.219180307793781, + "grad_norm": 3.5346563009852616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457580 + }, + { + "epoch": 2.2192288059866168, + "grad_norm": 4.3443506569929013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457590 + }, + { + "epoch": 2.219277304179453, + "grad_norm": 4.6456481328505106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457600 + }, + { + "epoch": 2.219325802372289, + "grad_norm": 4.447068491231221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457610 + }, + { + "epoch": 2.219374300565125, + "grad_norm": 4.408191500715475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457620 + }, + { + "epoch": 2.2194227987579613, + "grad_norm": 4.947457554749235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457630 + }, + { + "epoch": 2.2194712969507973, + "grad_norm": 4.322191315964119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457640 + }, + { + "epoch": 2.2195197951436336, + "grad_norm": 4.2413880407821125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457650 + }, + { + "epoch": 2.2195682933364695, + "grad_norm": 4.096987638035898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457660 + }, + { + "epoch": 2.2196167915293055, + "grad_norm": 4.584201462876081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457670 + }, + { + "epoch": 2.219665289722142, + "grad_norm": 3.647097202019722e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457680 + }, + { + "epoch": 2.2197137879149778, + "grad_norm": 4.9164071924678865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457690 + }, + { + "epoch": 2.219762286107814, + "grad_norm": 4.234894035448633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457700 + }, + { + "epoch": 2.21981078430065, + "grad_norm": 4.33785700693079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457710 + }, + { + "epoch": 2.219859282493486, + "grad_norm": 4.2698513169625585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457720 + }, + { + "epoch": 2.2199077806863223, + "grad_norm": 3.382116986472283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457730 + }, + { + "epoch": 2.2199562788791583, + "grad_norm": 4.277544007891265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457740 + }, + { + "epoch": 2.220004777071994, + "grad_norm": 4.228411043527558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457750 + }, + { + "epoch": 2.2200532752648305, + "grad_norm": 4.1101969827650464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457760 + }, + { + "epoch": 2.2201017734576665, + "grad_norm": 4.146216525668933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457770 + }, + { + "epoch": 2.220150271650503, + "grad_norm": 4.00137523115518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457780 + }, + { + "epoch": 2.2201987698433387, + "grad_norm": 4.338373926771055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457790 + }, + { + "epoch": 2.2202472680361747, + "grad_norm": 4.0123705247196995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457800 + }, + { + "epoch": 2.220295766229011, + "grad_norm": 4.0958941127655635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457810 + }, + { + "epoch": 2.220344264421847, + "grad_norm": 4.346664539411904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457820 + }, + { + "epoch": 2.220392762614683, + "grad_norm": 3.58091334362598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457830 + }, + { + "epoch": 2.2204412608075192, + "grad_norm": 4.074967918654693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457840 + }, + { + "epoch": 2.220489759000355, + "grad_norm": 4.2248665010902187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457850 + }, + { + "epoch": 2.2205382571931915, + "grad_norm": 4.494116723208208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457860 + }, + { + "epoch": 2.2205867553860275, + "grad_norm": 3.9907135374050995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457870 + }, + { + "epoch": 2.2206352535788634, + "grad_norm": 3.5814299081948775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457880 + }, + { + "epoch": 2.2206837517716997, + "grad_norm": 4.126796682157874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457890 + }, + { + "epoch": 2.2207322499645357, + "grad_norm": 4.202290782018281e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457900 + }, + { + "epoch": 2.2207807481573716, + "grad_norm": 4.173619672087625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457910 + }, + { + "epoch": 2.220829246350208, + "grad_norm": 3.8469519125783336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457920 + }, + { + "epoch": 2.220877744543044, + "grad_norm": 4.4581259572851195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457930 + }, + { + "epoch": 2.2209262427358802, + "grad_norm": 4.1030702391253726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457940 + }, + { + "epoch": 2.220974740928716, + "grad_norm": 3.904874645854761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457950 + }, + { + "epoch": 2.221023239121552, + "grad_norm": 4.0289858560527136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457960 + }, + { + "epoch": 2.2210717373143885, + "grad_norm": 4.0984293292467555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457970 + }, + { + "epoch": 2.2211202355072244, + "grad_norm": 3.341070353712894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457980 + }, + { + "epoch": 2.2211687337000603, + "grad_norm": 4.0186581173884406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 457990 + }, + { + "epoch": 2.2212172318928967, + "grad_norm": 3.86043303990391e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458000 + }, + { + "epoch": 2.2212657300857326, + "grad_norm": 3.879845422716244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458010 + }, + { + "epoch": 2.221314228278569, + "grad_norm": 4.1232610215047316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458020 + }, + { + "epoch": 2.221362726471405, + "grad_norm": 3.1429308933184075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458030 + }, + { + "epoch": 2.221411224664241, + "grad_norm": 3.96610531083752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458040 + }, + { + "epoch": 2.221459722857077, + "grad_norm": 4.0226684205890706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458050 + }, + { + "epoch": 2.221508221049913, + "grad_norm": 3.870136566774818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458060 + }, + { + "epoch": 2.2215567192427494, + "grad_norm": 3.8118393774766446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458070 + }, + { + "epoch": 2.2216052174355854, + "grad_norm": 3.24845359500614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458080 + }, + { + "epoch": 2.2216537156284213, + "grad_norm": 3.988214558603431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458090 + }, + { + "epoch": 2.2217022138212577, + "grad_norm": 3.752871791107282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458100 + }, + { + "epoch": 2.2217507120140936, + "grad_norm": 3.7268794272904415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458110 + }, + { + "epoch": 2.2217992102069295, + "grad_norm": 4.00545587808665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458120 + }, + { + "epoch": 2.221847708399766, + "grad_norm": 4.287233679178826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458130 + }, + { + "epoch": 2.221896206592602, + "grad_norm": 3.859683417317683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458140 + }, + { + "epoch": 2.2219447047854377, + "grad_norm": 3.840490236939331e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458150 + }, + { + "epoch": 2.221993202978274, + "grad_norm": 3.7668201002816204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458160 + }, + { + "epoch": 2.22204170117111, + "grad_norm": 4.0670698808753514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458170 + }, + { + "epoch": 2.2220901993639464, + "grad_norm": 3.1088248420019227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458180 + }, + { + "epoch": 2.2221386975567823, + "grad_norm": 3.66860035683203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458190 + }, + { + "epoch": 2.222187195749618, + "grad_norm": 3.772624168618677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458200 + }, + { + "epoch": 2.2222356939424546, + "grad_norm": 3.791940272890315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458210 + }, + { + "epoch": 2.2222841921352905, + "grad_norm": 3.7914173134367957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458220 + }, + { + "epoch": 2.222332690328127, + "grad_norm": 3.090537603611665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458230 + }, + { + "epoch": 2.222381188520963, + "grad_norm": 3.7480948122947666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458240 + }, + { + "epoch": 2.2224296867137987, + "grad_norm": 3.937541492859964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458250 + }, + { + "epoch": 2.222478184906635, + "grad_norm": 3.681334703742323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458260 + }, + { + "epoch": 2.222526683099471, + "grad_norm": 3.9147334263134326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458270 + }, + { + "epoch": 2.222575181292307, + "grad_norm": 3.2024335183677977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458280 + }, + { + "epoch": 2.2226236794851433, + "grad_norm": 3.4808518734052996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458290 + }, + { + "epoch": 2.222672177677979, + "grad_norm": 3.620267463588789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458300 + }, + { + "epoch": 2.2227206758708156, + "grad_norm": 3.8948122949022945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458310 + }, + { + "epoch": 2.2227691740636515, + "grad_norm": 3.616854726828933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458320 + }, + { + "epoch": 2.2228176722564874, + "grad_norm": 4.182478008374346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458330 + }, + { + "epoch": 2.2228661704493238, + "grad_norm": 3.8301642746318976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458340 + }, + { + "epoch": 2.2229146686421597, + "grad_norm": 3.800627368377718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458350 + }, + { + "epoch": 2.2229631668349956, + "grad_norm": 3.634750811443155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458360 + }, + { + "epoch": 2.223011665027832, + "grad_norm": 3.561279271480089e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458370 + }, + { + "epoch": 2.223060163220668, + "grad_norm": 2.913621877098649e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458380 + }, + { + "epoch": 2.2231086614135043, + "grad_norm": 3.490120192850554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458390 + }, + { + "epoch": 2.22315715960634, + "grad_norm": 3.763747002949458e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458400 + }, + { + "epoch": 2.223205657799176, + "grad_norm": 3.686060523477863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458410 + }, + { + "epoch": 2.2232541559920125, + "grad_norm": 3.57863747524334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458420 + }, + { + "epoch": 2.2233026541848484, + "grad_norm": 3.114734781206607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458430 + }, + { + "epoch": 2.2233511523776843, + "grad_norm": 3.736248999075542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458440 + }, + { + "epoch": 2.2233996505705207, + "grad_norm": 3.632525391594754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458450 + }, + { + "epoch": 2.2234481487633566, + "grad_norm": 3.441002149884298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458460 + }, + { + "epoch": 2.223496646956193, + "grad_norm": 3.387078706396096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458470 + }, + { + "epoch": 2.223545145149029, + "grad_norm": 3.542002602330285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458480 + }, + { + "epoch": 2.223593643341865, + "grad_norm": 3.4418743410924435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458490 + }, + { + "epoch": 2.223642141534701, + "grad_norm": 3.370566759031135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458500 + }, + { + "epoch": 2.223690639727537, + "grad_norm": 3.6208906095680504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458510 + }, + { + "epoch": 2.223739137920373, + "grad_norm": 3.435610906876718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458520 + }, + { + "epoch": 2.2237876361132094, + "grad_norm": 3.0670186390580056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458530 + }, + { + "epoch": 2.2238361343060453, + "grad_norm": 3.234535839169439e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458540 + }, + { + "epoch": 2.2238846324988817, + "grad_norm": 3.4344047605827654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458550 + }, + { + "epoch": 2.2239331306917176, + "grad_norm": 3.34117657985189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458560 + }, + { + "epoch": 2.2239816288845535, + "grad_norm": 3.375850710085615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458570 + }, + { + "epoch": 2.22403012707739, + "grad_norm": 3.347442856238558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458580 + }, + { + "epoch": 2.224078625270226, + "grad_norm": 3.639993195747593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458590 + }, + { + "epoch": 2.224127123463062, + "grad_norm": 3.185825647733509e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458600 + }, + { + "epoch": 2.224175621655898, + "grad_norm": 3.4353256239683105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458610 + }, + { + "epoch": 2.224224119848734, + "grad_norm": 3.2523143289608925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458620 + }, + { + "epoch": 2.2242726180415704, + "grad_norm": 3.232296208466323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458630 + }, + { + "epoch": 2.2243211162344063, + "grad_norm": 3.325760644656839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458640 + }, + { + "epoch": 2.2243696144272422, + "grad_norm": 3.2867333743524796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458650 + }, + { + "epoch": 2.2244181126200786, + "grad_norm": 3.447665974931624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458660 + }, + { + "epoch": 2.2244666108129145, + "grad_norm": 3.3815926059332924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458670 + }, + { + "epoch": 2.2245151090057504, + "grad_norm": 3.0178732401964226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458680 + }, + { + "epoch": 2.224563607198587, + "grad_norm": 3.2948342720828805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458690 + }, + { + "epoch": 2.2246121053914227, + "grad_norm": 3.19781285895715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458700 + }, + { + "epoch": 2.224660603584259, + "grad_norm": 3.515689783739617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458710 + }, + { + "epoch": 2.224709101777095, + "grad_norm": 3.3654089293122524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458720 + }, + { + "epoch": 2.224757599969931, + "grad_norm": 2.9368944609586833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458730 + }, + { + "epoch": 2.2248060981627673, + "grad_norm": 3.163777506642873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458740 + }, + { + "epoch": 2.2248545963556032, + "grad_norm": 3.2060235355402256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458750 + }, + { + "epoch": 2.2249030945484396, + "grad_norm": 3.340676713037283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458760 + }, + { + "epoch": 2.2249515927412755, + "grad_norm": 3.2477558420396235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458770 + }, + { + "epoch": 2.2250000909341114, + "grad_norm": 2.930444864546189e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458780 + }, + { + "epoch": 2.225048589126948, + "grad_norm": 3.049397179211155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458790 + }, + { + "epoch": 2.2250970873197837, + "grad_norm": 3.178482543830796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458800 + }, + { + "epoch": 2.2251455855126197, + "grad_norm": 3.254960034837495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458810 + }, + { + "epoch": 2.225194083705456, + "grad_norm": 3.100660705968039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458820 + }, + { + "epoch": 2.225242581898292, + "grad_norm": 3.111886925921681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458830 + }, + { + "epoch": 2.2252910800911283, + "grad_norm": 3.1982107628891754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458840 + }, + { + "epoch": 2.2253395782839642, + "grad_norm": 3.3170273638916115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458850 + }, + { + "epoch": 2.2253880764768, + "grad_norm": 3.1862597893450584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458860 + }, + { + "epoch": 2.2254365746696365, + "grad_norm": 3.460177211422888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458870 + }, + { + "epoch": 2.2254850728624724, + "grad_norm": 3.152056748945142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458880 + }, + { + "epoch": 2.2255335710553084, + "grad_norm": 3.132971571062626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458890 + }, + { + "epoch": 2.2255820692481447, + "grad_norm": 3.020116778884585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458900 + }, + { + "epoch": 2.2256305674409806, + "grad_norm": 2.9665272904821904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458910 + }, + { + "epoch": 2.225679065633817, + "grad_norm": 3.0154353680700297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458920 + }, + { + "epoch": 2.225727563826653, + "grad_norm": 3.241103030404702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458930 + }, + { + "epoch": 2.225776062019489, + "grad_norm": 3.1581070203401396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458940 + }, + { + "epoch": 2.225824560212325, + "grad_norm": 3.238670487348827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458950 + }, + { + "epoch": 2.225873058405161, + "grad_norm": 3.215694732716656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458960 + }, + { + "epoch": 2.225921556597997, + "grad_norm": 3.0223247904359596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458970 + }, + { + "epoch": 2.2259700547908334, + "grad_norm": 2.4798836761874554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458980 + }, + { + "epoch": 2.2260185529836694, + "grad_norm": 2.8496707216163486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 458990 + }, + { + "epoch": 2.2260670511765057, + "grad_norm": 3.076567267612518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459000 + }, + { + "epoch": 2.2261155493693416, + "grad_norm": 3.137313342449488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459010 + }, + { + "epoch": 2.2261640475621776, + "grad_norm": 2.8442306287956853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459020 + }, + { + "epoch": 2.226212545755014, + "grad_norm": 3.296571549071814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459030 + }, + { + "epoch": 2.22626104394785, + "grad_norm": 3.008941718007918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459040 + }, + { + "epoch": 2.2263095421406858, + "grad_norm": 3.1852202653226414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459050 + }, + { + "epoch": 2.226358040333522, + "grad_norm": 3.08913747915085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459060 + }, + { + "epoch": 2.226406538526358, + "grad_norm": 2.995352943457874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459070 + }, + { + "epoch": 2.2264550367191944, + "grad_norm": 2.6673680153521673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459080 + }, + { + "epoch": 2.2265035349120303, + "grad_norm": 3.015609806311659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459090 + }, + { + "epoch": 2.2265520331048663, + "grad_norm": 3.00414342291333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459100 + }, + { + "epoch": 2.2266005312977026, + "grad_norm": 3.149371607946705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459110 + }, + { + "epoch": 2.2266490294905386, + "grad_norm": 2.8785050787405453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459120 + }, + { + "epoch": 2.226697527683375, + "grad_norm": 3.4671060689106525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459130 + }, + { + "epoch": 2.226746025876211, + "grad_norm": 2.9520707656160994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459140 + }, + { + "epoch": 2.2267945240690468, + "grad_norm": 2.914557484245961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459150 + }, + { + "epoch": 2.226843022261883, + "grad_norm": 3.024283401487082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459160 + }, + { + "epoch": 2.226891520454719, + "grad_norm": 2.9842688320513844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459170 + }, + { + "epoch": 2.226940018647555, + "grad_norm": 3.018309513436179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459180 + }, + { + "epoch": 2.2269885168403913, + "grad_norm": 3.113554214451142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459190 + }, + { + "epoch": 2.2270370150332273, + "grad_norm": 2.984654301485534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459200 + }, + { + "epoch": 2.227085513226063, + "grad_norm": 3.514564994588909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459210 + }, + { + "epoch": 2.2271340114188996, + "grad_norm": 3.737170928275191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459220 + }, + { + "epoch": 2.2271825096117355, + "grad_norm": 3.048105767788911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459230 + }, + { + "epoch": 2.227231007804572, + "grad_norm": 2.9323560468696996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459240 + }, + { + "epoch": 2.2272795059974078, + "grad_norm": 2.9777083909721114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459250 + }, + { + "epoch": 2.2273280041902437, + "grad_norm": 2.7551505965561773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459260 + }, + { + "epoch": 2.22737650238308, + "grad_norm": 2.932985410097899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459270 + }, + { + "epoch": 2.227425000575916, + "grad_norm": 2.6001254482821423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459280 + }, + { + "epoch": 2.2274734987687523, + "grad_norm": 2.9011392399524993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459290 + }, + { + "epoch": 2.2275219969615883, + "grad_norm": 2.7416060532914344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459300 + }, + { + "epoch": 2.227570495154424, + "grad_norm": 2.9271634005567648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459310 + }, + { + "epoch": 2.2276189933472605, + "grad_norm": 3.014773142240301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459320 + }, + { + "epoch": 2.2276674915400965, + "grad_norm": 2.4994497138663974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459330 + }, + { + "epoch": 2.2277159897329324, + "grad_norm": 2.7396181323524615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459340 + }, + { + "epoch": 2.2277644879257688, + "grad_norm": 2.63148596246765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459350 + }, + { + "epoch": 2.2278129861186047, + "grad_norm": 2.810324239987949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459360 + }, + { + "epoch": 2.227861484311441, + "grad_norm": 3.0327679922947937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459370 + }, + { + "epoch": 2.227909982504277, + "grad_norm": 2.56846401924804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459380 + }, + { + "epoch": 2.227958480697113, + "grad_norm": 2.8716154787389314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459390 + }, + { + "epoch": 2.2280069788899493, + "grad_norm": 2.856884506741153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459400 + }, + { + "epoch": 2.228055477082785, + "grad_norm": 2.523610298510448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459410 + }, + { + "epoch": 2.228103975275621, + "grad_norm": 2.890921990683637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459420 + }, + { + "epoch": 2.2281524734684575, + "grad_norm": 2.6997570401476878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459430 + }, + { + "epoch": 2.2282009716612934, + "grad_norm": 2.751262684341782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459440 + }, + { + "epoch": 2.2282494698541297, + "grad_norm": 2.7090811371976997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459450 + }, + { + "epoch": 2.2282979680469657, + "grad_norm": 2.8482523006800875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459460 + }, + { + "epoch": 2.2283464662398016, + "grad_norm": 2.611402294405707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459470 + }, + { + "epoch": 2.228394964432638, + "grad_norm": 2.3386748537745916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459480 + }, + { + "epoch": 2.228443462625474, + "grad_norm": 2.772623908242622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459490 + }, + { + "epoch": 2.22849196081831, + "grad_norm": 2.7671188007616365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459500 + }, + { + "epoch": 2.228540459011146, + "grad_norm": 2.7410139935568623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459510 + }, + { + "epoch": 2.228588957203982, + "grad_norm": 2.579304414496164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459520 + }, + { + "epoch": 2.2286374553968185, + "grad_norm": 2.9460988315577197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459530 + }, + { + "epoch": 2.2286859535896544, + "grad_norm": 3.029834871881576e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459540 + }, + { + "epoch": 2.2287344517824903, + "grad_norm": 2.8005135987996255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459550 + }, + { + "epoch": 2.2287829499753267, + "grad_norm": 2.6587120061094538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459560 + }, + { + "epoch": 2.2288314481681626, + "grad_norm": 2.6427537491713338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459570 + }, + { + "epoch": 2.2288799463609985, + "grad_norm": 2.4753251892661865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459580 + }, + { + "epoch": 2.228928444553835, + "grad_norm": 2.3421289796488054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459590 + }, + { + "epoch": 2.228976942746671, + "grad_norm": 2.8968463183787208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459600 + }, + { + "epoch": 2.229025440939507, + "grad_norm": 2.8214900638090512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459610 + }, + { + "epoch": 2.229073939132343, + "grad_norm": 2.8082441261290114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459620 + }, + { + "epoch": 2.229122437325179, + "grad_norm": 2.3770924784116687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459630 + }, + { + "epoch": 2.2291709355180154, + "grad_norm": 3.016189253912671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459640 + }, + { + "epoch": 2.2292194337108513, + "grad_norm": 2.5292427707768184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459650 + }, + { + "epoch": 2.2292679319036877, + "grad_norm": 2.7407057956452263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459660 + }, + { + "epoch": 2.2293164300965236, + "grad_norm": 2.6787105866787897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459670 + }, + { + "epoch": 2.2293649282893595, + "grad_norm": 2.9217286368066198e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459680 + }, + { + "epoch": 2.229413426482196, + "grad_norm": 2.862775794199024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459690 + }, + { + "epoch": 2.229461924675032, + "grad_norm": 2.6195571933840256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459700 + }, + { + "epoch": 2.2295104228678677, + "grad_norm": 2.5950983584266396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459710 + }, + { + "epoch": 2.229558921060704, + "grad_norm": 2.712403102123062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459720 + }, + { + "epoch": 2.22960741925354, + "grad_norm": 2.9735728546143037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459730 + }, + { + "epoch": 2.229655917446376, + "grad_norm": 2.993945713569701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459740 + }, + { + "epoch": 2.2297044156392123, + "grad_norm": 2.820614142251543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459750 + }, + { + "epoch": 2.229752913832048, + "grad_norm": 2.4681630961254086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459760 + }, + { + "epoch": 2.2298014120248846, + "grad_norm": 2.76723888248398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459770 + }, + { + "epoch": 2.2298499102177205, + "grad_norm": 2.5966595984527885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459780 + }, + { + "epoch": 2.2298984084105564, + "grad_norm": 2.5502927769593953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459790 + }, + { + "epoch": 2.229946906603393, + "grad_norm": 2.766151396826899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459800 + }, + { + "epoch": 2.2299954047962287, + "grad_norm": 2.7492291110320366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459810 + }, + { + "epoch": 2.230043902989065, + "grad_norm": 2.453975156413435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459820 + }, + { + "epoch": 2.230092401181901, + "grad_norm": 2.226754602929759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459830 + }, + { + "epoch": 2.230140899374737, + "grad_norm": 2.5427919325693438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459840 + }, + { + "epoch": 2.2301893975675733, + "grad_norm": 2.651298025568849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459850 + }, + { + "epoch": 2.230237895760409, + "grad_norm": 2.4481384031105335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459860 + }, + { + "epoch": 2.230286393953245, + "grad_norm": 2.3088015055350297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459870 + }, + { + "epoch": 2.2303348921460815, + "grad_norm": 2.4754561067652503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459880 + }, + { + "epoch": 2.2303833903389174, + "grad_norm": 2.5182815832636152e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459890 + }, + { + "epoch": 2.230431888531754, + "grad_norm": 2.5543267057059893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459900 + }, + { + "epoch": 2.2304803867245897, + "grad_norm": 2.4674555731962755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459910 + }, + { + "epoch": 2.2305288849174256, + "grad_norm": 2.496228113102461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459920 + }, + { + "epoch": 2.230577383110262, + "grad_norm": 2.4919554419966516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459930 + }, + { + "epoch": 2.230625881303098, + "grad_norm": 2.691614220395877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459940 + }, + { + "epoch": 2.230674379495934, + "grad_norm": 2.742890004014953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459950 + }, + { + "epoch": 2.23072287768877, + "grad_norm": 2.573552571050186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459960 + }, + { + "epoch": 2.230771375881606, + "grad_norm": 2.464337889307444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459970 + }, + { + "epoch": 2.2308198740744425, + "grad_norm": 2.3449365116334775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459980 + }, + { + "epoch": 2.2308683722672784, + "grad_norm": 2.447378655290322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 459990 + }, + { + "epoch": 2.2309168704601143, + "grad_norm": 2.4589175140476982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460000 + }, + { + "epoch": 2.2309653686529507, + "grad_norm": 2.3741907995145084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460010 + }, + { + "epoch": 2.2310138668457866, + "grad_norm": 2.676298471726568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460020 + }, + { + "epoch": 2.2310623650386225, + "grad_norm": 2.7958861892329878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460030 + }, + { + "epoch": 2.231110863231459, + "grad_norm": 2.369103135890782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460040 + }, + { + "epoch": 2.231159361424295, + "grad_norm": 2.3793598202814792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460050 + }, + { + "epoch": 2.231207859617131, + "grad_norm": 2.392463649414367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460060 + }, + { + "epoch": 2.231256357809967, + "grad_norm": 2.3624288303381036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460070 + }, + { + "epoch": 2.231304856002803, + "grad_norm": 2.5303734219050966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460080 + }, + { + "epoch": 2.2313533541956394, + "grad_norm": 2.6826892707276784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460090 + }, + { + "epoch": 2.2314018523884753, + "grad_norm": 2.475709059979181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460100 + }, + { + "epoch": 2.2314503505813112, + "grad_norm": 2.5426093230862534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460110 + }, + { + "epoch": 2.2314988487741476, + "grad_norm": 2.411019472958742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460120 + }, + { + "epoch": 2.2315473469669835, + "grad_norm": 2.588278569248814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460130 + }, + { + "epoch": 2.23159584515982, + "grad_norm": 2.611426097587355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460140 + }, + { + "epoch": 2.231644343352656, + "grad_norm": 2.6892935878208846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460150 + }, + { + "epoch": 2.2316928415454917, + "grad_norm": 2.7598570540021683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460160 + }, + { + "epoch": 2.231741339738328, + "grad_norm": 2.6057970003989794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460170 + }, + { + "epoch": 2.231789837931164, + "grad_norm": 2.4960277400509767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460180 + }, + { + "epoch": 2.2318383361240004, + "grad_norm": 2.2835227042605766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460190 + }, + { + "epoch": 2.2318868343168363, + "grad_norm": 2.5752298071779478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460200 + }, + { + "epoch": 2.2319353325096722, + "grad_norm": 2.198623150206913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460210 + }, + { + "epoch": 2.2319838307025086, + "grad_norm": 2.4374081419864524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460220 + }, + { + "epoch": 2.2320323288953445, + "grad_norm": 2.3094262502354468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460230 + }, + { + "epoch": 2.2320808270881805, + "grad_norm": 2.4422387667755174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460240 + }, + { + "epoch": 2.232129325281017, + "grad_norm": 2.3046116126579363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460250 + }, + { + "epoch": 2.2321778234738527, + "grad_norm": 2.3096166756886305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460260 + }, + { + "epoch": 2.2322263216666887, + "grad_norm": 2.0625943619734244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460270 + }, + { + "epoch": 2.232274819859525, + "grad_norm": 2.466005888379641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460280 + }, + { + "epoch": 2.232323318052361, + "grad_norm": 2.212686744940129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460290 + }, + { + "epoch": 2.2323718162451973, + "grad_norm": 2.3540618343531605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460300 + }, + { + "epoch": 2.2324203144380332, + "grad_norm": 2.1791480619981485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460310 + }, + { + "epoch": 2.232468812630869, + "grad_norm": 2.698201484463425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460320 + }, + { + "epoch": 2.2325173108237055, + "grad_norm": 2.6790687002176128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460330 + }, + { + "epoch": 2.2325658090165414, + "grad_norm": 2.3789681335983914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460340 + }, + { + "epoch": 2.232614307209378, + "grad_norm": 2.0550363188931442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460350 + }, + { + "epoch": 2.2326628054022137, + "grad_norm": 2.34312196312203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460360 + }, + { + "epoch": 2.2327113035950497, + "grad_norm": 2.2516882580703168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460370 + }, + { + "epoch": 2.232759801787886, + "grad_norm": 2.5122535163291104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460380 + }, + { + "epoch": 2.232808299980722, + "grad_norm": 2.468990878412569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460390 + }, + { + "epoch": 2.232856798173558, + "grad_norm": 2.0673718736929914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460400 + }, + { + "epoch": 2.2329052963663942, + "grad_norm": 2.4022183353622495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460410 + }, + { + "epoch": 2.23295379455923, + "grad_norm": 2.3159726580956885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460420 + }, + { + "epoch": 2.2330022927520665, + "grad_norm": 2.2278976885559132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460430 + }, + { + "epoch": 2.2330507909449024, + "grad_norm": 2.0110698883968325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460440 + }, + { + "epoch": 2.2330992891377384, + "grad_norm": 2.3602023446755993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460450 + }, + { + "epoch": 2.2331477873305747, + "grad_norm": 2.094906292882115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460460 + }, + { + "epoch": 2.2331962855234107, + "grad_norm": 2.1399449323666886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460470 + }, + { + "epoch": 2.2332447837162466, + "grad_norm": 2.527333720081515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460480 + }, + { + "epoch": 2.233293281909083, + "grad_norm": 2.1605567113169855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460490 + }, + { + "epoch": 2.233341780101919, + "grad_norm": 2.378714469841725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460500 + }, + { + "epoch": 2.2333902782947552, + "grad_norm": 2.387253594804406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460510 + }, + { + "epoch": 2.233438776487591, + "grad_norm": 2.1340540001801855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460520 + }, + { + "epoch": 2.233487274680427, + "grad_norm": 2.1052557030998287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460530 + }, + { + "epoch": 2.2335357728732634, + "grad_norm": 2.3193670983800985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460540 + }, + { + "epoch": 2.2335842710660994, + "grad_norm": 2.2122941700786214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460550 + }, + { + "epoch": 2.2336327692589357, + "grad_norm": 2.3755237776867943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460560 + }, + { + "epoch": 2.2336812674517716, + "grad_norm": 1.9738447321060448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460570 + }, + { + "epoch": 2.2337297656446076, + "grad_norm": 2.643662355694687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460580 + }, + { + "epoch": 2.233778263837444, + "grad_norm": 2.1170913910850686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460590 + }, + { + "epoch": 2.23382676203028, + "grad_norm": 2.5263314995527253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460600 + }, + { + "epoch": 2.2338752602231158, + "grad_norm": 2.2316548609069287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460610 + }, + { + "epoch": 2.233923758415952, + "grad_norm": 2.001300636322867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460620 + }, + { + "epoch": 2.233972256608788, + "grad_norm": 2.091482720345539e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460630 + }, + { + "epoch": 2.234020754801624, + "grad_norm": 2.0581634174732244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460640 + }, + { + "epoch": 2.2340692529944604, + "grad_norm": 1.9839747622540926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460650 + }, + { + "epoch": 2.2341177511872963, + "grad_norm": 2.2691256873486054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460660 + }, + { + "epoch": 2.2341662493801326, + "grad_norm": 2.3126286663455176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460670 + }, + { + "epoch": 2.2342147475729686, + "grad_norm": 2.4721854785525466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460680 + }, + { + "epoch": 2.2342632457658045, + "grad_norm": 2.1195646127125656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460690 + }, + { + "epoch": 2.234311743958641, + "grad_norm": 1.949942607382127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460700 + }, + { + "epoch": 2.2343602421514768, + "grad_norm": 2.4460176106799736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460710 + }, + { + "epoch": 2.234408740344313, + "grad_norm": 1.8937223345005805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460720 + }, + { + "epoch": 2.234457238537149, + "grad_norm": 2.22596430177191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460730 + }, + { + "epoch": 2.234505736729985, + "grad_norm": 1.929991100269035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460740 + }, + { + "epoch": 2.2345542349228213, + "grad_norm": 1.9343618262723794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460750 + }, + { + "epoch": 2.2346027331156573, + "grad_norm": 2.3408379234979293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460760 + }, + { + "epoch": 2.234651231308493, + "grad_norm": 2.06046255613046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460770 + }, + { + "epoch": 2.2346997295013296, + "grad_norm": 2.5660218838652327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460780 + }, + { + "epoch": 2.2347482276941655, + "grad_norm": 1.8069737706127853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460790 + }, + { + "epoch": 2.2347967258870014, + "grad_norm": 2.2406139166264438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460800 + }, + { + "epoch": 2.2348452240798378, + "grad_norm": 2.8631848891791378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460810 + }, + { + "epoch": 2.2348937222726737, + "grad_norm": 2.256663478306109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460820 + }, + { + "epoch": 2.23494222046551, + "grad_norm": 2.899345474816073e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460830 + }, + { + "epoch": 2.234990718658346, + "grad_norm": 1.9844080156872224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460840 + }, + { + "epoch": 2.235039216851182, + "grad_norm": 1.9259390526826792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460850 + }, + { + "epoch": 2.2350877150440183, + "grad_norm": 2.0962280800063127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460860 + }, + { + "epoch": 2.235136213236854, + "grad_norm": 2.0368045028362758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460870 + }, + { + "epoch": 2.2351847114296906, + "grad_norm": 2.6197435332164787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460880 + }, + { + "epoch": 2.2352332096225265, + "grad_norm": 1.7950014807865955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460890 + }, + { + "epoch": 2.2352817078153624, + "grad_norm": 2.3033553731011125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460900 + }, + { + "epoch": 2.2353302060081988, + "grad_norm": 2.017160660727768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460910 + }, + { + "epoch": 2.2353787042010347, + "grad_norm": 1.9377228710482086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460920 + }, + { + "epoch": 2.2354272023938706, + "grad_norm": 2.3413139871308886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460930 + }, + { + "epoch": 2.235475700586707, + "grad_norm": 1.6821587678350625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460940 + }, + { + "epoch": 2.235524198779543, + "grad_norm": 1.875268829110155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460950 + }, + { + "epoch": 2.2355726969723793, + "grad_norm": 2.1705506725311352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460960 + }, + { + "epoch": 2.235621195165215, + "grad_norm": 2.0543511780601875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460970 + }, + { + "epoch": 2.235669693358051, + "grad_norm": 2.3619644906602844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460980 + }, + { + "epoch": 2.2357181915508875, + "grad_norm": 2.1007920736337837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 460990 + }, + { + "epoch": 2.2357666897437234, + "grad_norm": 1.9799820449861727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461000 + }, + { + "epoch": 2.2358151879365593, + "grad_norm": 2.0986155035984666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461010 + }, + { + "epoch": 2.2358636861293957, + "grad_norm": 1.825924478282559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461020 + }, + { + "epoch": 2.2359121843222316, + "grad_norm": 2.4350642391368638e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461030 + }, + { + "epoch": 2.235960682515068, + "grad_norm": 1.931326920612264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461040 + }, + { + "epoch": 2.236009180707904, + "grad_norm": 1.732002097298846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461050 + }, + { + "epoch": 2.23605767890074, + "grad_norm": 1.8787471134373845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461060 + }, + { + "epoch": 2.236106177093576, + "grad_norm": 1.857474174471463e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461070 + }, + { + "epoch": 2.236154675286412, + "grad_norm": 2.8005812779952066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461080 + }, + { + "epoch": 2.2362031734792485, + "grad_norm": 1.9613997537248906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461090 + }, + { + "epoch": 2.2362516716720844, + "grad_norm": 1.9650052252018213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461100 + }, + { + "epoch": 2.2363001698649203, + "grad_norm": 1.9050480304372286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461110 + }, + { + "epoch": 2.2363486680577567, + "grad_norm": 1.9397102590801296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461120 + }, + { + "epoch": 2.2363971662505926, + "grad_norm": 2.2741014404914495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461130 + }, + { + "epoch": 2.2364456644434285, + "grad_norm": 2.0640447573327947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461140 + }, + { + "epoch": 2.236494162636265, + "grad_norm": 1.9007533325066106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461150 + }, + { + "epoch": 2.236542660829101, + "grad_norm": 1.9179390520207562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461160 + }, + { + "epoch": 2.2365911590219367, + "grad_norm": 2.1161655539003732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461170 + }, + { + "epoch": 2.236639657214773, + "grad_norm": 1.8974166238194812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461180 + }, + { + "epoch": 2.236688155407609, + "grad_norm": 1.712080965887708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461190 + }, + { + "epoch": 2.2367366536004454, + "grad_norm": 1.8209107111033518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461200 + }, + { + "epoch": 2.2367851517932813, + "grad_norm": 1.659327431013935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461210 + }, + { + "epoch": 2.2368336499861172, + "grad_norm": 1.6988080275837092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461220 + }, + { + "epoch": 2.2368821481789536, + "grad_norm": 2.08665102974237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461230 + }, + { + "epoch": 2.2369306463717895, + "grad_norm": 1.9835285414160353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461240 + }, + { + "epoch": 2.236979144564626, + "grad_norm": 2.1686959783551174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461250 + }, + { + "epoch": 2.237027642757462, + "grad_norm": 1.9781134952268076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461260 + }, + { + "epoch": 2.2370761409502977, + "grad_norm": 1.9124648531487765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461270 + }, + { + "epoch": 2.237124639143134, + "grad_norm": 2.113000086012562e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461280 + }, + { + "epoch": 2.23717313733597, + "grad_norm": 1.9010370166938628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461290 + }, + { + "epoch": 2.237221635528806, + "grad_norm": 1.7937802354595078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461300 + }, + { + "epoch": 2.2372701337216423, + "grad_norm": 1.8443623517327978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461310 + }, + { + "epoch": 2.237318631914478, + "grad_norm": 1.917456948774543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461320 + }, + { + "epoch": 2.2373671301073146, + "grad_norm": 2.4911480878131442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461330 + }, + { + "epoch": 2.2374156283001505, + "grad_norm": 2.3305092966552365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461340 + }, + { + "epoch": 2.2374641264929864, + "grad_norm": 2.524886788535241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461350 + }, + { + "epoch": 2.237512624685823, + "grad_norm": 1.804770732860561e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461360 + }, + { + "epoch": 2.2375611228786587, + "grad_norm": 1.7034366806001344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461370 + }, + { + "epoch": 2.2376096210714946, + "grad_norm": 2.0277779455568634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461380 + }, + { + "epoch": 2.237658119264331, + "grad_norm": 1.9124104966294908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461390 + }, + { + "epoch": 2.237706617457167, + "grad_norm": 1.9240157911326605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461400 + }, + { + "epoch": 2.2377551156500033, + "grad_norm": 2.179380231837058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461410 + }, + { + "epoch": 2.237803613842839, + "grad_norm": 1.9130578010617683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461420 + }, + { + "epoch": 2.237852112035675, + "grad_norm": 1.8643556032316155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461430 + }, + { + "epoch": 2.2379006102285115, + "grad_norm": 1.9818376273406102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461440 + }, + { + "epoch": 2.2379491084213474, + "grad_norm": 1.7431160515002375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461450 + }, + { + "epoch": 2.2379976066141833, + "grad_norm": 1.618703038275271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461460 + }, + { + "epoch": 2.2380461048070197, + "grad_norm": 2.057966241864051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461470 + }, + { + "epoch": 2.2380946029998556, + "grad_norm": 2.6204553194020264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461480 + }, + { + "epoch": 2.238143101192692, + "grad_norm": 1.6580324668780122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461490 + }, + { + "epoch": 2.238191599385528, + "grad_norm": 2.0877898521121097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461500 + }, + { + "epoch": 2.238240097578364, + "grad_norm": 2.430692092048048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461510 + }, + { + "epoch": 2.2382885957712, + "grad_norm": 1.8569680904079178e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461520 + }, + { + "epoch": 2.238337093964036, + "grad_norm": 1.7874789648431033e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461530 + }, + { + "epoch": 2.238385592156872, + "grad_norm": 1.8234063148270252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461540 + }, + { + "epoch": 2.2384340903497084, + "grad_norm": 1.7217425707372058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461550 + }, + { + "epoch": 2.2384825885425443, + "grad_norm": 1.6014375603390363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461560 + }, + { + "epoch": 2.2385310867353807, + "grad_norm": 1.8972228232883026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461570 + }, + { + "epoch": 2.2385795849282166, + "grad_norm": 1.9593409561480257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461580 + }, + { + "epoch": 2.2386280831210525, + "grad_norm": 1.982347264117834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461590 + }, + { + "epoch": 2.238676581313889, + "grad_norm": 1.566506924177702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461600 + }, + { + "epoch": 2.238725079506725, + "grad_norm": 1.772226632112961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461610 + }, + { + "epoch": 2.238773577699561, + "grad_norm": 1.8288906389329895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461620 + }, + { + "epoch": 2.238822075892397, + "grad_norm": 1.9359529090934302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461630 + }, + { + "epoch": 2.238870574085233, + "grad_norm": 1.7283044329019503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461640 + }, + { + "epoch": 2.2389190722780694, + "grad_norm": 2.0268187128635873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461650 + }, + { + "epoch": 2.2389675704709053, + "grad_norm": 1.9724824440459088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461660 + }, + { + "epoch": 2.2390160686637413, + "grad_norm": 1.7038280120118543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461670 + }, + { + "epoch": 2.2390645668565776, + "grad_norm": 2.016127176318605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461680 + }, + { + "epoch": 2.2391130650494135, + "grad_norm": 1.6478601594371867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461690 + }, + { + "epoch": 2.2391615632422495, + "grad_norm": 1.8109806987354204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461700 + }, + { + "epoch": 2.239210061435086, + "grad_norm": 1.7884527636624625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461710 + }, + { + "epoch": 2.2392585596279218, + "grad_norm": 2.1344954248547765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461720 + }, + { + "epoch": 2.239307057820758, + "grad_norm": 2.0459639316072753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461730 + }, + { + "epoch": 2.239355556013594, + "grad_norm": 2.1836893182580752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461740 + }, + { + "epoch": 2.23940405420643, + "grad_norm": 1.7309366384665736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461750 + }, + { + "epoch": 2.2394525523992663, + "grad_norm": 2.1466220800903102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461760 + }, + { + "epoch": 2.2395010505921022, + "grad_norm": 1.7945417596365587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461770 + }, + { + "epoch": 2.2395495487849386, + "grad_norm": 1.5496521399427365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461780 + }, + { + "epoch": 2.2395980469777745, + "grad_norm": 1.7389790940569583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461790 + }, + { + "epoch": 2.2396465451706105, + "grad_norm": 1.7059093693205796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461800 + }, + { + "epoch": 2.239695043363447, + "grad_norm": 1.4918482449388648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461810 + }, + { + "epoch": 2.2397435415562827, + "grad_norm": 1.8915532251639888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461820 + }, + { + "epoch": 2.2397920397491187, + "grad_norm": 1.9023911335125376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461830 + }, + { + "epoch": 2.239840537941955, + "grad_norm": 2.489910677638818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461840 + }, + { + "epoch": 2.239889036134791, + "grad_norm": 2.09245705207195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461850 + }, + { + "epoch": 2.2399375343276273, + "grad_norm": 1.9025314657028503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461860 + }, + { + "epoch": 2.2399860325204632, + "grad_norm": 1.710608188432161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461870 + }, + { + "epoch": 2.240034530713299, + "grad_norm": 1.6374759326254207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461880 + }, + { + "epoch": 2.2400830289061355, + "grad_norm": 1.5535860597992723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461890 + }, + { + "epoch": 2.2401315270989715, + "grad_norm": 1.8268373480623268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461900 + }, + { + "epoch": 2.2401800252918074, + "grad_norm": 1.7336439839255036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461910 + }, + { + "epoch": 2.2402285234846437, + "grad_norm": 1.664842308457537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461920 + }, + { + "epoch": 2.2402770216774797, + "grad_norm": 2.0313176918307363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461930 + }, + { + "epoch": 2.240325519870316, + "grad_norm": 1.970802010475836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461940 + }, + { + "epoch": 2.240374018063152, + "grad_norm": 1.5749375137374955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461950 + }, + { + "epoch": 2.240422516255988, + "grad_norm": 1.6291391347067474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461960 + }, + { + "epoch": 2.2404710144488242, + "grad_norm": 1.6416224823956327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461970 + }, + { + "epoch": 2.24051951264166, + "grad_norm": 2.2836434965256558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461980 + }, + { + "epoch": 2.240568010834496, + "grad_norm": 1.6138056224690445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 461990 + }, + { + "epoch": 2.2406165090273324, + "grad_norm": 2.3828013340221332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462000 + }, + { + "epoch": 2.2406650072201684, + "grad_norm": 2.8363340121018155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462010 + }, + { + "epoch": 2.2407135054130047, + "grad_norm": 2.175400126702698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462020 + }, + { + "epoch": 2.2407620036058407, + "grad_norm": 1.6062685403994692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462030 + }, + { + "epoch": 2.2408105017986766, + "grad_norm": 1.9003149276386466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462040 + }, + { + "epoch": 2.240858999991513, + "grad_norm": 1.6374654521200682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462050 + }, + { + "epoch": 2.240907498184349, + "grad_norm": 1.7519393935572225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462060 + }, + { + "epoch": 2.240955996377185, + "grad_norm": 1.743038069434988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462070 + }, + { + "epoch": 2.241004494570021, + "grad_norm": 1.8954191105535756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462080 + }, + { + "epoch": 2.241052992762857, + "grad_norm": 1.6412442960245244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462090 + }, + { + "epoch": 2.2411014909556934, + "grad_norm": 1.768779256394737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462100 + }, + { + "epoch": 2.2411499891485294, + "grad_norm": 1.676322547439213e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462110 + }, + { + "epoch": 2.2411984873413653, + "grad_norm": 1.583791764403486e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462120 + }, + { + "epoch": 2.2412469855342017, + "grad_norm": 2.0115875187798338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462130 + }, + { + "epoch": 2.2412954837270376, + "grad_norm": 2.3558893502695355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462140 + }, + { + "epoch": 2.241343981919874, + "grad_norm": 1.4857766572617948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462150 + }, + { + "epoch": 2.24139248011271, + "grad_norm": 1.7446867062176352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462160 + }, + { + "epoch": 2.241440978305546, + "grad_norm": 1.450968589722379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462170 + }, + { + "epoch": 2.241489476498382, + "grad_norm": 1.6740884234422992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462180 + }, + { + "epoch": 2.241537974691218, + "grad_norm": 1.8033057713751077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462190 + }, + { + "epoch": 2.241586472884054, + "grad_norm": 1.6177809314399383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462200 + }, + { + "epoch": 2.2416349710768904, + "grad_norm": 1.4568016126759176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462210 + }, + { + "epoch": 2.2416834692697263, + "grad_norm": 1.4431615902310568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462220 + }, + { + "epoch": 2.241731967462562, + "grad_norm": 2.0110261900185833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462230 + }, + { + "epoch": 2.2417804656553986, + "grad_norm": 1.8115940747520654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462240 + }, + { + "epoch": 2.2418289638482345, + "grad_norm": 1.740703581276648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462250 + }, + { + "epoch": 2.241877462041071, + "grad_norm": 1.5451151469392244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462260 + }, + { + "epoch": 2.241925960233907, + "grad_norm": 1.633041257775858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462270 + }, + { + "epoch": 2.2419744584267427, + "grad_norm": 2.0097461472801115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462280 + }, + { + "epoch": 2.242022956619579, + "grad_norm": 1.7281021058579427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462290 + }, + { + "epoch": 2.242071454812415, + "grad_norm": 1.5628382143972885e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462300 + }, + { + "epoch": 2.2421199530052514, + "grad_norm": 1.5527469088283397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462310 + }, + { + "epoch": 2.2421684511980873, + "grad_norm": 1.5741942860358904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462320 + }, + { + "epoch": 2.242216949390923, + "grad_norm": 1.4617631549640464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462330 + }, + { + "epoch": 2.2422654475837596, + "grad_norm": 1.6252595713694973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462340 + }, + { + "epoch": 2.2423139457765955, + "grad_norm": 1.72186087610271e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462350 + }, + { + "epoch": 2.2423624439694314, + "grad_norm": 2.174481217309676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462360 + }, + { + "epoch": 2.2424109421622678, + "grad_norm": 2.1479957368342184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462370 + }, + { + "epoch": 2.2424594403551037, + "grad_norm": 2.063205783997546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462380 + }, + { + "epoch": 2.24250793854794, + "grad_norm": 2.0078410045698547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462390 + }, + { + "epoch": 2.242556436740776, + "grad_norm": 1.810330907403568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462400 + }, + { + "epoch": 2.242604934933612, + "grad_norm": 2.0009963463962777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462410 + }, + { + "epoch": 2.2426534331264483, + "grad_norm": 1.7156157383624304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462420 + }, + { + "epoch": 2.242701931319284, + "grad_norm": 1.777652158807541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462430 + }, + { + "epoch": 2.24275042951212, + "grad_norm": 1.3250064156977714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462440 + }, + { + "epoch": 2.2427989277049565, + "grad_norm": 1.422629924974217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462450 + }, + { + "epoch": 2.2428474258977924, + "grad_norm": 1.7924092432508587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462460 + }, + { + "epoch": 2.2428959240906288, + "grad_norm": 1.8917971189580385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462470 + }, + { + "epoch": 2.2429444222834647, + "grad_norm": 1.6616516163026063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462480 + }, + { + "epoch": 2.2429929204763006, + "grad_norm": 1.622790080091363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462490 + }, + { + "epoch": 2.243041418669137, + "grad_norm": 2.0473461148640126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462500 + }, + { + "epoch": 2.243089916861973, + "grad_norm": 1.5452627621925785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462510 + }, + { + "epoch": 2.243138415054809, + "grad_norm": 1.7028174426059195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462520 + }, + { + "epoch": 2.243186913247645, + "grad_norm": 2.0350141127778443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462530 + }, + { + "epoch": 2.243235411440481, + "grad_norm": 1.3016015820710436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462540 + }, + { + "epoch": 2.2432839096333175, + "grad_norm": 1.619351408521652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462550 + }, + { + "epoch": 2.2433324078261534, + "grad_norm": 1.492991152929335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462560 + }, + { + "epoch": 2.2433809060189893, + "grad_norm": 1.7715118261207863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462570 + }, + { + "epoch": 2.2434294042118257, + "grad_norm": 1.6837704563954503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462580 + }, + { + "epoch": 2.2434779024046616, + "grad_norm": 1.4678307458382278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462590 + }, + { + "epoch": 2.2435264005974975, + "grad_norm": 1.8585883054811347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462600 + }, + { + "epoch": 2.243574898790334, + "grad_norm": 1.3554767086532138e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462610 + }, + { + "epoch": 2.24362339698317, + "grad_norm": 1.3842952562015398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462620 + }, + { + "epoch": 2.243671895176006, + "grad_norm": 2.050318848034749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462630 + }, + { + "epoch": 2.243720393368842, + "grad_norm": 1.564444929158526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462640 + }, + { + "epoch": 2.243768891561678, + "grad_norm": 1.6232307942232183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462650 + }, + { + "epoch": 2.2438173897545144, + "grad_norm": 1.3740842241816154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462660 + }, + { + "epoch": 2.2438658879473503, + "grad_norm": 1.6181751050226012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462670 + }, + { + "epoch": 2.2439143861401867, + "grad_norm": 2.4961844147242118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462680 + }, + { + "epoch": 2.2439628843330226, + "grad_norm": 2.4594013936507508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462690 + }, + { + "epoch": 2.2440113825258585, + "grad_norm": 1.6215386366980056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462700 + }, + { + "epoch": 2.244059880718695, + "grad_norm": 1.3859553504858013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462710 + }, + { + "epoch": 2.244108378911531, + "grad_norm": 1.740736621513861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462720 + }, + { + "epoch": 2.2441568771043667, + "grad_norm": 1.6511195966018022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462730 + }, + { + "epoch": 2.244205375297203, + "grad_norm": 1.3171947976786669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462740 + }, + { + "epoch": 2.244253873490039, + "grad_norm": 1.989790732181973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462750 + }, + { + "epoch": 2.244302371682875, + "grad_norm": 1.5418235577158157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462760 + }, + { + "epoch": 2.2443508698757113, + "grad_norm": 1.4138550774589476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462770 + }, + { + "epoch": 2.2443993680685472, + "grad_norm": 1.805701188573039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462780 + }, + { + "epoch": 2.2444478662613836, + "grad_norm": 1.546505501437423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462790 + }, + { + "epoch": 2.2444963644542195, + "grad_norm": 1.4195173037023778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462800 + }, + { + "epoch": 2.2445448626470554, + "grad_norm": 1.3823563627113344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462810 + }, + { + "epoch": 2.244593360839892, + "grad_norm": 1.3871460424752513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462820 + }, + { + "epoch": 2.2446418590327277, + "grad_norm": 2.232426687953648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462830 + }, + { + "epoch": 2.244690357225564, + "grad_norm": 1.3920243624454542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462840 + }, + { + "epoch": 2.2447388554184, + "grad_norm": 1.6572645478163395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462850 + }, + { + "epoch": 2.244787353611236, + "grad_norm": 1.680824013305937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462860 + }, + { + "epoch": 2.2448358518040723, + "grad_norm": 1.4792869151847299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462870 + }, + { + "epoch": 2.2448843499969082, + "grad_norm": 1.7552487463490252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462880 + }, + { + "epoch": 2.244932848189744, + "grad_norm": 1.346626099518744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462890 + }, + { + "epoch": 2.2449813463825805, + "grad_norm": 1.953831407774942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462900 + }, + { + "epoch": 2.2450298445754164, + "grad_norm": 1.44070595453627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462910 + }, + { + "epoch": 2.245078342768253, + "grad_norm": 1.7176084554648696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462920 + }, + { + "epoch": 2.2451268409610887, + "grad_norm": 1.862943932451344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462930 + }, + { + "epoch": 2.2451753391539246, + "grad_norm": 1.6562937688036072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462940 + }, + { + "epoch": 2.245223837346761, + "grad_norm": 1.3843571622373929e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462950 + }, + { + "epoch": 2.245272335539597, + "grad_norm": 1.67712492782357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462960 + }, + { + "epoch": 2.245320833732433, + "grad_norm": 1.3446854296716992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462970 + }, + { + "epoch": 2.245369331925269, + "grad_norm": 2.4230605077946166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462980 + }, + { + "epoch": 2.245417830118105, + "grad_norm": 1.5270526176891508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 462990 + }, + { + "epoch": 2.2454663283109415, + "grad_norm": 1.9500349779377757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463000 + }, + { + "epoch": 2.2455148265037774, + "grad_norm": 1.492540846470547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463010 + }, + { + "epoch": 2.2455633246966133, + "grad_norm": 1.6742866648655763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463020 + }, + { + "epoch": 2.2456118228894497, + "grad_norm": 1.6777152112013027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463030 + }, + { + "epoch": 2.2456603210822856, + "grad_norm": 2.180506442073238e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463040 + }, + { + "epoch": 2.2457088192751216, + "grad_norm": 1.3772769591469114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463050 + }, + { + "epoch": 2.245757317467958, + "grad_norm": 1.4285707727879071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463060 + }, + { + "epoch": 2.245805815660794, + "grad_norm": 1.361651591480495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463070 + }, + { + "epoch": 2.24585431385363, + "grad_norm": 1.633746116169732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463080 + }, + { + "epoch": 2.245902812046466, + "grad_norm": 1.9234466464013167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463090 + }, + { + "epoch": 2.245951310239302, + "grad_norm": 1.4333598308269302e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463100 + }, + { + "epoch": 2.2459998084321384, + "grad_norm": 1.2773536894883364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463110 + }, + { + "epoch": 2.2460483066249743, + "grad_norm": 1.3954911892710697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463120 + }, + { + "epoch": 2.2460968048178103, + "grad_norm": 1.4559008221226577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463130 + }, + { + "epoch": 2.2461453030106466, + "grad_norm": 1.49665453363923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463140 + }, + { + "epoch": 2.2461938012034826, + "grad_norm": 1.5658802254847615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463150 + }, + { + "epoch": 2.246242299396319, + "grad_norm": 1.6715279826939877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463160 + }, + { + "epoch": 2.246290797589155, + "grad_norm": 2.40783233351749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463170 + }, + { + "epoch": 2.2463392957819908, + "grad_norm": 1.860601450687227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463180 + }, + { + "epoch": 2.246387793974827, + "grad_norm": 1.7120399320447177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463190 + }, + { + "epoch": 2.246436292167663, + "grad_norm": 1.8547188673778692e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463200 + }, + { + "epoch": 2.2464847903604994, + "grad_norm": 1.555189044211147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463210 + }, + { + "epoch": 2.2465332885533353, + "grad_norm": 1.564306018053685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463220 + }, + { + "epoch": 2.2465817867461713, + "grad_norm": 1.8286812064616242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463230 + }, + { + "epoch": 2.2466302849390076, + "grad_norm": 1.4708634310522939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463240 + }, + { + "epoch": 2.2466787831318435, + "grad_norm": 1.357232548571119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463250 + }, + { + "epoch": 2.2467272813246795, + "grad_norm": 1.622982992444122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463260 + }, + { + "epoch": 2.246775779517516, + "grad_norm": 1.375542435511079e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463270 + }, + { + "epoch": 2.2468242777103518, + "grad_norm": 1.4834173889255453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463280 + }, + { + "epoch": 2.2468727759031877, + "grad_norm": 1.2398118975909256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463290 + }, + { + "epoch": 2.246921274096024, + "grad_norm": 1.4346869470216461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463300 + }, + { + "epoch": 2.24696977228886, + "grad_norm": 1.4217109267633532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463310 + }, + { + "epoch": 2.2470182704816963, + "grad_norm": 1.2902583890195274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463320 + }, + { + "epoch": 2.2470667686745323, + "grad_norm": 1.8147352065511768e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463330 + }, + { + "epoch": 2.247115266867368, + "grad_norm": 1.5305280598454374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463340 + }, + { + "epoch": 2.2471637650602045, + "grad_norm": 1.4806628811925293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463350 + }, + { + "epoch": 2.2472122632530405, + "grad_norm": 1.3833483691882975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463360 + }, + { + "epoch": 2.247260761445877, + "grad_norm": 1.8819514835399787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463370 + }, + { + "epoch": 2.2473092596387128, + "grad_norm": 1.4396801972793583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463380 + }, + { + "epoch": 2.2473577578315487, + "grad_norm": 1.5503859529530928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463390 + }, + { + "epoch": 2.247406256024385, + "grad_norm": 1.4675155313170762e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463400 + }, + { + "epoch": 2.247454754217221, + "grad_norm": 2.0946496093188216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463410 + }, + { + "epoch": 2.247503252410057, + "grad_norm": 1.4092171873869574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463420 + }, + { + "epoch": 2.2475517506028933, + "grad_norm": 2.089358197565616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463430 + }, + { + "epoch": 2.247600248795729, + "grad_norm": 1.598927212853596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463440 + }, + { + "epoch": 2.2476487469885655, + "grad_norm": 1.2033098073516157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463450 + }, + { + "epoch": 2.2476972451814015, + "grad_norm": 1.419721229467541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463460 + }, + { + "epoch": 2.2477457433742374, + "grad_norm": 1.7238665606100767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463470 + }, + { + "epoch": 2.2477942415670737, + "grad_norm": 1.5846206125047502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463480 + }, + { + "epoch": 2.2478427397599097, + "grad_norm": 1.3306338253471495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463490 + }, + { + "epoch": 2.2478912379527456, + "grad_norm": 1.43449776501825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463500 + }, + { + "epoch": 2.247939736145582, + "grad_norm": 2.727229997390168e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463510 + }, + { + "epoch": 2.247988234338418, + "grad_norm": 1.6238310251992516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463520 + }, + { + "epoch": 2.2480367325312542, + "grad_norm": 1.5597262148503432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463530 + }, + { + "epoch": 2.24808523072409, + "grad_norm": 1.375536395897825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463540 + }, + { + "epoch": 2.248133728916926, + "grad_norm": 1.4593894093195559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463550 + }, + { + "epoch": 2.2481822271097625, + "grad_norm": 1.393757909085025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463560 + }, + { + "epoch": 2.2482307253025984, + "grad_norm": 1.2890350120642324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463570 + }, + { + "epoch": 2.2482792234954343, + "grad_norm": 1.6564994709256098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463580 + }, + { + "epoch": 2.2483277216882707, + "grad_norm": 1.1285576917430262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463590 + }, + { + "epoch": 2.2483762198811066, + "grad_norm": 1.790181158867199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463600 + }, + { + "epoch": 2.248424718073943, + "grad_norm": 1.4494141886700618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463610 + }, + { + "epoch": 2.248473216266779, + "grad_norm": 1.254770687353357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463620 + }, + { + "epoch": 2.248521714459615, + "grad_norm": 1.1376211972446981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463630 + }, + { + "epoch": 2.248570212652451, + "grad_norm": 1.061363175125507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463640 + }, + { + "epoch": 2.248618710845287, + "grad_norm": 1.630079538017526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463650 + }, + { + "epoch": 2.248667209038123, + "grad_norm": 1.566044183221038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463660 + }, + { + "epoch": 2.2487157072309594, + "grad_norm": 1.6983904060907662e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463670 + }, + { + "epoch": 2.2487642054237953, + "grad_norm": 1.4642258960861909e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463680 + }, + { + "epoch": 2.2488127036166317, + "grad_norm": 1.7430542342822264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463690 + }, + { + "epoch": 2.2488612018094676, + "grad_norm": 1.6227453158990102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463700 + }, + { + "epoch": 2.2489097000023035, + "grad_norm": 1.547702055404443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463710 + }, + { + "epoch": 2.24895819819514, + "grad_norm": 1.6543983960559672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463720 + }, + { + "epoch": 2.249006696387976, + "grad_norm": 1.7014217590372027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463730 + }, + { + "epoch": 2.249055194580812, + "grad_norm": 1.584007947030841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463740 + }, + { + "epoch": 2.249103692773648, + "grad_norm": 1.7166197352480594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463750 + }, + { + "epoch": 2.249152190966484, + "grad_norm": 1.1344087447184847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463760 + }, + { + "epoch": 2.2492006891593204, + "grad_norm": 1.6775391742385182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463770 + }, + { + "epoch": 2.2492491873521563, + "grad_norm": 1.5087143978576023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463780 + }, + { + "epoch": 2.249297685544992, + "grad_norm": 1.5639132655564936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463790 + }, + { + "epoch": 2.2493461837378286, + "grad_norm": 1.4464234254774055e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463800 + }, + { + "epoch": 2.2493946819306645, + "grad_norm": 1.5478299530968798e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463810 + }, + { + "epoch": 2.2494431801235004, + "grad_norm": 1.976172292472711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463820 + }, + { + "epoch": 2.249491678316337, + "grad_norm": 1.3360290651576179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463830 + }, + { + "epoch": 2.2495401765091727, + "grad_norm": 1.5994169544342185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463840 + }, + { + "epoch": 2.249588674702009, + "grad_norm": 1.2454327347199978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463850 + }, + { + "epoch": 2.249637172894845, + "grad_norm": 1.8777587484919422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463860 + }, + { + "epoch": 2.249685671087681, + "grad_norm": 1.8172775284597265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463870 + }, + { + "epoch": 2.2497341692805173, + "grad_norm": 1.5487406912484403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463880 + }, + { + "epoch": 2.249782667473353, + "grad_norm": 1.0678098405492165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463890 + }, + { + "epoch": 2.2498311656661896, + "grad_norm": 1.4122562674856454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463900 + }, + { + "epoch": 2.2498796638590255, + "grad_norm": 1.3495148998288187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463910 + }, + { + "epoch": 2.2499281620518614, + "grad_norm": 1.4705747730658913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463920 + }, + { + "epoch": 2.249976660244698, + "grad_norm": 1.2513591052254469e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463930 + }, + { + "epoch": 2.2500251584375337, + "grad_norm": 1.6890115972501007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463940 + }, + { + "epoch": 2.2500736566303696, + "grad_norm": 1.1842277380935684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463950 + }, + { + "epoch": 2.250122154823206, + "grad_norm": 1.3076440374959475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463960 + }, + { + "epoch": 2.250170653016042, + "grad_norm": 1.2956391515217547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463970 + }, + { + "epoch": 2.250219151208878, + "grad_norm": 1.5576379297499443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463980 + }, + { + "epoch": 2.250267649401714, + "grad_norm": 1.404177307762211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 463990 + }, + { + "epoch": 2.25031614759455, + "grad_norm": 1.2608381005918545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464000 + }, + { + "epoch": 2.2503646457873865, + "grad_norm": 1.2012224992474785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464010 + }, + { + "epoch": 2.2504131439802224, + "grad_norm": 1.3458405057065193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464020 + }, + { + "epoch": 2.2504616421730583, + "grad_norm": 2.0812205292486397e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464030 + }, + { + "epoch": 2.2505101403658947, + "grad_norm": 1.636550983619145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464040 + }, + { + "epoch": 2.2505586385587306, + "grad_norm": 1.2307558527879792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464050 + }, + { + "epoch": 2.250607136751567, + "grad_norm": 1.7862545220737047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464060 + }, + { + "epoch": 2.250655634944403, + "grad_norm": 1.4444812457270473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464070 + }, + { + "epoch": 2.250704133137239, + "grad_norm": 2.1075427625305565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464080 + }, + { + "epoch": 2.250752631330075, + "grad_norm": 1.8884344754610538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464090 + }, + { + "epoch": 2.250801129522911, + "grad_norm": 1.3837387236037557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464100 + }, + { + "epoch": 2.2508496277157475, + "grad_norm": 1.242079505914262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464110 + }, + { + "epoch": 2.2508981259085834, + "grad_norm": 1.3818704402979165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464120 + }, + { + "epoch": 2.2509466241014193, + "grad_norm": 1.7286327036458715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464130 + }, + { + "epoch": 2.2509951222942557, + "grad_norm": 1.148139094908629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464140 + }, + { + "epoch": 2.2510436204870916, + "grad_norm": 1.6397299518189357e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464150 + }, + { + "epoch": 2.2510921186799275, + "grad_norm": 9.796914390847178e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464160 + }, + { + "epoch": 2.251140616872764, + "grad_norm": 1.473321642464498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464170 + }, + { + "epoch": 2.2511891150656, + "grad_norm": 1.528311166509866e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464180 + }, + { + "epoch": 2.2512376132584357, + "grad_norm": 2.171635138381589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464190 + }, + { + "epoch": 2.251286111451272, + "grad_norm": 1.1810074695972617e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464200 + }, + { + "epoch": 2.251334609644108, + "grad_norm": 1.783398673183001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464210 + }, + { + "epoch": 2.2513831078369444, + "grad_norm": 1.263016446984011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464220 + }, + { + "epoch": 2.2514316060297803, + "grad_norm": 1.9104129833635852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464230 + }, + { + "epoch": 2.2514801042226162, + "grad_norm": 1.771941882111605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464240 + }, + { + "epoch": 2.2515286024154526, + "grad_norm": 1.660675152947988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464250 + }, + { + "epoch": 2.2515771006082885, + "grad_norm": 1.6438182370848153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464260 + }, + { + "epoch": 2.251625598801125, + "grad_norm": 1.731305943053485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464270 + }, + { + "epoch": 2.251674096993961, + "grad_norm": 1.7586389233770205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464280 + }, + { + "epoch": 2.2517225951867967, + "grad_norm": 1.3146653543572029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464290 + }, + { + "epoch": 2.251771093379633, + "grad_norm": 1.6427778248839786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464300 + }, + { + "epoch": 2.251819591572469, + "grad_norm": 1.7457956857924728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464310 + }, + { + "epoch": 2.251868089765305, + "grad_norm": 1.7402543406319637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464320 + }, + { + "epoch": 2.2519165879581413, + "grad_norm": 1.502874624748074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464330 + }, + { + "epoch": 2.2519650861509772, + "grad_norm": 1.057338039345268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464340 + }, + { + "epoch": 2.252013584343813, + "grad_norm": 1.4812409077080702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464350 + }, + { + "epoch": 2.2520620825366495, + "grad_norm": 1.9396155792605896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464360 + }, + { + "epoch": 2.2521105807294854, + "grad_norm": 1.4200611353487602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464370 + }, + { + "epoch": 2.252159078922322, + "grad_norm": 1.360887580403869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464380 + }, + { + "epoch": 2.2522075771151577, + "grad_norm": 1.4893649868952252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464390 + }, + { + "epoch": 2.2522560753079937, + "grad_norm": 1.789035231070102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464400 + }, + { + "epoch": 2.25230457350083, + "grad_norm": 1.3646959118318591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464410 + }, + { + "epoch": 2.252353071693666, + "grad_norm": 1.554825068694754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464420 + }, + { + "epoch": 2.2524015698865023, + "grad_norm": 1.8778548493969538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464430 + }, + { + "epoch": 2.2524500680793382, + "grad_norm": 1.843399033418791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464440 + }, + { + "epoch": 2.252498566272174, + "grad_norm": 1.1866837290597232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464450 + }, + { + "epoch": 2.2525470644650105, + "grad_norm": 1.5358951444000013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464460 + }, + { + "epoch": 2.2525955626578464, + "grad_norm": 2.077448790771541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464470 + }, + { + "epoch": 2.2526440608506824, + "grad_norm": 1.1960660017962255e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464480 + }, + { + "epoch": 2.2526925590435187, + "grad_norm": 1.4540352033520776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464490 + }, + { + "epoch": 2.2527410572363546, + "grad_norm": 1.2279060435105293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464500 + }, + { + "epoch": 2.2527895554291906, + "grad_norm": 1.1554276646563721e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464510 + }, + { + "epoch": 2.252838053622027, + "grad_norm": 1.3148353517067335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464520 + }, + { + "epoch": 2.252886551814863, + "grad_norm": 1.3070610371812563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464530 + }, + { + "epoch": 2.2529350500076992, + "grad_norm": 1.5832245736646655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464540 + }, + { + "epoch": 2.252983548200535, + "grad_norm": 1.2629173262723725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464550 + }, + { + "epoch": 2.253032046393371, + "grad_norm": 1.3079529459503192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464560 + }, + { + "epoch": 2.2530805445862074, + "grad_norm": 1.3694923417517657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464570 + }, + { + "epoch": 2.2531290427790434, + "grad_norm": 2.1338905753509607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464580 + }, + { + "epoch": 2.2531775409718797, + "grad_norm": 1.1353010087589155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464590 + }, + { + "epoch": 2.2532260391647156, + "grad_norm": 1.1628290330634172e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464600 + }, + { + "epoch": 2.2532745373575516, + "grad_norm": 1.7638488003512975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464610 + }, + { + "epoch": 2.253323035550388, + "grad_norm": 1.1010097722419232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464620 + }, + { + "epoch": 2.253371533743224, + "grad_norm": 1.4017053295845017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464630 + }, + { + "epoch": 2.25342003193606, + "grad_norm": 1.2614305155977945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464640 + }, + { + "epoch": 2.253468530128896, + "grad_norm": 1.4267353520835968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464650 + }, + { + "epoch": 2.253517028321732, + "grad_norm": 1.3932456965903839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464660 + }, + { + "epoch": 2.2535655265145684, + "grad_norm": 1.4603851461458817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464670 + }, + { + "epoch": 2.2536140247074044, + "grad_norm": 1.275622185659131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464680 + }, + { + "epoch": 2.2536625229002403, + "grad_norm": 1.4136636750095022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464690 + }, + { + "epoch": 2.2537110210930766, + "grad_norm": 1.737051746886209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464700 + }, + { + "epoch": 2.2537595192859126, + "grad_norm": 1.3412155830394568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464710 + }, + { + "epoch": 2.2538080174787485, + "grad_norm": 1.8526508327454394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464720 + }, + { + "epoch": 2.253856515671585, + "grad_norm": 1.732247234542683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464730 + }, + { + "epoch": 2.2539050138644208, + "grad_norm": 1.7680605424175155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464740 + }, + { + "epoch": 2.253953512057257, + "grad_norm": 2.173486279843928e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464750 + }, + { + "epoch": 2.254002010250093, + "grad_norm": 1.1493677121166002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464760 + }, + { + "epoch": 2.254050508442929, + "grad_norm": 1.388589510042948e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464770 + }, + { + "epoch": 2.2540990066357653, + "grad_norm": 1.5970709199564226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464780 + }, + { + "epoch": 2.2541475048286013, + "grad_norm": 1.6996807516989065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464790 + }, + { + "epoch": 2.2541960030214376, + "grad_norm": 1.4672807857607495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464800 + }, + { + "epoch": 2.2542445012142736, + "grad_norm": 1.2323154052751306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464810 + }, + { + "epoch": 2.2542929994071095, + "grad_norm": 1.2721464770493185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464820 + }, + { + "epoch": 2.254341497599946, + "grad_norm": 2.216969186008555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464830 + }, + { + "epoch": 2.2543899957927818, + "grad_norm": 1.7072096625270206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464840 + }, + { + "epoch": 2.2544384939856177, + "grad_norm": 1.557017270670258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464850 + }, + { + "epoch": 2.254486992178454, + "grad_norm": 1.421066020412809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464860 + }, + { + "epoch": 2.25453549037129, + "grad_norm": 1.5929284558069412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464870 + }, + { + "epoch": 2.254583988564126, + "grad_norm": 1.3676051402455869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464880 + }, + { + "epoch": 2.2546324867569623, + "grad_norm": 1.2668911253399529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464890 + }, + { + "epoch": 2.254680984949798, + "grad_norm": 1.1193771243256379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464900 + }, + { + "epoch": 2.2547294831426345, + "grad_norm": 9.47418765662178e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464910 + }, + { + "epoch": 2.2547779813354705, + "grad_norm": 1.5945088804869556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464920 + }, + { + "epoch": 2.2548264795283064, + "grad_norm": 1.6348222331430406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464930 + }, + { + "epoch": 2.2548749777211428, + "grad_norm": 1.779782010657982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464940 + }, + { + "epoch": 2.2549234759139787, + "grad_norm": 1.7139607066951612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464950 + }, + { + "epoch": 2.254971974106815, + "grad_norm": 1.6871075203539476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464960 + }, + { + "epoch": 2.255020472299651, + "grad_norm": 1.1545746581020921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464970 + }, + { + "epoch": 2.255068970492487, + "grad_norm": 1.160827522994623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464980 + }, + { + "epoch": 2.2551174686853233, + "grad_norm": 1.1605601812902933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 464990 + }, + { + "epoch": 2.255165966878159, + "grad_norm": 1.4137880199882602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465000 + }, + { + "epoch": 2.255214465070995, + "grad_norm": 1.5421122157022182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465010 + }, + { + "epoch": 2.2552629632638315, + "grad_norm": 1.1035986346996651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465020 + }, + { + "epoch": 2.2553114614566674, + "grad_norm": 1.8836495030427614e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465030 + }, + { + "epoch": 2.2553599596495038, + "grad_norm": 1.0962511787226958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465040 + }, + { + "epoch": 2.2554084578423397, + "grad_norm": 1.1699785140706354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465050 + }, + { + "epoch": 2.2554569560351756, + "grad_norm": 1.6301378025218582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465060 + }, + { + "epoch": 2.255505454228012, + "grad_norm": 1.1376848796373906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465070 + }, + { + "epoch": 2.255553952420848, + "grad_norm": 2.487609762624743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465080 + }, + { + "epoch": 2.255602450613684, + "grad_norm": 2.0779269860327076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465090 + }, + { + "epoch": 2.25565094880652, + "grad_norm": 2.2138491928558324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465100 + }, + { + "epoch": 2.255699446999356, + "grad_norm": 2.6835243360778804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465110 + }, + { + "epoch": 2.2557479451921925, + "grad_norm": 1.8546652214013193e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465120 + }, + { + "epoch": 2.2557964433850284, + "grad_norm": 1.6170613292842972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465130 + }, + { + "epoch": 2.2558449415778643, + "grad_norm": 9.181966298399402e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465140 + }, + { + "epoch": 2.2558934397707007, + "grad_norm": 2.0615093632159187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465150 + }, + { + "epoch": 2.2559419379635366, + "grad_norm": 1.4433332751195849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465160 + }, + { + "epoch": 2.255990436156373, + "grad_norm": 1.5199161040868603e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465170 + }, + { + "epoch": 2.256038934349209, + "grad_norm": 1.3682403654513564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465180 + }, + { + "epoch": 2.256087432542045, + "grad_norm": 1.6036224792514986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465190 + }, + { + "epoch": 2.256135930734881, + "grad_norm": 1.3237166918145249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465200 + }, + { + "epoch": 2.256184428927717, + "grad_norm": 1.3109573870906388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465210 + }, + { + "epoch": 2.256232927120553, + "grad_norm": 1.585020648064983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465220 + }, + { + "epoch": 2.2562814253133894, + "grad_norm": 1.659780579643666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465230 + }, + { + "epoch": 2.2563299235062253, + "grad_norm": 1.2427494588962418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465240 + }, + { + "epoch": 2.256378421699061, + "grad_norm": 1.2275806149375512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465250 + }, + { + "epoch": 2.2564269198918976, + "grad_norm": 1.2284929518102672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465260 + }, + { + "epoch": 2.2564754180847335, + "grad_norm": 1.3111776553387244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465270 + }, + { + "epoch": 2.25652391627757, + "grad_norm": 1.5697313671125812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465280 + }, + { + "epoch": 2.256572414470406, + "grad_norm": 9.631554220845828e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465290 + }, + { + "epoch": 2.2566209126632417, + "grad_norm": 1.4476974286026234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465300 + }, + { + "epoch": 2.256669410856078, + "grad_norm": 1.3974204016165004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465310 + }, + { + "epoch": 2.256717909048914, + "grad_norm": 9.562572955701398e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465320 + }, + { + "epoch": 2.2567664072417504, + "grad_norm": 1.5563729860446074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465330 + }, + { + "epoch": 2.2568149054345863, + "grad_norm": 1.1310540060094354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465340 + }, + { + "epoch": 2.256863403627422, + "grad_norm": 1.399642801658274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465350 + }, + { + "epoch": 2.2569119018202586, + "grad_norm": 1.26490906637855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465360 + }, + { + "epoch": 2.2569604000130945, + "grad_norm": 1.0976357600611664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465370 + }, + { + "epoch": 2.2570088982059304, + "grad_norm": 1.5565571942488532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465380 + }, + { + "epoch": 2.257057396398767, + "grad_norm": 1.5440155820556356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465390 + }, + { + "epoch": 2.2571058945916027, + "grad_norm": 9.875301465456232e-09, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465400 + }, + { + "epoch": 2.2571543927844386, + "grad_norm": 1.4681567961360997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465410 + }, + { + "epoch": 2.257202890977275, + "grad_norm": 1.1407396804941072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465420 + }, + { + "epoch": 2.257251389170111, + "grad_norm": 1.4314415430760619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465430 + }, + { + "epoch": 2.2572998873629473, + "grad_norm": 1.4826972538628524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465440 + }, + { + "epoch": 2.257348385555783, + "grad_norm": 1.1144112299632525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465450 + }, + { + "epoch": 2.257396883748619, + "grad_norm": 1.967840468353188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465460 + }, + { + "epoch": 2.2574453819414555, + "grad_norm": 1.182830988710748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465470 + }, + { + "epoch": 2.2574938801342914, + "grad_norm": 1.4293278560728595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465480 + }, + { + "epoch": 2.257542378327128, + "grad_norm": 1.535351223935777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465490 + }, + { + "epoch": 2.2575908765199637, + "grad_norm": 1.0449300980042153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465500 + }, + { + "epoch": 2.2576393747127996, + "grad_norm": 1.181696607233107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465510 + }, + { + "epoch": 2.257687872905636, + "grad_norm": 1.665135407336038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465520 + }, + { + "epoch": 2.257736371098472, + "grad_norm": 1.6628764143433727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465530 + }, + { + "epoch": 2.257784869291308, + "grad_norm": 1.0937664995935847e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465540 + }, + { + "epoch": 2.257833367484144, + "grad_norm": 1.1290408608033431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465550 + }, + { + "epoch": 2.25788186567698, + "grad_norm": 1.4175434159824363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465560 + }, + { + "epoch": 2.2579303638698165, + "grad_norm": 1.3927101250033047e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465570 + }, + { + "epoch": 2.2579788620626524, + "grad_norm": 1.5357695559714557e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465580 + }, + { + "epoch": 2.2580273602554883, + "grad_norm": 0.1546812355518341, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 465590 + }, + { + "epoch": 2.2580758584483247, + "grad_norm": 3.5737818393499765e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 465600 + }, + { + "epoch": 2.2581243566411606, + "grad_norm": 2.0290666725486517e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465610 + }, + { + "epoch": 2.2581728548339965, + "grad_norm": 9.840508937486447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465620 + }, + { + "epoch": 2.258221353026833, + "grad_norm": 5.191009677218972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465630 + }, + { + "epoch": 2.258269851219669, + "grad_norm": 1.3626970940094907e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465640 + }, + { + "epoch": 2.258318349412505, + "grad_norm": 4.519132744462695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465650 + }, + { + "epoch": 2.258366847605341, + "grad_norm": 2.104048007822712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465660 + }, + { + "epoch": 2.258415345798177, + "grad_norm": 1.4704833120049443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465670 + }, + { + "epoch": 2.2584638439910134, + "grad_norm": 6.69771225147997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465680 + }, + { + "epoch": 2.2585123421838493, + "grad_norm": 8.657121384203492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465690 + }, + { + "epoch": 2.2585608403766857, + "grad_norm": 8.814262173473253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465700 + }, + { + "epoch": 2.2586093385695216, + "grad_norm": 4.5974203999321617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465710 + }, + { + "epoch": 2.2586578367623575, + "grad_norm": 3.8823532122478355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465720 + }, + { + "epoch": 2.258706334955194, + "grad_norm": 4.6748732529522385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465730 + }, + { + "epoch": 2.25875483314803, + "grad_norm": 3.630289313605317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465740 + }, + { + "epoch": 2.2588033313408657, + "grad_norm": 3.3134921295641107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465750 + }, + { + "epoch": 2.258851829533702, + "grad_norm": 3.1295903113459644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465760 + }, + { + "epoch": 2.258900327726538, + "grad_norm": 0.0016524717211723328, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465770 + }, + { + "epoch": 2.258948825919374, + "grad_norm": 4.2707165448518936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465780 + }, + { + "epoch": 2.2589973241122103, + "grad_norm": 3.4576993357404717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465790 + }, + { + "epoch": 2.2590458223050462, + "grad_norm": 3.5278810628369683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465800 + }, + { + "epoch": 2.2590943204978826, + "grad_norm": 2.983182127991313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465810 + }, + { + "epoch": 2.2591428186907185, + "grad_norm": 3.010757154697785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465820 + }, + { + "epoch": 2.2591913168835545, + "grad_norm": 3.374386494670034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465830 + }, + { + "epoch": 2.259239815076391, + "grad_norm": 3.072924812386191e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465840 + }, + { + "epoch": 2.2592883132692267, + "grad_norm": 2.5860734353955195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465850 + }, + { + "epoch": 2.259336811462063, + "grad_norm": 2.976549922095728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465860 + }, + { + "epoch": 2.259385309654899, + "grad_norm": 2.8141377583779104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465870 + }, + { + "epoch": 2.259433807847735, + "grad_norm": 3.1882302664598683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465880 + }, + { + "epoch": 2.2594823060405713, + "grad_norm": 3.0038242471164267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465890 + }, + { + "epoch": 2.2595308042334072, + "grad_norm": 2.490040742486599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465900 + }, + { + "epoch": 2.259579302426243, + "grad_norm": 2.01880283157152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465910 + }, + { + "epoch": 2.2596278006190795, + "grad_norm": 2.205213434081088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465920 + }, + { + "epoch": 2.2596762988119155, + "grad_norm": 2.571067625467549e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465930 + }, + { + "epoch": 2.2597247970047514, + "grad_norm": 1.8363702736223786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465940 + }, + { + "epoch": 2.2597732951975877, + "grad_norm": 1.717106812293423e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465950 + }, + { + "epoch": 2.2598217933904237, + "grad_norm": 1.7967778376259957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465960 + }, + { + "epoch": 2.25987029158326, + "grad_norm": 2.2578461766897817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465970 + }, + { + "epoch": 2.259918789776096, + "grad_norm": 2.2959036982683756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465980 + }, + { + "epoch": 2.259967287968932, + "grad_norm": 1.7192276402511197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 465990 + }, + { + "epoch": 2.2600157861617682, + "grad_norm": 1.7388794049111311e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466000 + }, + { + "epoch": 2.260064284354604, + "grad_norm": 2.2778691288749542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466010 + }, + { + "epoch": 2.2601127825474405, + "grad_norm": 1.59690713985583e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466020 + }, + { + "epoch": 2.2601612807402764, + "grad_norm": 2.284115367956474e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466030 + }, + { + "epoch": 2.2602097789331124, + "grad_norm": 1.4740469111984567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466040 + }, + { + "epoch": 2.2602582771259487, + "grad_norm": 1.4117536295543687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466050 + }, + { + "epoch": 2.2603067753187847, + "grad_norm": 1.6329602203768445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466060 + }, + { + "epoch": 2.260355273511621, + "grad_norm": 1.7663246865140536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466070 + }, + { + "epoch": 2.260403771704457, + "grad_norm": 1.8311561689188238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466080 + }, + { + "epoch": 2.260452269897293, + "grad_norm": 1.43275713071489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466090 + }, + { + "epoch": 2.2605007680901292, + "grad_norm": 1.3292921607899189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466100 + }, + { + "epoch": 2.260549266282965, + "grad_norm": 1.664303539428147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466110 + }, + { + "epoch": 2.260597764475801, + "grad_norm": 1.3198642534462124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466120 + }, + { + "epoch": 2.2606462626686374, + "grad_norm": 1.6055844298534794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466130 + }, + { + "epoch": 2.2606947608614734, + "grad_norm": 1.3406236121227266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466140 + }, + { + "epoch": 2.2607432590543093, + "grad_norm": 1.471818649179113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466150 + }, + { + "epoch": 2.2607917572471457, + "grad_norm": 1.3058341608029878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466160 + }, + { + "epoch": 2.2608402554399816, + "grad_norm": 1.1719110659669241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466170 + }, + { + "epoch": 2.260888753632818, + "grad_norm": 1.4080039534292155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466180 + }, + { + "epoch": 2.260937251825654, + "grad_norm": 1.3046064850641415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466190 + }, + { + "epoch": 2.26098575001849, + "grad_norm": 1.2963394624421198e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466200 + }, + { + "epoch": 2.261034248211326, + "grad_norm": 1.4376932711002155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466210 + }, + { + "epoch": 2.261082746404162, + "grad_norm": 1.2021973816445097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466220 + }, + { + "epoch": 2.2611312445969984, + "grad_norm": 1.534999114483071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466230 + }, + { + "epoch": 2.2611797427898344, + "grad_norm": 1.119700883123187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466240 + }, + { + "epoch": 2.2612282409826703, + "grad_norm": 9.962317903955409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466250 + }, + { + "epoch": 2.2612767391755066, + "grad_norm": 1.1801881072415199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466260 + }, + { + "epoch": 2.2613252373683426, + "grad_norm": 9.842901960155359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466270 + }, + { + "epoch": 2.2613737355611785, + "grad_norm": 1.3859981606856309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466280 + }, + { + "epoch": 2.261422233754015, + "grad_norm": 1.1226655516338724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466290 + }, + { + "epoch": 2.2614707319468508, + "grad_norm": 1.5127764640965324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466300 + }, + { + "epoch": 2.2615192301396867, + "grad_norm": 9.449638582736952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466310 + }, + { + "epoch": 2.261567728332523, + "grad_norm": 1.2025702744722366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466320 + }, + { + "epoch": 2.261616226525359, + "grad_norm": 1.316409736773494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466330 + }, + { + "epoch": 2.2616647247181954, + "grad_norm": 9.680285018021095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466340 + }, + { + "epoch": 2.2617132229110313, + "grad_norm": 9.770278808218791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466350 + }, + { + "epoch": 2.261761721103867, + "grad_norm": 1.8551018001744524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466360 + }, + { + "epoch": 2.2618102192967036, + "grad_norm": 9.434646131012414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466370 + }, + { + "epoch": 2.2618587174895395, + "grad_norm": 1.2201581967019592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466380 + }, + { + "epoch": 2.261907215682376, + "grad_norm": 9.67621858194434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466390 + }, + { + "epoch": 2.2619557138752118, + "grad_norm": 1.3772440070169978e-05, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 466400 + }, + { + "epoch": 2.2620042120680477, + "grad_norm": 0.00020283334015402943, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 466410 + }, + { + "epoch": 2.262052710260884, + "grad_norm": 0.00045913024223409593, + "learning_rate": 0.0002, + "loss": 0.0011, + "step": 466420 + }, + { + "epoch": 2.26210120845372, + "grad_norm": 0.1670963615179062, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 466430 + }, + { + "epoch": 2.262149706646556, + "grad_norm": 0.00024132135149557143, + "learning_rate": 0.0002, + "loss": 0.0049, + "step": 466440 + }, + { + "epoch": 2.2621982048393923, + "grad_norm": 0.00020669704827014357, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 466450 + }, + { + "epoch": 2.262246703032228, + "grad_norm": 0.06114362180233002, + "learning_rate": 0.0002, + "loss": 0.001, + "step": 466460 + }, + { + "epoch": 2.262295201225064, + "grad_norm": 0.010259431786835194, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 466470 + }, + { + "epoch": 2.2623436994179005, + "grad_norm": 0.0006334386998787522, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 466480 + }, + { + "epoch": 2.2623921976107364, + "grad_norm": 0.0033354603219777346, + "learning_rate": 0.0002, + "loss": 0.0056, + "step": 466490 + }, + { + "epoch": 2.2624406958035728, + "grad_norm": 0.01763077639043331, + "learning_rate": 0.0002, + "loss": 0.0019, + "step": 466500 + }, + { + "epoch": 2.2624891939964087, + "grad_norm": 0.0013299317797645926, + "learning_rate": 0.0002, + "loss": 0.0088, + "step": 466510 + }, + { + "epoch": 2.2625376921892446, + "grad_norm": 0.0010831598192453384, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 466520 + }, + { + "epoch": 2.262586190382081, + "grad_norm": 0.0003786294546443969, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 466530 + }, + { + "epoch": 2.262634688574917, + "grad_norm": 0.00040835264371708035, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 466540 + }, + { + "epoch": 2.2626831867677533, + "grad_norm": 0.0003937399305868894, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466550 + }, + { + "epoch": 2.262731684960589, + "grad_norm": 0.00014608891797252, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 466560 + }, + { + "epoch": 2.262780183153425, + "grad_norm": 9.099371527554467e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466570 + }, + { + "epoch": 2.2628286813462615, + "grad_norm": 6.751451292075217e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466580 + }, + { + "epoch": 2.2628771795390974, + "grad_norm": 5.615854388452135e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466590 + }, + { + "epoch": 2.2629256777319338, + "grad_norm": 5.856224743183702e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466600 + }, + { + "epoch": 2.2629741759247697, + "grad_norm": 4.0613278542878106e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 466610 + }, + { + "epoch": 2.2630226741176056, + "grad_norm": 3.6128851206740364e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466620 + }, + { + "epoch": 2.263071172310442, + "grad_norm": 3.44462605426088e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466630 + }, + { + "epoch": 2.263119670503278, + "grad_norm": 2.959892481158022e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466640 + }, + { + "epoch": 2.263168168696114, + "grad_norm": 2.8801587177440524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466650 + }, + { + "epoch": 2.26321666688895, + "grad_norm": 2.667238914000336e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466660 + }, + { + "epoch": 2.263265165081786, + "grad_norm": 2.4870996639947407e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466670 + }, + { + "epoch": 2.263313663274622, + "grad_norm": 2.957663127745036e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 466680 + }, + { + "epoch": 2.2633621614674584, + "grad_norm": 3.639854185166769e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466690 + }, + { + "epoch": 2.2634106596602943, + "grad_norm": 0.11252429336309433, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466700 + }, + { + "epoch": 2.2634591578531307, + "grad_norm": 8.36263207020238e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466710 + }, + { + "epoch": 2.2635076560459666, + "grad_norm": 0.00017712368571665138, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466720 + }, + { + "epoch": 2.2635561542388025, + "grad_norm": 0.00028999458299949765, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466730 + }, + { + "epoch": 2.263604652431639, + "grad_norm": 3.111404657829553e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466740 + }, + { + "epoch": 2.263653150624475, + "grad_norm": 2.792713712551631e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466750 + }, + { + "epoch": 2.263701648817311, + "grad_norm": 2.61082732322393e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466760 + }, + { + "epoch": 2.263750147010147, + "grad_norm": 2.4951354134827852e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466770 + }, + { + "epoch": 2.263798645202983, + "grad_norm": 2.259549728478305e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466780 + }, + { + "epoch": 2.2638471433958194, + "grad_norm": 2.5631949029047973e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466790 + }, + { + "epoch": 2.2638956415886553, + "grad_norm": 2.093768307531718e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466800 + }, + { + "epoch": 2.2639441397814912, + "grad_norm": 1.987529503821861e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466810 + }, + { + "epoch": 2.2639926379743276, + "grad_norm": 1.8682319932850078e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466820 + }, + { + "epoch": 2.2640411361671635, + "grad_norm": 1.7681946701486595e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466830 + }, + { + "epoch": 2.2640896343599994, + "grad_norm": 2.235790998383891e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 466840 + }, + { + "epoch": 2.264138132552836, + "grad_norm": 4.138274380238727e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466850 + }, + { + "epoch": 2.2641866307456717, + "grad_norm": 9.833413059823215e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466860 + }, + { + "epoch": 2.264235128938508, + "grad_norm": 5.0868966354755685e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466870 + }, + { + "epoch": 2.264283627131344, + "grad_norm": 4.9610771384323016e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466880 + }, + { + "epoch": 2.26433212532418, + "grad_norm": 3.823629958787933e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466890 + }, + { + "epoch": 2.2643806235170163, + "grad_norm": 3.7011897802585736e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466900 + }, + { + "epoch": 2.264429121709852, + "grad_norm": 3.273875699960627e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466910 + }, + { + "epoch": 2.2644776199026886, + "grad_norm": 3.5006978578167036e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466920 + }, + { + "epoch": 2.2645261180955245, + "grad_norm": 2.753978878899943e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466930 + }, + { + "epoch": 2.2645746162883604, + "grad_norm": 2.5712020942592062e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466940 + }, + { + "epoch": 2.264623114481197, + "grad_norm": 2.9657598133780994e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 466950 + }, + { + "epoch": 2.2646716126740327, + "grad_norm": 5.493417484103702e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466960 + }, + { + "epoch": 2.2647201108668686, + "grad_norm": 5.770154530182481e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466970 + }, + { + "epoch": 2.264768609059705, + "grad_norm": 9.332359331892803e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 466980 + }, + { + "epoch": 2.264817107252541, + "grad_norm": 0.00019521992362570018, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 466990 + }, + { + "epoch": 2.264865605445377, + "grad_norm": 0.00010958468919852749, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467000 + }, + { + "epoch": 2.264914103638213, + "grad_norm": 6.476127600762993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467010 + }, + { + "epoch": 2.264962601831049, + "grad_norm": 5.2991570555604994e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467020 + }, + { + "epoch": 2.2650111000238855, + "grad_norm": 0.0001202432467835024, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467030 + }, + { + "epoch": 2.2650595982167214, + "grad_norm": 3.738257146324031e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467040 + }, + { + "epoch": 2.2651080964095573, + "grad_norm": 3.707001087605022e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467050 + }, + { + "epoch": 2.2651565946023937, + "grad_norm": 3.2653209927957505e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467060 + }, + { + "epoch": 2.2652050927952296, + "grad_norm": 2.8118553018430248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467070 + }, + { + "epoch": 2.265253590988066, + "grad_norm": 1.9029555915039964e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467080 + }, + { + "epoch": 2.265302089180902, + "grad_norm": 2.5267459932365455e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467090 + }, + { + "epoch": 2.265350587373738, + "grad_norm": 2.4199189283535816e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467100 + }, + { + "epoch": 2.265399085566574, + "grad_norm": 2.2398997316486202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467110 + }, + { + "epoch": 2.26544758375941, + "grad_norm": 2.1002999346819706e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467120 + }, + { + "epoch": 2.2654960819522465, + "grad_norm": 1.431314831279451e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467130 + }, + { + "epoch": 2.2655445801450824, + "grad_norm": 1.7433711036574095e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467140 + }, + { + "epoch": 2.2655930783379183, + "grad_norm": 1.7465668861404993e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467150 + }, + { + "epoch": 2.2656415765307547, + "grad_norm": 1.8335942513658665e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467160 + }, + { + "epoch": 2.2656900747235906, + "grad_norm": 1.5941925084916875e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467170 + }, + { + "epoch": 2.2657385729164266, + "grad_norm": 1.1462982001830824e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467180 + }, + { + "epoch": 2.265787071109263, + "grad_norm": 1.3850325558451004e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467190 + }, + { + "epoch": 2.265835569302099, + "grad_norm": 1.379408422508277e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467200 + }, + { + "epoch": 2.2658840674949348, + "grad_norm": 1.4158084013615735e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467210 + }, + { + "epoch": 2.265932565687771, + "grad_norm": 1.3305388165463228e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467220 + }, + { + "epoch": 2.265981063880607, + "grad_norm": 9.561659680912271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467230 + }, + { + "epoch": 2.2660295620734434, + "grad_norm": 1.3024484360357746e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467240 + }, + { + "epoch": 2.2660780602662793, + "grad_norm": 1.2385999980324414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467250 + }, + { + "epoch": 2.2661265584591153, + "grad_norm": 1.1658695257210638e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467260 + }, + { + "epoch": 2.2661750566519516, + "grad_norm": 1.1068907042499632e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467270 + }, + { + "epoch": 2.2662235548447875, + "grad_norm": 8.191263077605981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467280 + }, + { + "epoch": 2.266272053037624, + "grad_norm": 1.0267751349601895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467290 + }, + { + "epoch": 2.26632055123046, + "grad_norm": 1.0164969353354536e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467300 + }, + { + "epoch": 2.2663690494232958, + "grad_norm": 9.962152034859173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467310 + }, + { + "epoch": 2.266417547616132, + "grad_norm": 9.63659113040194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467320 + }, + { + "epoch": 2.266466045808968, + "grad_norm": 7.13475401425967e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467330 + }, + { + "epoch": 2.266514544001804, + "grad_norm": 9.264951586374082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467340 + }, + { + "epoch": 2.2665630421946403, + "grad_norm": 8.826380508253351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467350 + }, + { + "epoch": 2.2666115403874763, + "grad_norm": 8.72654163686093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467360 + }, + { + "epoch": 2.266660038580312, + "grad_norm": 8.869468729244545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467370 + }, + { + "epoch": 2.2667085367731485, + "grad_norm": 6.831715836597141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467380 + }, + { + "epoch": 2.2667570349659845, + "grad_norm": 8.078493010543752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467390 + }, + { + "epoch": 2.266805533158821, + "grad_norm": 8.046825314522721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467400 + }, + { + "epoch": 2.2668540313516568, + "grad_norm": 7.794998964527622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467410 + }, + { + "epoch": 2.2669025295444927, + "grad_norm": 7.706138603680301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467420 + }, + { + "epoch": 2.266951027737329, + "grad_norm": 5.955828328296775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467430 + }, + { + "epoch": 2.266999525930165, + "grad_norm": 7.559590358141577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467440 + }, + { + "epoch": 2.2670480241230013, + "grad_norm": 6.95160633767955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467450 + }, + { + "epoch": 2.2670965223158372, + "grad_norm": 7.037341219984228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467460 + }, + { + "epoch": 2.267145020508673, + "grad_norm": 6.945563654880971e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467470 + }, + { + "epoch": 2.2671935187015095, + "grad_norm": 5.885607151867589e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467480 + }, + { + "epoch": 2.2672420168943455, + "grad_norm": 1.3363815924094524e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467490 + }, + { + "epoch": 2.2672905150871814, + "grad_norm": 6.4822934291441925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467500 + }, + { + "epoch": 2.2673390132800177, + "grad_norm": 6.411041340470547e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467510 + }, + { + "epoch": 2.2673875114728537, + "grad_norm": 6.139000561233843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467520 + }, + { + "epoch": 2.2674360096656896, + "grad_norm": 4.978716333425837e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467530 + }, + { + "epoch": 2.267484507858526, + "grad_norm": 6.178443527460331e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467540 + }, + { + "epoch": 2.267533006051362, + "grad_norm": 6.090795068303123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467550 + }, + { + "epoch": 2.2675815042441982, + "grad_norm": 6.015447979734745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467560 + }, + { + "epoch": 2.267630002437034, + "grad_norm": 5.686796157533536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467570 + }, + { + "epoch": 2.26767850062987, + "grad_norm": 4.699775217886781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467580 + }, + { + "epoch": 2.2677269988227065, + "grad_norm": 5.431392310129013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467590 + }, + { + "epoch": 2.2677754970155424, + "grad_norm": 5.399694146035472e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467600 + }, + { + "epoch": 2.2678239952083787, + "grad_norm": 5.464279638545122e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467610 + }, + { + "epoch": 2.2678724934012147, + "grad_norm": 5.26216035723337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467620 + }, + { + "epoch": 2.2679209915940506, + "grad_norm": 4.240060661686584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467630 + }, + { + "epoch": 2.267969489786887, + "grad_norm": 5.027175120631e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 467640 + }, + { + "epoch": 2.268017987979723, + "grad_norm": 4.482006261241622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467650 + }, + { + "epoch": 2.2680664861725592, + "grad_norm": 4.2827496145037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467660 + }, + { + "epoch": 2.268114984365395, + "grad_norm": 4.4349467316351365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467670 + }, + { + "epoch": 2.268163482558231, + "grad_norm": 3.3936958061531186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467680 + }, + { + "epoch": 2.2682119807510674, + "grad_norm": 4.417810032464331e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467690 + }, + { + "epoch": 2.2682604789439034, + "grad_norm": 4.513632120506372e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467700 + }, + { + "epoch": 2.2683089771367393, + "grad_norm": 4.439976237335941e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467710 + }, + { + "epoch": 2.2683574753295757, + "grad_norm": 4.171674845565576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467720 + }, + { + "epoch": 2.2684059735224116, + "grad_norm": 3.4648664950509556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467730 + }, + { + "epoch": 2.2684544717152475, + "grad_norm": 4.580318091029767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467740 + }, + { + "epoch": 2.268502969908084, + "grad_norm": 4.694272320193704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467750 + }, + { + "epoch": 2.26855146810092, + "grad_norm": 4.067083864356391e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467760 + }, + { + "epoch": 2.268599966293756, + "grad_norm": 3.988615844718879e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467770 + }, + { + "epoch": 2.268648464486592, + "grad_norm": 3.0581231840187684e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467780 + }, + { + "epoch": 2.268696962679428, + "grad_norm": 3.846883828373393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467790 + }, + { + "epoch": 2.2687454608722644, + "grad_norm": 3.7835930015717167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467800 + }, + { + "epoch": 2.2687939590651003, + "grad_norm": 3.774613787754788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467810 + }, + { + "epoch": 2.2688424572579367, + "grad_norm": 3.8021944419597276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467820 + }, + { + "epoch": 2.2688909554507726, + "grad_norm": 2.854296099030762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467830 + }, + { + "epoch": 2.2689394536436085, + "grad_norm": 3.6300518786447356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467840 + }, + { + "epoch": 2.268987951836445, + "grad_norm": 0.15585914254188538, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467850 + }, + { + "epoch": 2.269036450029281, + "grad_norm": 3.383401235623751e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467860 + }, + { + "epoch": 2.2690849482221167, + "grad_norm": 3.4931979371322086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467870 + }, + { + "epoch": 2.269133446414953, + "grad_norm": 3.483155751382583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467880 + }, + { + "epoch": 2.269181944607789, + "grad_norm": 3.3487115160824033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467890 + }, + { + "epoch": 2.269230442800625, + "grad_norm": 3.4727634101727745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467900 + }, + { + "epoch": 2.2692789409934613, + "grad_norm": 4.0953195821202826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467910 + }, + { + "epoch": 2.269327439186297, + "grad_norm": 3.3021285616996465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467920 + }, + { + "epoch": 2.2693759373791336, + "grad_norm": 3.1232145829562796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467930 + }, + { + "epoch": 2.2694244355719695, + "grad_norm": 3.2403281693405006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467940 + }, + { + "epoch": 2.2694729337648054, + "grad_norm": 3.2111374821397476e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467950 + }, + { + "epoch": 2.2695214319576418, + "grad_norm": 3.2145796922122827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467960 + }, + { + "epoch": 2.2695699301504777, + "grad_norm": 3.2142804684553994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467970 + }, + { + "epoch": 2.269618428343314, + "grad_norm": 3.1129393391893245e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467980 + }, + { + "epoch": 2.26966692653615, + "grad_norm": 3.34206924890168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 467990 + }, + { + "epoch": 2.269715424728986, + "grad_norm": 3.1521840355708264e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468000 + }, + { + "epoch": 2.2697639229218223, + "grad_norm": 3.0494936709146714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468010 + }, + { + "epoch": 2.269812421114658, + "grad_norm": 3.003231540787965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468020 + }, + { + "epoch": 2.269860919307494, + "grad_norm": 2.786984168778872e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468030 + }, + { + "epoch": 2.2699094175003305, + "grad_norm": 2.884559535232256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468040 + }, + { + "epoch": 2.2699579156931664, + "grad_norm": 2.845875997081748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468050 + }, + { + "epoch": 2.2700064138860023, + "grad_norm": 2.81119878309255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468060 + }, + { + "epoch": 2.2700549120788387, + "grad_norm": 2.69927454610297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468070 + }, + { + "epoch": 2.2701034102716746, + "grad_norm": 2.5849258236121386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468080 + }, + { + "epoch": 2.270151908464511, + "grad_norm": 2.6060372420033673e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468090 + }, + { + "epoch": 2.270200406657347, + "grad_norm": 2.6367308691988e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468100 + }, + { + "epoch": 2.270248904850183, + "grad_norm": 2.746857035162975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468110 + }, + { + "epoch": 2.270297403043019, + "grad_norm": 2.541589537941036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468120 + }, + { + "epoch": 2.270345901235855, + "grad_norm": 2.479635213603615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468130 + }, + { + "epoch": 2.2703943994286915, + "grad_norm": 3.2378502510255203e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468140 + }, + { + "epoch": 2.2704428976215274, + "grad_norm": 2.535421344873612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468150 + }, + { + "epoch": 2.2704913958143633, + "grad_norm": 2.5289334644185146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468160 + }, + { + "epoch": 2.2705398940071997, + "grad_norm": 2.5817657842708286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468170 + }, + { + "epoch": 2.2705883922000356, + "grad_norm": 2.2624328721576603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468180 + }, + { + "epoch": 2.270636890392872, + "grad_norm": 2.5174958864226937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468190 + }, + { + "epoch": 2.270685388585708, + "grad_norm": 2.3772161057422636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468200 + }, + { + "epoch": 2.270733886778544, + "grad_norm": 2.3020315893518273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468210 + }, + { + "epoch": 2.27078238497138, + "grad_norm": 2.353646550545818e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468220 + }, + { + "epoch": 2.270830883164216, + "grad_norm": 2.1005523649364477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468230 + }, + { + "epoch": 2.270879381357052, + "grad_norm": 2.2596695998799987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468240 + }, + { + "epoch": 2.2709278795498884, + "grad_norm": 3.2733928492234554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468250 + }, + { + "epoch": 2.2709763777427243, + "grad_norm": 2.189725137213827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468260 + }, + { + "epoch": 2.2710248759355602, + "grad_norm": 2.193927002736018e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468270 + }, + { + "epoch": 2.2710733741283966, + "grad_norm": 2.147152599718538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468280 + }, + { + "epoch": 2.2711218723212325, + "grad_norm": 2.129580025211908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468290 + }, + { + "epoch": 2.271170370514069, + "grad_norm": 2.3065751975082094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468300 + }, + { + "epoch": 2.271218868706905, + "grad_norm": 2.10869211514364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468310 + }, + { + "epoch": 2.2712673668997407, + "grad_norm": 2.1313730940164533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468320 + }, + { + "epoch": 2.271315865092577, + "grad_norm": 1.9959684323112015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468330 + }, + { + "epoch": 2.271364363285413, + "grad_norm": 2.1216642380750272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468340 + }, + { + "epoch": 2.2714128614782494, + "grad_norm": 2.160020358132897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468350 + }, + { + "epoch": 2.2714613596710853, + "grad_norm": 1.991734052353422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468360 + }, + { + "epoch": 2.2715098578639212, + "grad_norm": 2.030390760410228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468370 + }, + { + "epoch": 2.2715583560567576, + "grad_norm": 1.9391998193896143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468380 + }, + { + "epoch": 2.2716068542495935, + "grad_norm": 1.8864204776036786e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468390 + }, + { + "epoch": 2.2716553524424294, + "grad_norm": 1.940319634741172e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468400 + }, + { + "epoch": 2.271703850635266, + "grad_norm": 1.9616893496277044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468410 + }, + { + "epoch": 2.2717523488281017, + "grad_norm": 1.8766446601148346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468420 + }, + { + "epoch": 2.2718008470209377, + "grad_norm": 1.8539603843237273e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468430 + }, + { + "epoch": 2.271849345213774, + "grad_norm": 1.8264653363075922e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468440 + }, + { + "epoch": 2.27189784340661, + "grad_norm": 1.8596164181872155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468450 + }, + { + "epoch": 2.2719463415994463, + "grad_norm": 1.8410257780487882e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468460 + }, + { + "epoch": 2.2719948397922822, + "grad_norm": 1.7556139937369153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468470 + }, + { + "epoch": 2.272043337985118, + "grad_norm": 1.6879578197404044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468480 + }, + { + "epoch": 2.2720918361779545, + "grad_norm": 1.8329327531318995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468490 + }, + { + "epoch": 2.2721403343707904, + "grad_norm": 1.7011077488859883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468500 + }, + { + "epoch": 2.272188832563627, + "grad_norm": 1.7206957636517473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468510 + }, + { + "epoch": 2.2722373307564627, + "grad_norm": 1.6573043239986873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468520 + }, + { + "epoch": 2.2722858289492986, + "grad_norm": 1.5962884845066583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468530 + }, + { + "epoch": 2.272334327142135, + "grad_norm": 1.6096623767225537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468540 + }, + { + "epoch": 2.272382825334971, + "grad_norm": 1.6542037428735057e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468550 + }, + { + "epoch": 2.272431323527807, + "grad_norm": 1.6414669516962022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468560 + }, + { + "epoch": 2.2724798217206432, + "grad_norm": 1.6055370224421495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468570 + }, + { + "epoch": 2.272528319913479, + "grad_norm": 1.5332441307691624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468580 + }, + { + "epoch": 2.272576818106315, + "grad_norm": 1.5814612197573297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468590 + }, + { + "epoch": 2.2726253162991514, + "grad_norm": 1.6249224472630885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468600 + }, + { + "epoch": 2.2726738144919874, + "grad_norm": 1.5311428569475538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468610 + }, + { + "epoch": 2.2727223126848237, + "grad_norm": 1.5527825780736748e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468620 + }, + { + "epoch": 2.2727708108776596, + "grad_norm": 1.4620004549215082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468630 + }, + { + "epoch": 2.2728193090704956, + "grad_norm": 1.527173708382179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468640 + }, + { + "epoch": 2.272867807263332, + "grad_norm": 1.5207640444714343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468650 + }, + { + "epoch": 2.272916305456168, + "grad_norm": 1.6937710825004615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468660 + }, + { + "epoch": 2.272964803649004, + "grad_norm": 1.4524329117193702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468670 + }, + { + "epoch": 2.27301330184184, + "grad_norm": 1.4631212934546056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468680 + }, + { + "epoch": 2.273061800034676, + "grad_norm": 1.4357957525135134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468690 + }, + { + "epoch": 2.2731102982275124, + "grad_norm": 1.5207954220386455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468700 + }, + { + "epoch": 2.2731587964203483, + "grad_norm": 1.4404971580006531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468710 + }, + { + "epoch": 2.2732072946131847, + "grad_norm": 1.4303740272225696e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468720 + }, + { + "epoch": 2.2732557928060206, + "grad_norm": 1.3676695971298614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468730 + }, + { + "epoch": 2.2733042909988566, + "grad_norm": 1.4788490716455271e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468740 + }, + { + "epoch": 2.273352789191693, + "grad_norm": 1.375861870656081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468750 + }, + { + "epoch": 2.273401287384529, + "grad_norm": 1.3468755923895515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468760 + }, + { + "epoch": 2.2734497855773648, + "grad_norm": 1.4583256415789947e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468770 + }, + { + "epoch": 2.273498283770201, + "grad_norm": 1.4169841051625554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468780 + }, + { + "epoch": 2.273546781963037, + "grad_norm": 1.3870723023501341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468790 + }, + { + "epoch": 2.273595280155873, + "grad_norm": 1.3555852547142422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468800 + }, + { + "epoch": 2.2736437783487093, + "grad_norm": 1.3303397281561047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468810 + }, + { + "epoch": 2.2736922765415453, + "grad_norm": 1.3078093843432725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468820 + }, + { + "epoch": 2.2737407747343816, + "grad_norm": 1.390271222589945e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468830 + }, + { + "epoch": 2.2737892729272176, + "grad_norm": 1.3534694289774052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468840 + }, + { + "epoch": 2.2738377711200535, + "grad_norm": 1.2499729109549662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468850 + }, + { + "epoch": 2.27388626931289, + "grad_norm": 1.2645455171877984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468860 + }, + { + "epoch": 2.2739347675057258, + "grad_norm": 1.3324555538929417e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468870 + }, + { + "epoch": 2.273983265698562, + "grad_norm": 1.3661543789567077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468880 + }, + { + "epoch": 2.274031763891398, + "grad_norm": 1.2237429700689972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468890 + }, + { + "epoch": 2.274080262084234, + "grad_norm": 1.2452037481125444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468900 + }, + { + "epoch": 2.2741287602770703, + "grad_norm": 1.2620503184734844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468910 + }, + { + "epoch": 2.2741772584699063, + "grad_norm": 1.1995796285191318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468920 + }, + { + "epoch": 2.274225756662742, + "grad_norm": 1.2151880355304456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468930 + }, + { + "epoch": 2.2742742548555785, + "grad_norm": 1.2190980669402052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468940 + }, + { + "epoch": 2.2743227530484145, + "grad_norm": 1.199726966660819e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468950 + }, + { + "epoch": 2.2743712512412504, + "grad_norm": 1.2444303365555243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468960 + }, + { + "epoch": 2.2744197494340868, + "grad_norm": 1.2233386996740592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468970 + }, + { + "epoch": 2.2744682476269227, + "grad_norm": 1.1861953908010037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468980 + }, + { + "epoch": 2.274516745819759, + "grad_norm": 1.134162971538899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 468990 + }, + { + "epoch": 2.274565244012595, + "grad_norm": 1.1627505500655388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469000 + }, + { + "epoch": 2.274613742205431, + "grad_norm": 1.1508088846312603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469010 + }, + { + "epoch": 2.2746622403982673, + "grad_norm": 1.1516017366375308e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469020 + }, + { + "epoch": 2.274710738591103, + "grad_norm": 1.1092613476648694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469030 + }, + { + "epoch": 2.2747592367839395, + "grad_norm": 1.157292103926011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469040 + }, + { + "epoch": 2.2748077349767755, + "grad_norm": 1.1245796258663177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469050 + }, + { + "epoch": 2.2748562331696114, + "grad_norm": 1.109317054215353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469060 + }, + { + "epoch": 2.2749047313624478, + "grad_norm": 1.0762976216938114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469070 + }, + { + "epoch": 2.2749532295552837, + "grad_norm": 1.0875784255404142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469080 + }, + { + "epoch": 2.2750017277481196, + "grad_norm": 1.1251073601670214e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469090 + }, + { + "epoch": 2.275050225940956, + "grad_norm": 1.079135245163343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469100 + }, + { + "epoch": 2.275098724133792, + "grad_norm": 1.0961092584693688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469110 + }, + { + "epoch": 2.275147222326628, + "grad_norm": 1.0646265309333103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469120 + }, + { + "epoch": 2.275195720519464, + "grad_norm": 1.1289380381640512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469130 + }, + { + "epoch": 2.2752442187123, + "grad_norm": 1.0074718375108205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469140 + }, + { + "epoch": 2.2752927169051365, + "grad_norm": 1.0892824775510235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469150 + }, + { + "epoch": 2.2753412150979724, + "grad_norm": 1.062835053744493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469160 + }, + { + "epoch": 2.2753897132908083, + "grad_norm": 1.0032539421445108e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469170 + }, + { + "epoch": 2.2754382114836447, + "grad_norm": 1.0462777026987169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469180 + }, + { + "epoch": 2.2754867096764806, + "grad_norm": 1.0055208576886798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469190 + }, + { + "epoch": 2.275535207869317, + "grad_norm": 1.0146626436835504e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469200 + }, + { + "epoch": 2.275583706062153, + "grad_norm": 9.900710438159877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469210 + }, + { + "epoch": 2.275632204254989, + "grad_norm": 1.0063174613605952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469220 + }, + { + "epoch": 2.275680702447825, + "grad_norm": 9.868730330708786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469230 + }, + { + "epoch": 2.275729200640661, + "grad_norm": 9.769274811333162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469240 + }, + { + "epoch": 2.2757776988334975, + "grad_norm": 1.003190163828549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469250 + }, + { + "epoch": 2.2758261970263334, + "grad_norm": 9.615897624826175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469260 + }, + { + "epoch": 2.2758746952191693, + "grad_norm": 9.749830951477634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469270 + }, + { + "epoch": 2.2759231934120057, + "grad_norm": 9.562861578160664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469280 + }, + { + "epoch": 2.2759716916048416, + "grad_norm": 9.450628795093508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469290 + }, + { + "epoch": 2.2760201897976775, + "grad_norm": 9.411543828719005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469300 + }, + { + "epoch": 2.276068687990514, + "grad_norm": 9.012505302052887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469310 + }, + { + "epoch": 2.27611718618335, + "grad_norm": 9.331770343123935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469320 + }, + { + "epoch": 2.2761656843761857, + "grad_norm": 9.30729811443598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469330 + }, + { + "epoch": 2.276214182569022, + "grad_norm": 9.168815608973091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469340 + }, + { + "epoch": 2.276262680761858, + "grad_norm": 9.248845458387223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469350 + }, + { + "epoch": 2.2763111789546944, + "grad_norm": 9.246463150702766e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469360 + }, + { + "epoch": 2.2763596771475303, + "grad_norm": 8.88926251718658e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469370 + }, + { + "epoch": 2.276408175340366, + "grad_norm": 8.713219585843035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469380 + }, + { + "epoch": 2.2764566735332026, + "grad_norm": 1.0859163239729241e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469390 + }, + { + "epoch": 2.2765051717260385, + "grad_norm": 8.86866814653331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469400 + }, + { + "epoch": 2.276553669918875, + "grad_norm": 8.592602398493909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469410 + }, + { + "epoch": 2.276602168111711, + "grad_norm": 8.846093351166928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469420 + }, + { + "epoch": 2.2766506663045467, + "grad_norm": 8.309037298204203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469430 + }, + { + "epoch": 2.276699164497383, + "grad_norm": 8.776796107667906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469440 + }, + { + "epoch": 2.276747662690219, + "grad_norm": 8.830565434436721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469450 + }, + { + "epoch": 2.276796160883055, + "grad_norm": 8.585635669078329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469460 + }, + { + "epoch": 2.2768446590758913, + "grad_norm": 8.574768912694708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469470 + }, + { + "epoch": 2.276893157268727, + "grad_norm": 8.870104579727922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469480 + }, + { + "epoch": 2.276941655461563, + "grad_norm": 8.30976944143913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469490 + }, + { + "epoch": 2.2769901536543995, + "grad_norm": 8.374977937819494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469500 + }, + { + "epoch": 2.2770386518472354, + "grad_norm": 8.361008667634451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469510 + }, + { + "epoch": 2.277087150040072, + "grad_norm": 8.321626978613494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469520 + }, + { + "epoch": 2.2771356482329077, + "grad_norm": 8.386683134631312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469530 + }, + { + "epoch": 2.2771841464257436, + "grad_norm": 8.186049171854393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469540 + }, + { + "epoch": 2.27723264461858, + "grad_norm": 1.1258382528467337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469550 + }, + { + "epoch": 2.277281142811416, + "grad_norm": 8.111887268569262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469560 + }, + { + "epoch": 2.2773296410042523, + "grad_norm": 8.053329452195612e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469570 + }, + { + "epoch": 2.277378139197088, + "grad_norm": 7.429750326082285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469580 + }, + { + "epoch": 2.277426637389924, + "grad_norm": 7.901256253717293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469590 + }, + { + "epoch": 2.2774751355827605, + "grad_norm": 7.738802878520801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469600 + }, + { + "epoch": 2.2775236337755964, + "grad_norm": 7.637514727321104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469610 + }, + { + "epoch": 2.2775721319684323, + "grad_norm": 7.635179031240114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469620 + }, + { + "epoch": 2.2776206301612687, + "grad_norm": 6.834841315139784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469630 + }, + { + "epoch": 2.2776691283541046, + "grad_norm": 7.728669970674673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469640 + }, + { + "epoch": 2.277717626546941, + "grad_norm": 7.710278850936447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469650 + }, + { + "epoch": 2.277766124739777, + "grad_norm": 7.405952260342019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469660 + }, + { + "epoch": 2.277814622932613, + "grad_norm": 7.165505735429178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469670 + }, + { + "epoch": 2.277863121125449, + "grad_norm": 1.010602659334836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469680 + }, + { + "epoch": 2.277911619318285, + "grad_norm": 7.50912533931114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469690 + }, + { + "epoch": 2.277960117511121, + "grad_norm": 7.256891763063322e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469700 + }, + { + "epoch": 2.2780086157039574, + "grad_norm": 7.452112527062127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469710 + }, + { + "epoch": 2.2780571138967933, + "grad_norm": 7.375577411039558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469720 + }, + { + "epoch": 2.2781056120896297, + "grad_norm": 6.569129027411691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469730 + }, + { + "epoch": 2.2781541102824656, + "grad_norm": 7.334307383644045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469740 + }, + { + "epoch": 2.2782026084753015, + "grad_norm": 7.424715704473783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469750 + }, + { + "epoch": 2.278251106668138, + "grad_norm": 6.733803843417263e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469760 + }, + { + "epoch": 2.278299604860974, + "grad_norm": 6.978185638217838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469770 + }, + { + "epoch": 2.27834810305381, + "grad_norm": 7.363012173300376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469780 + }, + { + "epoch": 2.278396601246646, + "grad_norm": 6.914973482707865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469790 + }, + { + "epoch": 2.278445099439482, + "grad_norm": 6.648764383498929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469800 + }, + { + "epoch": 2.2784935976323184, + "grad_norm": 6.973559720790945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469810 + }, + { + "epoch": 2.2785420958251543, + "grad_norm": 6.640814262937056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469820 + }, + { + "epoch": 2.2785905940179902, + "grad_norm": 6.667297043350118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469830 + }, + { + "epoch": 2.2786390922108266, + "grad_norm": 6.710155275868601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469840 + }, + { + "epoch": 2.2786875904036625, + "grad_norm": 6.395055720531673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469850 + }, + { + "epoch": 2.2787360885964985, + "grad_norm": 6.328367021524173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469860 + }, + { + "epoch": 2.278784586789335, + "grad_norm": 6.624905495300482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469870 + }, + { + "epoch": 2.2788330849821707, + "grad_norm": 6.106329806243593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469880 + }, + { + "epoch": 2.278881583175007, + "grad_norm": 6.189422947500134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469890 + }, + { + "epoch": 2.278930081367843, + "grad_norm": 5.926221433583123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469900 + }, + { + "epoch": 2.278978579560679, + "grad_norm": 6.128181553322065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469910 + }, + { + "epoch": 2.2790270777535153, + "grad_norm": 6.296626793300675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469920 + }, + { + "epoch": 2.2790755759463512, + "grad_norm": 5.467455821417389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469930 + }, + { + "epoch": 2.2791240741391876, + "grad_norm": 5.965931677565095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469940 + }, + { + "epoch": 2.2791725723320235, + "grad_norm": 6.687015456918743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469950 + }, + { + "epoch": 2.2792210705248594, + "grad_norm": 5.619399985334894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469960 + }, + { + "epoch": 2.279269568717696, + "grad_norm": 6.246348220884101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469970 + }, + { + "epoch": 2.2793180669105317, + "grad_norm": 5.087518388791068e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469980 + }, + { + "epoch": 2.2793665651033677, + "grad_norm": 5.877631679140904e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 469990 + }, + { + "epoch": 2.279415063296204, + "grad_norm": 5.619652370114636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470000 + }, + { + "epoch": 2.27946356148904, + "grad_norm": 5.700616156900651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470010 + }, + { + "epoch": 2.279512059681876, + "grad_norm": 2.375991698500002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470020 + }, + { + "epoch": 2.2795605578747122, + "grad_norm": 4.863894105255895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470030 + }, + { + "epoch": 2.279609056067548, + "grad_norm": 5.466801553666301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470040 + }, + { + "epoch": 2.2796575542603845, + "grad_norm": 5.902612656427664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470050 + }, + { + "epoch": 2.2797060524532204, + "grad_norm": 5.66975529636693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470060 + }, + { + "epoch": 2.2797545506460564, + "grad_norm": 5.370901021706231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470070 + }, + { + "epoch": 2.2798030488388927, + "grad_norm": 4.948049081576755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470080 + }, + { + "epoch": 2.2798515470317287, + "grad_norm": 5.508189246938855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470090 + }, + { + "epoch": 2.279900045224565, + "grad_norm": 5.588802309830498e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470100 + }, + { + "epoch": 2.279948543417401, + "grad_norm": 5.491546062330599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470110 + }, + { + "epoch": 2.279997041610237, + "grad_norm": 5.525432698050281e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470120 + }, + { + "epoch": 2.2800455398030732, + "grad_norm": 4.521440075677674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470130 + }, + { + "epoch": 2.280094037995909, + "grad_norm": 5.21371134709625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470140 + }, + { + "epoch": 2.280142536188745, + "grad_norm": 5.005325078855094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470150 + }, + { + "epoch": 2.2801910343815814, + "grad_norm": 5.040936912337202e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470160 + }, + { + "epoch": 2.2802395325744174, + "grad_norm": 5.302589443090255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470170 + }, + { + "epoch": 2.2802880307672537, + "grad_norm": 4.2297779145883396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470180 + }, + { + "epoch": 2.2803365289600896, + "grad_norm": 5.10491872773855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470190 + }, + { + "epoch": 2.2803850271529256, + "grad_norm": 4.829598196920415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470200 + }, + { + "epoch": 2.280433525345762, + "grad_norm": 5.169056294107577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470210 + }, + { + "epoch": 2.280482023538598, + "grad_norm": 4.796408461515966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470220 + }, + { + "epoch": 2.280530521731434, + "grad_norm": 4.340309942563181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470230 + }, + { + "epoch": 2.28057901992427, + "grad_norm": 4.7620108034607256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470240 + }, + { + "epoch": 2.280627518117106, + "grad_norm": 4.889298566013167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470250 + }, + { + "epoch": 2.2806760163099424, + "grad_norm": 4.844721956942522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470260 + }, + { + "epoch": 2.2807245145027784, + "grad_norm": 4.93559980441205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470270 + }, + { + "epoch": 2.2807730126956143, + "grad_norm": 4.0614011709294573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470280 + }, + { + "epoch": 2.2808215108884506, + "grad_norm": 4.6515921781065117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470290 + }, + { + "epoch": 2.2808700090812866, + "grad_norm": 4.3493707835295936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470300 + }, + { + "epoch": 2.280918507274123, + "grad_norm": 4.791326091435621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470310 + }, + { + "epoch": 2.280967005466959, + "grad_norm": 4.378611038191593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470320 + }, + { + "epoch": 2.2810155036597948, + "grad_norm": 3.7847601674911857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470330 + }, + { + "epoch": 2.281064001852631, + "grad_norm": 4.7302725647568877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470340 + }, + { + "epoch": 2.281112500045467, + "grad_norm": 4.6046920942899305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470350 + }, + { + "epoch": 2.281160998238303, + "grad_norm": 4.3119786141687655e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470360 + }, + { + "epoch": 2.2812094964311393, + "grad_norm": 4.392208552417287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470370 + }, + { + "epoch": 2.2812579946239753, + "grad_norm": 3.532366861236369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470380 + }, + { + "epoch": 2.281306492816811, + "grad_norm": 4.4191966708240216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470390 + }, + { + "epoch": 2.2813549910096476, + "grad_norm": 4.4641447516369226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470400 + }, + { + "epoch": 2.2814034892024835, + "grad_norm": 4.081738325112383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470410 + }, + { + "epoch": 2.28145198739532, + "grad_norm": 4.4449717506722664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470420 + }, + { + "epoch": 2.2815004855881558, + "grad_norm": 3.84387561780386e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470430 + }, + { + "epoch": 2.2815489837809917, + "grad_norm": 4.2027218682960665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470440 + }, + { + "epoch": 2.281597481973828, + "grad_norm": 4.197764269520121e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470450 + }, + { + "epoch": 2.281645980166664, + "grad_norm": 4.259612467194529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470460 + }, + { + "epoch": 2.2816944783595003, + "grad_norm": 4.384649514577177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470470 + }, + { + "epoch": 2.2817429765523363, + "grad_norm": 3.687087541948131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470480 + }, + { + "epoch": 2.281791474745172, + "grad_norm": 3.901346303791797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470490 + }, + { + "epoch": 2.2818399729380086, + "grad_norm": 4.358896035228099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470500 + }, + { + "epoch": 2.2818884711308445, + "grad_norm": 3.939788655316079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470510 + }, + { + "epoch": 2.2819369693236804, + "grad_norm": 3.908081112058426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470520 + }, + { + "epoch": 2.2819854675165168, + "grad_norm": 3.3619386385908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470530 + }, + { + "epoch": 2.2820339657093527, + "grad_norm": 3.873092850881221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470540 + }, + { + "epoch": 2.2820824639021886, + "grad_norm": 3.9062280166035634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470550 + }, + { + "epoch": 2.282130962095025, + "grad_norm": 3.7618244164150383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470560 + }, + { + "epoch": 2.282179460287861, + "grad_norm": 3.728157480509253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470570 + }, + { + "epoch": 2.2822279584806973, + "grad_norm": 3.263591850100056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470580 + }, + { + "epoch": 2.282276456673533, + "grad_norm": 3.665903420824179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470590 + }, + { + "epoch": 2.282324954866369, + "grad_norm": 3.674258550745435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470600 + }, + { + "epoch": 2.2823734530592055, + "grad_norm": 3.565123165572004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470610 + }, + { + "epoch": 2.2824219512520414, + "grad_norm": 3.890841924203414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470620 + }, + { + "epoch": 2.2824704494448778, + "grad_norm": 3.16927042831594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470630 + }, + { + "epoch": 2.2825189476377137, + "grad_norm": 3.818435914126894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470640 + }, + { + "epoch": 2.2825674458305496, + "grad_norm": 3.7358933013820206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470650 + }, + { + "epoch": 2.282615944023386, + "grad_norm": 3.6455548979574814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470660 + }, + { + "epoch": 2.282664442216222, + "grad_norm": 3.4999223430531856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470670 + }, + { + "epoch": 2.2827129404090583, + "grad_norm": 3.348725954310794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470680 + }, + { + "epoch": 2.282761438601894, + "grad_norm": 3.502944991851109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470690 + }, + { + "epoch": 2.28280993679473, + "grad_norm": 3.3605431326577673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470700 + }, + { + "epoch": 2.2828584349875665, + "grad_norm": 3.271636899171426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470710 + }, + { + "epoch": 2.2829069331804024, + "grad_norm": 3.2996334198287514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470720 + }, + { + "epoch": 2.2829554313732383, + "grad_norm": 3.0329141509355395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470730 + }, + { + "epoch": 2.2830039295660747, + "grad_norm": 0.0002732018765527755, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 470740 + }, + { + "epoch": 2.2830524277589106, + "grad_norm": 0.0001289590582018718, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470750 + }, + { + "epoch": 2.2831009259517465, + "grad_norm": 0.00032741771428845823, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 470760 + }, + { + "epoch": 2.283149424144583, + "grad_norm": 1.4374219063029159e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470770 + }, + { + "epoch": 2.283197922337419, + "grad_norm": 7.340013326029293e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470780 + }, + { + "epoch": 2.283246420530255, + "grad_norm": 4.2465567275939975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470790 + }, + { + "epoch": 2.283294918723091, + "grad_norm": 3.478865210126969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470800 + }, + { + "epoch": 2.283343416915927, + "grad_norm": 3.1733725336380303e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470810 + }, + { + "epoch": 2.2833919151087634, + "grad_norm": 3.1019162634038366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470820 + }, + { + "epoch": 2.2834404133015993, + "grad_norm": 3.8451116779469885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470830 + }, + { + "epoch": 2.2834889114944357, + "grad_norm": 2.75425168183574e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470840 + }, + { + "epoch": 2.2835374096872716, + "grad_norm": 2.7098501504951855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470850 + }, + { + "epoch": 2.2835859078801075, + "grad_norm": 2.4815351480356185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470860 + }, + { + "epoch": 2.283634406072944, + "grad_norm": 2.438661795167718e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470870 + }, + { + "epoch": 2.28368290426578, + "grad_norm": 3.291047733000596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470880 + }, + { + "epoch": 2.2837314024586157, + "grad_norm": 2.22070866584545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470890 + }, + { + "epoch": 2.283779900651452, + "grad_norm": 2.1823905171913793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470900 + }, + { + "epoch": 2.283828398844288, + "grad_norm": 2.179978764615953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470910 + }, + { + "epoch": 2.283876897037124, + "grad_norm": 2.225689740953385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470920 + }, + { + "epoch": 2.2839253952299603, + "grad_norm": 2.7239389055466745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470930 + }, + { + "epoch": 2.283973893422796, + "grad_norm": 1.949772467924049e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470940 + }, + { + "epoch": 2.2840223916156326, + "grad_norm": 2.045602514044731e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470950 + }, + { + "epoch": 2.2840708898084685, + "grad_norm": 1.8578414255898679e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470960 + }, + { + "epoch": 2.2841193880013044, + "grad_norm": 1.9196211269445485e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470970 + }, + { + "epoch": 2.284167886194141, + "grad_norm": 2.39131895796163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470980 + }, + { + "epoch": 2.2842163843869767, + "grad_norm": 1.8004091089096619e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 470990 + }, + { + "epoch": 2.284264882579813, + "grad_norm": 1.7088325421354966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471000 + }, + { + "epoch": 2.284313380772649, + "grad_norm": 1.711762934064609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471010 + }, + { + "epoch": 2.284361878965485, + "grad_norm": 1.6798650221971911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471020 + }, + { + "epoch": 2.2844103771583213, + "grad_norm": 2.057508481811965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471030 + }, + { + "epoch": 2.284458875351157, + "grad_norm": 1.6574415440118173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471040 + }, + { + "epoch": 2.284507373543993, + "grad_norm": 1.5772475308040157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471050 + }, + { + "epoch": 2.2845558717368295, + "grad_norm": 1.579805484652752e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471060 + }, + { + "epoch": 2.2846043699296654, + "grad_norm": 1.4069570397623465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471070 + }, + { + "epoch": 2.2846528681225013, + "grad_norm": 1.93842811313516e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471080 + }, + { + "epoch": 2.2847013663153377, + "grad_norm": 1.4405292176888906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471090 + }, + { + "epoch": 2.2847498645081736, + "grad_norm": 1.462706222810084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471100 + }, + { + "epoch": 2.28479836270101, + "grad_norm": 1.414402845512086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471110 + }, + { + "epoch": 2.284846860893846, + "grad_norm": 1.3574408512795344e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471120 + }, + { + "epoch": 2.284895359086682, + "grad_norm": 1.7837309087553876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471130 + }, + { + "epoch": 2.284943857279518, + "grad_norm": 1.326116944255773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471140 + }, + { + "epoch": 2.284992355472354, + "grad_norm": 1.2979201073903823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471150 + }, + { + "epoch": 2.2850408536651905, + "grad_norm": 1.2638178077395423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471160 + }, + { + "epoch": 2.2850893518580264, + "grad_norm": 1.268839014301193e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471170 + }, + { + "epoch": 2.2851378500508623, + "grad_norm": 1.626672542442975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471180 + }, + { + "epoch": 2.2851863482436987, + "grad_norm": 1.212763322655519e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471190 + }, + { + "epoch": 2.2852348464365346, + "grad_norm": 1.2419094446158851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471200 + }, + { + "epoch": 2.285283344629371, + "grad_norm": 1.1920823226319044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471210 + }, + { + "epoch": 2.285331842822207, + "grad_norm": 1.1720009069904336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471220 + }, + { + "epoch": 2.285380341015043, + "grad_norm": 1.4428600252358592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471230 + }, + { + "epoch": 2.285428839207879, + "grad_norm": 1.1023596471204655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471240 + }, + { + "epoch": 2.285477337400715, + "grad_norm": 1.10966948341229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471250 + }, + { + "epoch": 2.285525835593551, + "grad_norm": 1.0971665460601798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471260 + }, + { + "epoch": 2.2855743337863874, + "grad_norm": 1.086071961253765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471270 + }, + { + "epoch": 2.2856228319792233, + "grad_norm": 1.3174618516131886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471280 + }, + { + "epoch": 2.2856713301720593, + "grad_norm": 1.0441304993946687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471290 + }, + { + "epoch": 2.2857198283648956, + "grad_norm": 1.0495355127204675e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471300 + }, + { + "epoch": 2.2857683265577315, + "grad_norm": 1.0088623412229936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471310 + }, + { + "epoch": 2.285816824750568, + "grad_norm": 1.0083135748573113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471320 + }, + { + "epoch": 2.285865322943404, + "grad_norm": 1.234198066413228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471330 + }, + { + "epoch": 2.2859138211362398, + "grad_norm": 9.827746225710143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471340 + }, + { + "epoch": 2.285962319329076, + "grad_norm": 9.508735274721403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471350 + }, + { + "epoch": 2.286010817521912, + "grad_norm": 9.52684388266789e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471360 + }, + { + "epoch": 2.2860593157147484, + "grad_norm": 9.55895870902168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471370 + }, + { + "epoch": 2.2861078139075843, + "grad_norm": 1.169728875538567e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471380 + }, + { + "epoch": 2.2861563121004203, + "grad_norm": 9.174353294838511e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471390 + }, + { + "epoch": 2.2862048102932566, + "grad_norm": 9.181731002172455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471400 + }, + { + "epoch": 2.2862533084860925, + "grad_norm": 8.937886377680115e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471410 + }, + { + "epoch": 2.2863018066789285, + "grad_norm": 8.789649541540712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471420 + }, + { + "epoch": 2.286350304871765, + "grad_norm": 1.1246471558479243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471430 + }, + { + "epoch": 2.2863988030646007, + "grad_norm": 8.882051929504087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471440 + }, + { + "epoch": 2.2864473012574367, + "grad_norm": 8.406385063608468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471450 + }, + { + "epoch": 2.286495799450273, + "grad_norm": 8.5249212133931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471460 + }, + { + "epoch": 2.286544297643109, + "grad_norm": 8.313405714943656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471470 + }, + { + "epoch": 2.2865927958359453, + "grad_norm": 1.0394157925475156e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471480 + }, + { + "epoch": 2.2866412940287812, + "grad_norm": 7.840596936148359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471490 + }, + { + "epoch": 2.286689792221617, + "grad_norm": 7.762589007143106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471500 + }, + { + "epoch": 2.2867382904144535, + "grad_norm": 7.571401283712476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471510 + }, + { + "epoch": 2.2867867886072895, + "grad_norm": 7.674987045902526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471520 + }, + { + "epoch": 2.286835286800126, + "grad_norm": 9.881222240437637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471530 + }, + { + "epoch": 2.2868837849929617, + "grad_norm": 7.761676101836201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471540 + }, + { + "epoch": 2.2869322831857977, + "grad_norm": 7.577681913062406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471550 + }, + { + "epoch": 2.286980781378634, + "grad_norm": 7.958632863847015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471560 + }, + { + "epoch": 2.28702927957147, + "grad_norm": 7.238563739520032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471570 + }, + { + "epoch": 2.287077777764306, + "grad_norm": 9.066081929631764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471580 + }, + { + "epoch": 2.2871262759571422, + "grad_norm": 6.965180432416673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471590 + }, + { + "epoch": 2.287174774149978, + "grad_norm": 6.799168659199495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471600 + }, + { + "epoch": 2.287223272342814, + "grad_norm": 6.893814088471117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471610 + }, + { + "epoch": 2.2872717705356505, + "grad_norm": 6.786021344851179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471620 + }, + { + "epoch": 2.2873202687284864, + "grad_norm": 8.762877996559837e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471630 + }, + { + "epoch": 2.2873687669213227, + "grad_norm": 6.865088835183997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471640 + }, + { + "epoch": 2.2874172651141587, + "grad_norm": 6.61852709527011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471650 + }, + { + "epoch": 2.2874657633069946, + "grad_norm": 6.513066068691842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471660 + }, + { + "epoch": 2.287514261499831, + "grad_norm": 6.264700687097502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471670 + }, + { + "epoch": 2.287562759692667, + "grad_norm": 8.327331215696177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471680 + }, + { + "epoch": 2.2876112578855032, + "grad_norm": 6.19971558535326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471690 + }, + { + "epoch": 2.287659756078339, + "grad_norm": 6.427921448448615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471700 + }, + { + "epoch": 2.287708254271175, + "grad_norm": 5.852821800544916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471710 + }, + { + "epoch": 2.2877567524640114, + "grad_norm": 6.060495820747747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471720 + }, + { + "epoch": 2.2878052506568474, + "grad_norm": 7.878515475567838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471730 + }, + { + "epoch": 2.2878537488496837, + "grad_norm": 5.817317401124456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471740 + }, + { + "epoch": 2.2879022470425197, + "grad_norm": 5.782003427157179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471750 + }, + { + "epoch": 2.2879507452353556, + "grad_norm": 5.397149038799398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471760 + }, + { + "epoch": 2.287999243428192, + "grad_norm": 5.284853159537306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471770 + }, + { + "epoch": 2.288047741621028, + "grad_norm": 7.692818826399161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471780 + }, + { + "epoch": 2.288096239813864, + "grad_norm": 5.715434667763475e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471790 + }, + { + "epoch": 2.2881447380067, + "grad_norm": 5.261062483441492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471800 + }, + { + "epoch": 2.288193236199536, + "grad_norm": 5.22420691595471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471810 + }, + { + "epoch": 2.288241734392372, + "grad_norm": 5.182665745451231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471820 + }, + { + "epoch": 2.2882902325852084, + "grad_norm": 6.79858260355104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471830 + }, + { + "epoch": 2.2883387307780443, + "grad_norm": 5.259365707388497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471840 + }, + { + "epoch": 2.2883872289708806, + "grad_norm": 5.354369818633131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471850 + }, + { + "epoch": 2.2884357271637166, + "grad_norm": 5.100866928842152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471860 + }, + { + "epoch": 2.2884842253565525, + "grad_norm": 5.186731186768156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471870 + }, + { + "epoch": 2.288532723549389, + "grad_norm": 6.491129056485079e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471880 + }, + { + "epoch": 2.288581221742225, + "grad_norm": 4.876854404756159e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471890 + }, + { + "epoch": 2.288629719935061, + "grad_norm": 4.846791625823244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471900 + }, + { + "epoch": 2.288678218127897, + "grad_norm": 4.749194317810179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471910 + }, + { + "epoch": 2.288726716320733, + "grad_norm": 4.830683906220656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471920 + }, + { + "epoch": 2.2887752145135694, + "grad_norm": 5.924089805375843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471930 + }, + { + "epoch": 2.2888237127064053, + "grad_norm": 4.6718255930500163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471940 + }, + { + "epoch": 2.288872210899241, + "grad_norm": 4.695803852428071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471950 + }, + { + "epoch": 2.2889207090920776, + "grad_norm": 4.6186252689039975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471960 + }, + { + "epoch": 2.2889692072849135, + "grad_norm": 4.3992312726004457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471970 + }, + { + "epoch": 2.2890177054777494, + "grad_norm": 5.839017376274569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471980 + }, + { + "epoch": 2.2890662036705858, + "grad_norm": 4.325394513671199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 471990 + }, + { + "epoch": 2.2891147018634217, + "grad_norm": 4.3091264956274244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472000 + }, + { + "epoch": 2.289163200056258, + "grad_norm": 4.3140323668922065e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472010 + }, + { + "epoch": 2.289211698249094, + "grad_norm": 4.290699280318222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472020 + }, + { + "epoch": 2.28926019644193, + "grad_norm": 5.10190886870987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472030 + }, + { + "epoch": 2.2893086946347663, + "grad_norm": 4.0045102878139005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472040 + }, + { + "epoch": 2.289357192827602, + "grad_norm": 4.0410583324046456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472050 + }, + { + "epoch": 2.2894056910204386, + "grad_norm": 4.118905110317428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472060 + }, + { + "epoch": 2.2894541892132745, + "grad_norm": 4.088255707301869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472070 + }, + { + "epoch": 2.2895026874061104, + "grad_norm": 5.330899739419692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472080 + }, + { + "epoch": 2.2895511855989468, + "grad_norm": 4.13045540881285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472090 + }, + { + "epoch": 2.2895996837917827, + "grad_norm": 3.8762749454690493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472100 + }, + { + "epoch": 2.2896481819846186, + "grad_norm": 4.052170652357745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472110 + }, + { + "epoch": 2.289696680177455, + "grad_norm": 3.7302257283045037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472120 + }, + { + "epoch": 2.289745178370291, + "grad_norm": 4.985316763850278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472130 + }, + { + "epoch": 2.289793676563127, + "grad_norm": 3.7332989677452133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472140 + }, + { + "epoch": 2.289842174755963, + "grad_norm": 3.7442831057887815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472150 + }, + { + "epoch": 2.289890672948799, + "grad_norm": 3.588248205232958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472160 + }, + { + "epoch": 2.2899391711416355, + "grad_norm": 3.731861966116412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472170 + }, + { + "epoch": 2.2899876693344714, + "grad_norm": 4.7566703642587527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472180 + }, + { + "epoch": 2.2900361675273073, + "grad_norm": 3.5530601394384576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472190 + }, + { + "epoch": 2.2900846657201437, + "grad_norm": 3.582335637020151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472200 + }, + { + "epoch": 2.2901331639129796, + "grad_norm": 3.5041074397668126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472210 + }, + { + "epoch": 2.290181662105816, + "grad_norm": 3.5266688769297616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472220 + }, + { + "epoch": 2.290230160298652, + "grad_norm": 4.3513395553418377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472230 + }, + { + "epoch": 2.290278658491488, + "grad_norm": 3.466294060672226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472240 + }, + { + "epoch": 2.290327156684324, + "grad_norm": 3.310257170596742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472250 + }, + { + "epoch": 2.29037565487716, + "grad_norm": 3.1935775268721045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472260 + }, + { + "epoch": 2.2904241530699965, + "grad_norm": 3.8158989923431363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472270 + }, + { + "epoch": 2.2904726512628324, + "grad_norm": 4.035613585529063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472280 + }, + { + "epoch": 2.2905211494556683, + "grad_norm": 3.1190634786071314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472290 + }, + { + "epoch": 2.2905696476485047, + "grad_norm": 3.2066191124613397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472300 + }, + { + "epoch": 2.2906181458413406, + "grad_norm": 3.4653618286029086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472310 + }, + { + "epoch": 2.2906666440341765, + "grad_norm": 3.207163388196932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472320 + }, + { + "epoch": 2.290715142227013, + "grad_norm": 3.8148326098053076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472330 + }, + { + "epoch": 2.290763640419849, + "grad_norm": 3.1798043664821307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472340 + }, + { + "epoch": 2.2908121386126847, + "grad_norm": 2.9355135211517336e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472350 + }, + { + "epoch": 2.290860636805521, + "grad_norm": 3.0095554848230677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472360 + }, + { + "epoch": 2.290909134998357, + "grad_norm": 3.223956355213886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472370 + }, + { + "epoch": 2.2909576331911934, + "grad_norm": 3.6992818763792457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472380 + }, + { + "epoch": 2.2910061313840293, + "grad_norm": 2.8538053697957366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472390 + }, + { + "epoch": 2.2910546295768652, + "grad_norm": 2.9247013344502193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472400 + }, + { + "epoch": 2.2911031277697016, + "grad_norm": 2.9115804522916733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472410 + }, + { + "epoch": 2.2911516259625375, + "grad_norm": 3.0038137310839375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472420 + }, + { + "epoch": 2.291200124155374, + "grad_norm": 3.5973175727122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472430 + }, + { + "epoch": 2.29124862234821, + "grad_norm": 2.7606719754658116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472440 + }, + { + "epoch": 2.2912971205410457, + "grad_norm": 2.928812250502233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472450 + }, + { + "epoch": 2.291345618733882, + "grad_norm": 2.889948689244193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472460 + }, + { + "epoch": 2.291394116926718, + "grad_norm": 2.8068313895346364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472470 + }, + { + "epoch": 2.291442615119554, + "grad_norm": 3.560800507784734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472480 + }, + { + "epoch": 2.2914911133123903, + "grad_norm": 2.8337404955891543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472490 + }, + { + "epoch": 2.2915396115052262, + "grad_norm": 2.7817517889161536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472500 + }, + { + "epoch": 2.291588109698062, + "grad_norm": 4.203698154015001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472510 + }, + { + "epoch": 2.2916366078908985, + "grad_norm": 2.782392414246715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472520 + }, + { + "epoch": 2.2916851060837344, + "grad_norm": 3.429095727369713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472530 + }, + { + "epoch": 2.291733604276571, + "grad_norm": 2.7569936378313287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472540 + }, + { + "epoch": 2.2917821024694067, + "grad_norm": 2.687632445486088e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472550 + }, + { + "epoch": 2.2918306006622426, + "grad_norm": 2.607579006053129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472560 + }, + { + "epoch": 2.291879098855079, + "grad_norm": 2.746209872839245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472570 + }, + { + "epoch": 2.291927597047915, + "grad_norm": 3.0929200534046686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472580 + }, + { + "epoch": 2.2919760952407513, + "grad_norm": 2.645738561568578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472590 + }, + { + "epoch": 2.292024593433587, + "grad_norm": 2.6512617523621884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472600 + }, + { + "epoch": 2.292073091626423, + "grad_norm": 2.65509385144469e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472610 + }, + { + "epoch": 2.2921215898192595, + "grad_norm": 2.6489669835427776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472620 + }, + { + "epoch": 2.2921700880120954, + "grad_norm": 3.05744947581843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472630 + }, + { + "epoch": 2.2922185862049314, + "grad_norm": 2.5727132424435695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472640 + }, + { + "epoch": 2.2922670843977677, + "grad_norm": 2.547168378441711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472650 + }, + { + "epoch": 2.2923155825906036, + "grad_norm": 2.463765440552379e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472660 + }, + { + "epoch": 2.2923640807834396, + "grad_norm": 2.419238001039048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472670 + }, + { + "epoch": 2.292412578976276, + "grad_norm": 3.037872602362768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472680 + }, + { + "epoch": 2.292461077169112, + "grad_norm": 2.3480829725031072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472690 + }, + { + "epoch": 2.292509575361948, + "grad_norm": 2.476995462075138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472700 + }, + { + "epoch": 2.292558073554784, + "grad_norm": 2.340392200039787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472710 + }, + { + "epoch": 2.29260657174762, + "grad_norm": 2.4382092078667483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472720 + }, + { + "epoch": 2.2926550699404564, + "grad_norm": 2.838308432728809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472730 + }, + { + "epoch": 2.2927035681332923, + "grad_norm": 2.3537316451438528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472740 + }, + { + "epoch": 2.2927520663261287, + "grad_norm": 2.4278023147417116e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472750 + }, + { + "epoch": 2.2928005645189646, + "grad_norm": 2.284878064529039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472760 + }, + { + "epoch": 2.2928490627118006, + "grad_norm": 2.3107210722628224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472770 + }, + { + "epoch": 2.292897560904637, + "grad_norm": 2.966514500712947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472780 + }, + { + "epoch": 2.292946059097473, + "grad_norm": 2.3351547895344993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472790 + }, + { + "epoch": 2.292994557290309, + "grad_norm": 2.1985663067880523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472800 + }, + { + "epoch": 2.293043055483145, + "grad_norm": 2.2734755589226552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472810 + }, + { + "epoch": 2.293091553675981, + "grad_norm": 2.2537020072377345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472820 + }, + { + "epoch": 2.2931400518688174, + "grad_norm": 2.7993894491373794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472830 + }, + { + "epoch": 2.2931885500616533, + "grad_norm": 2.1734187782840309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472840 + }, + { + "epoch": 2.2932370482544893, + "grad_norm": 2.2139840893942164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472850 + }, + { + "epoch": 2.2932855464473256, + "grad_norm": 2.1955870010970102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472860 + }, + { + "epoch": 2.2933340446401616, + "grad_norm": 2.2594595350255986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472870 + }, + { + "epoch": 2.2933825428329975, + "grad_norm": 2.648722272624582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472880 + }, + { + "epoch": 2.293431041025834, + "grad_norm": 2.1856149601262587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472890 + }, + { + "epoch": 2.2934795392186698, + "grad_norm": 2.189593857337968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472900 + }, + { + "epoch": 2.293528037411506, + "grad_norm": 2.138920933703048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472910 + }, + { + "epoch": 2.293576535604342, + "grad_norm": 2.1357759294460266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472920 + }, + { + "epoch": 2.293625033797178, + "grad_norm": 2.6204705250165716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472930 + }, + { + "epoch": 2.2936735319900143, + "grad_norm": 2.1582296483302343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472940 + }, + { + "epoch": 2.2937220301828503, + "grad_norm": 2.0647158294195833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472950 + }, + { + "epoch": 2.2937705283756866, + "grad_norm": 2.0853920545960136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472960 + }, + { + "epoch": 2.2938190265685225, + "grad_norm": 2.0856749927133933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472970 + }, + { + "epoch": 2.2938675247613585, + "grad_norm": 2.5692170879665355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472980 + }, + { + "epoch": 2.293916022954195, + "grad_norm": 2.0340304729415948e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 472990 + }, + { + "epoch": 2.2939645211470308, + "grad_norm": 1.9920676663787162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473000 + }, + { + "epoch": 2.2940130193398667, + "grad_norm": 1.914918215106809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473010 + }, + { + "epoch": 2.294061517532703, + "grad_norm": 2.0159092173344106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473020 + }, + { + "epoch": 2.294110015725539, + "grad_norm": 2.5414377091692586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473030 + }, + { + "epoch": 2.294158513918375, + "grad_norm": 2.0049273530275968e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473040 + }, + { + "epoch": 2.2942070121112113, + "grad_norm": 1.989319997619532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473050 + }, + { + "epoch": 2.294255510304047, + "grad_norm": 1.9497541359214665e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473060 + }, + { + "epoch": 2.2943040084968835, + "grad_norm": 1.9274911267075368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473070 + }, + { + "epoch": 2.2943525066897195, + "grad_norm": 2.3904118506834493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473080 + }, + { + "epoch": 2.2944010048825554, + "grad_norm": 1.9667209016915876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473090 + }, + { + "epoch": 2.2944495030753917, + "grad_norm": 1.9229982228807785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473100 + }, + { + "epoch": 2.2944980012682277, + "grad_norm": 1.8777117816171085e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473110 + }, + { + "epoch": 2.294546499461064, + "grad_norm": 1.8200438489657245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473120 + }, + { + "epoch": 2.2945949976539, + "grad_norm": 2.4617961003059463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473130 + }, + { + "epoch": 2.294643495846736, + "grad_norm": 1.8845743454676267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473140 + }, + { + "epoch": 2.2946919940395722, + "grad_norm": 1.9242581572598283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473150 + }, + { + "epoch": 2.294740492232408, + "grad_norm": 1.816132311205365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473160 + }, + { + "epoch": 2.294788990425244, + "grad_norm": 1.7819067466007255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473170 + }, + { + "epoch": 2.2948374886180805, + "grad_norm": 2.3127385873067396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473180 + }, + { + "epoch": 2.2948859868109164, + "grad_norm": 1.742853186215143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473190 + }, + { + "epoch": 2.2949344850037523, + "grad_norm": 1.7851606060048653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473200 + }, + { + "epoch": 2.2949829831965887, + "grad_norm": 1.7285185549553717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473210 + }, + { + "epoch": 2.2950314813894246, + "grad_norm": 1.7730459944687027e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473220 + }, + { + "epoch": 2.295079979582261, + "grad_norm": 2.2092997653544444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473230 + }, + { + "epoch": 2.295128477775097, + "grad_norm": 1.751620715140234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473240 + }, + { + "epoch": 2.295176975967933, + "grad_norm": 1.771794586602482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473250 + }, + { + "epoch": 2.295225474160769, + "grad_norm": 1.6830732363359857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473260 + }, + { + "epoch": 2.295273972353605, + "grad_norm": 1.6165618887953315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473270 + }, + { + "epoch": 2.2953224705464415, + "grad_norm": 2.1856061493963352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473280 + }, + { + "epoch": 2.2953709687392774, + "grad_norm": 1.6362157850835501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473290 + }, + { + "epoch": 2.2954194669321133, + "grad_norm": 1.7157594811578747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473300 + }, + { + "epoch": 2.2954679651249497, + "grad_norm": 1.719287752166565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473310 + }, + { + "epoch": 2.2955164633177856, + "grad_norm": 1.6588788298577128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473320 + }, + { + "epoch": 2.295564961510622, + "grad_norm": 2.1207722511462634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473330 + }, + { + "epoch": 2.295613459703458, + "grad_norm": 1.6774377797901252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473340 + }, + { + "epoch": 2.295661957896294, + "grad_norm": 1.647151890438181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473350 + }, + { + "epoch": 2.29571045608913, + "grad_norm": 1.582482411777164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473360 + }, + { + "epoch": 2.295758954281966, + "grad_norm": 1.6143171421845182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473370 + }, + { + "epoch": 2.295807452474802, + "grad_norm": 2.1036318287315225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473380 + }, + { + "epoch": 2.2958559506676384, + "grad_norm": 1.563095395340497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473390 + }, + { + "epoch": 2.2959044488604743, + "grad_norm": 1.542275498422896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473400 + }, + { + "epoch": 2.29595294705331, + "grad_norm": 1.512923830659929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473410 + }, + { + "epoch": 2.2960014452461466, + "grad_norm": 1.587964106875006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473420 + }, + { + "epoch": 2.2960499434389825, + "grad_norm": 2.036095594348808e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473430 + }, + { + "epoch": 2.296098441631819, + "grad_norm": 1.5660174312870367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473440 + }, + { + "epoch": 2.296146939824655, + "grad_norm": 1.4680313142889645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473450 + }, + { + "epoch": 2.2961954380174907, + "grad_norm": 1.470433232952928e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473460 + }, + { + "epoch": 2.296243936210327, + "grad_norm": 1.5433754185778525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473470 + }, + { + "epoch": 2.296292434403163, + "grad_norm": 2.018103515410985e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473480 + }, + { + "epoch": 2.2963409325959994, + "grad_norm": 1.413487069612529e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473490 + }, + { + "epoch": 2.2963894307888353, + "grad_norm": 1.520111538866331e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473500 + }, + { + "epoch": 2.296437928981671, + "grad_norm": 1.42799237323743e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473510 + }, + { + "epoch": 2.2964864271745076, + "grad_norm": 1.4341586052069033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473520 + }, + { + "epoch": 2.2965349253673435, + "grad_norm": 1.9308330934109108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473530 + }, + { + "epoch": 2.2965834235601794, + "grad_norm": 1.4548390936397482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473540 + }, + { + "epoch": 2.296631921753016, + "grad_norm": 1.3412093835540873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473550 + }, + { + "epoch": 2.2966804199458517, + "grad_norm": 1.3798289444366674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473560 + }, + { + "epoch": 2.2967289181386876, + "grad_norm": 1.4060398711990274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473570 + }, + { + "epoch": 2.296777416331524, + "grad_norm": 1.8115296995802055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473580 + }, + { + "epoch": 2.29682591452436, + "grad_norm": 1.3575389345987787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473590 + }, + { + "epoch": 2.2968744127171963, + "grad_norm": 1.3475394666784268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473600 + }, + { + "epoch": 2.296922910910032, + "grad_norm": 1.352246385977196e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473610 + }, + { + "epoch": 2.296971409102868, + "grad_norm": 1.397642250822173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473620 + }, + { + "epoch": 2.2970199072957045, + "grad_norm": 1.800795956796719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473630 + }, + { + "epoch": 2.2970684054885404, + "grad_norm": 1.3938657161816081e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473640 + }, + { + "epoch": 2.2971169036813768, + "grad_norm": 1.3032406798174634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473650 + }, + { + "epoch": 2.2971654018742127, + "grad_norm": 1.3030118850565486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473660 + }, + { + "epoch": 2.2972139000670486, + "grad_norm": 1.28920703446056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473670 + }, + { + "epoch": 2.297262398259885, + "grad_norm": 1.8655680378287798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473680 + }, + { + "epoch": 2.297310896452721, + "grad_norm": 1.3539154508634965e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473690 + }, + { + "epoch": 2.297359394645557, + "grad_norm": 1.4337446430090495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473700 + }, + { + "epoch": 2.297407892838393, + "grad_norm": 1.2480919053814432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473710 + }, + { + "epoch": 2.297456391031229, + "grad_norm": 1.2474865229705756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473720 + }, + { + "epoch": 2.297504889224065, + "grad_norm": 1.6853887530032807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473730 + }, + { + "epoch": 2.2975533874169014, + "grad_norm": 1.2165993723556312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473740 + }, + { + "epoch": 2.2976018856097373, + "grad_norm": 1.2168642626875226e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473750 + }, + { + "epoch": 2.2976503838025737, + "grad_norm": 1.2141576632984652e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473760 + }, + { + "epoch": 2.2976988819954096, + "grad_norm": 1.1809442668209158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473770 + }, + { + "epoch": 2.2977473801882455, + "grad_norm": 1.6077133579983638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473780 + }, + { + "epoch": 2.297795878381082, + "grad_norm": 1.1883946626767283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473790 + }, + { + "epoch": 2.297844376573918, + "grad_norm": 1.1702891100640045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473800 + }, + { + "epoch": 2.297892874766754, + "grad_norm": 1.1963184931573778e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473810 + }, + { + "epoch": 2.29794137295959, + "grad_norm": 1.1055099946588598e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473820 + }, + { + "epoch": 2.297989871152426, + "grad_norm": 1.5527969310369372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473830 + }, + { + "epoch": 2.2980383693452624, + "grad_norm": 1.1681916589623142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473840 + }, + { + "epoch": 2.2980868675380983, + "grad_norm": 1.1566516633365609e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473850 + }, + { + "epoch": 2.2981353657309347, + "grad_norm": 1.095669048822856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473860 + }, + { + "epoch": 2.2981838639237706, + "grad_norm": 1.196454206819908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473870 + }, + { + "epoch": 2.2982323621166065, + "grad_norm": 1.5464537739262596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473880 + }, + { + "epoch": 2.298280860309443, + "grad_norm": 1.0992901877671102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473890 + }, + { + "epoch": 2.298329358502279, + "grad_norm": 1.1118937237597493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473900 + }, + { + "epoch": 2.2983778566951147, + "grad_norm": 1.1446955028304728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473910 + }, + { + "epoch": 2.298426354887951, + "grad_norm": 1.0623750767990714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473920 + }, + { + "epoch": 2.298474853080787, + "grad_norm": 1.4457194197348144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473930 + }, + { + "epoch": 2.298523351273623, + "grad_norm": 1.0743136868995862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473940 + }, + { + "epoch": 2.2985718494664593, + "grad_norm": 1.0481070233936407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473950 + }, + { + "epoch": 2.2986203476592952, + "grad_norm": 1.069899369099403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473960 + }, + { + "epoch": 2.2986688458521316, + "grad_norm": 1.1169039026981409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473970 + }, + { + "epoch": 2.2987173440449675, + "grad_norm": 1.3931511944065278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473980 + }, + { + "epoch": 2.2987658422378034, + "grad_norm": 1.1161954205363145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 473990 + }, + { + "epoch": 2.29881434043064, + "grad_norm": 1.0288321306006765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474000 + }, + { + "epoch": 2.2988628386234757, + "grad_norm": 1.0689068830060933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474010 + }, + { + "epoch": 2.298911336816312, + "grad_norm": 1.0224883340015367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474020 + }, + { + "epoch": 2.298959835009148, + "grad_norm": 1.3840498525041767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474030 + }, + { + "epoch": 2.299008333201984, + "grad_norm": 9.88963293480083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474040 + }, + { + "epoch": 2.2990568313948203, + "grad_norm": 9.751160234827694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474050 + }, + { + "epoch": 2.2991053295876562, + "grad_norm": 9.613534501795584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474060 + }, + { + "epoch": 2.299153827780492, + "grad_norm": 1.0293619823187328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474070 + }, + { + "epoch": 2.2992023259733285, + "grad_norm": 1.3694454992219107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474080 + }, + { + "epoch": 2.2992508241661644, + "grad_norm": 9.550734603180899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474090 + }, + { + "epoch": 2.2992993223590004, + "grad_norm": 9.709818726832964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474100 + }, + { + "epoch": 2.2993478205518367, + "grad_norm": 9.38942577022317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474110 + }, + { + "epoch": 2.2993963187446727, + "grad_norm": 9.795353861363765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474120 + }, + { + "epoch": 2.299444816937509, + "grad_norm": 1.3569429313520232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474130 + }, + { + "epoch": 2.299493315130345, + "grad_norm": 9.217020391361075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474140 + }, + { + "epoch": 2.299541813323181, + "grad_norm": 9.397395928090191e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474150 + }, + { + "epoch": 2.2995903115160172, + "grad_norm": 9.421071212045717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474160 + }, + { + "epoch": 2.299638809708853, + "grad_norm": 9.128148548143145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474170 + }, + { + "epoch": 2.2996873079016895, + "grad_norm": 1.2821836037346657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474180 + }, + { + "epoch": 2.2997358060945254, + "grad_norm": 9.616012874857915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474190 + }, + { + "epoch": 2.2997843042873614, + "grad_norm": 9.605079753782775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474200 + }, + { + "epoch": 2.2998328024801977, + "grad_norm": 9.304037718038671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474210 + }, + { + "epoch": 2.2998813006730336, + "grad_norm": 9.382365817600657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474220 + }, + { + "epoch": 2.2999297988658696, + "grad_norm": 1.1902441343636383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474230 + }, + { + "epoch": 2.299978297058706, + "grad_norm": 8.980494925481253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474240 + }, + { + "epoch": 2.300026795251542, + "grad_norm": 8.980074284181683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474250 + }, + { + "epoch": 2.300075293444378, + "grad_norm": 8.82954509506817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474260 + }, + { + "epoch": 2.300123791637214, + "grad_norm": 9.132048717219732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474270 + }, + { + "epoch": 2.30017228983005, + "grad_norm": 1.1892890228182296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474280 + }, + { + "epoch": 2.3002207880228864, + "grad_norm": 9.070806328281833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474290 + }, + { + "epoch": 2.3002692862157224, + "grad_norm": 9.082060614673537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474300 + }, + { + "epoch": 2.3003177844085583, + "grad_norm": 9.060087791112892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474310 + }, + { + "epoch": 2.3003662826013946, + "grad_norm": 8.831948861143246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474320 + }, + { + "epoch": 2.3004147807942306, + "grad_norm": 1.1138236288843473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474330 + }, + { + "epoch": 2.300463278987067, + "grad_norm": 9.249129817590074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474340 + }, + { + "epoch": 2.300511777179903, + "grad_norm": 9.004410372881466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474350 + }, + { + "epoch": 2.3005602753727388, + "grad_norm": 8.509522331223707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474360 + }, + { + "epoch": 2.300608773565575, + "grad_norm": 8.600053291729637e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474370 + }, + { + "epoch": 2.300657271758411, + "grad_norm": 1.1530314480978632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474380 + }, + { + "epoch": 2.3007057699512474, + "grad_norm": 8.471496926176769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474390 + }, + { + "epoch": 2.3007542681440833, + "grad_norm": 8.338375323546643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474400 + }, + { + "epoch": 2.3008027663369193, + "grad_norm": 8.947020546656859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474410 + }, + { + "epoch": 2.3008512645297556, + "grad_norm": 8.312206745131334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474420 + }, + { + "epoch": 2.3008997627225916, + "grad_norm": 1.0972784281193526e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474430 + }, + { + "epoch": 2.3009482609154275, + "grad_norm": 8.385138272615222e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474440 + }, + { + "epoch": 2.300996759108264, + "grad_norm": 8.323307554292114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474450 + }, + { + "epoch": 2.3010452573010998, + "grad_norm": 8.024498754366505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474460 + }, + { + "epoch": 2.3010937554939357, + "grad_norm": 8.306034260385786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474470 + }, + { + "epoch": 2.301142253686772, + "grad_norm": 1.0348114898306449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474480 + }, + { + "epoch": 2.301190751879608, + "grad_norm": 7.97133168362052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474490 + }, + { + "epoch": 2.3012392500724443, + "grad_norm": 8.239029369860873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474500 + }, + { + "epoch": 2.3012877482652803, + "grad_norm": 8.465477918662145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474510 + }, + { + "epoch": 2.301336246458116, + "grad_norm": 8.067912915521447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474520 + }, + { + "epoch": 2.3013847446509526, + "grad_norm": 1.0391826066324938e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474530 + }, + { + "epoch": 2.3014332428437885, + "grad_norm": 7.9344729897457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474540 + }, + { + "epoch": 2.301481741036625, + "grad_norm": 8.077301316689045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474550 + }, + { + "epoch": 2.3015302392294608, + "grad_norm": 8.153453023851398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474560 + }, + { + "epoch": 2.3015787374222967, + "grad_norm": 7.867500073643896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474570 + }, + { + "epoch": 2.301627235615133, + "grad_norm": 9.678582557626214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474580 + }, + { + "epoch": 2.301675733807969, + "grad_norm": 7.957294911875579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474590 + }, + { + "epoch": 2.301724232000805, + "grad_norm": 7.612048591454368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474600 + }, + { + "epoch": 2.3017727301936413, + "grad_norm": 7.694199410934743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474610 + }, + { + "epoch": 2.301821228386477, + "grad_norm": 8.03011204197901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474620 + }, + { + "epoch": 2.301869726579313, + "grad_norm": 9.859413552248952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474630 + }, + { + "epoch": 2.3019182247721495, + "grad_norm": 7.553754954869873e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474640 + }, + { + "epoch": 2.3019667229649854, + "grad_norm": 7.660705136913748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474650 + }, + { + "epoch": 2.3020152211578218, + "grad_norm": 7.436842963670642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474660 + }, + { + "epoch": 2.3020637193506577, + "grad_norm": 7.610618979470019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474670 + }, + { + "epoch": 2.3021122175434936, + "grad_norm": 9.403174061617392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474680 + }, + { + "epoch": 2.30216071573633, + "grad_norm": 7.429098047850857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474690 + }, + { + "epoch": 2.302209213929166, + "grad_norm": 7.41565386874754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474700 + }, + { + "epoch": 2.3022577121220023, + "grad_norm": 7.572517546350355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474710 + }, + { + "epoch": 2.302306210314838, + "grad_norm": 7.553656899972339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474720 + }, + { + "epoch": 2.302354708507674, + "grad_norm": 9.047936799788658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474730 + }, + { + "epoch": 2.3024032067005105, + "grad_norm": 7.790666955997949e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474740 + }, + { + "epoch": 2.3024517048933464, + "grad_norm": 7.202964980024262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474750 + }, + { + "epoch": 2.3025002030861828, + "grad_norm": 7.365201781794894e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474760 + }, + { + "epoch": 2.3025487012790187, + "grad_norm": 7.503176391310262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474770 + }, + { + "epoch": 2.3025971994718546, + "grad_norm": 8.946352636485244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474780 + }, + { + "epoch": 2.302645697664691, + "grad_norm": 7.389471790020252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474790 + }, + { + "epoch": 2.302694195857527, + "grad_norm": 7.237195376319505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474800 + }, + { + "epoch": 2.302742694050363, + "grad_norm": 7.177124672352875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474810 + }, + { + "epoch": 2.302791192243199, + "grad_norm": 7.419016867515893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474820 + }, + { + "epoch": 2.302839690436035, + "grad_norm": 8.859728239940523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474830 + }, + { + "epoch": 2.302888188628871, + "grad_norm": 7.206852359331606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474840 + }, + { + "epoch": 2.3029366868217074, + "grad_norm": 7.201325757932864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474850 + }, + { + "epoch": 2.3029851850145433, + "grad_norm": 7.127302126264112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474860 + }, + { + "epoch": 2.3030336832073797, + "grad_norm": 7.0061503265606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474870 + }, + { + "epoch": 2.3030821814002156, + "grad_norm": 8.360023429077046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474880 + }, + { + "epoch": 2.3031306795930515, + "grad_norm": 7.199913909516908e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474890 + }, + { + "epoch": 2.303179177785888, + "grad_norm": 8.585693223039925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474900 + }, + { + "epoch": 2.303227675978724, + "grad_norm": 6.858587653368886e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474910 + }, + { + "epoch": 2.30327617417156, + "grad_norm": 7.21648945045672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474920 + }, + { + "epoch": 2.303324672364396, + "grad_norm": 8.552132868544504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474930 + }, + { + "epoch": 2.303373170557232, + "grad_norm": 6.953334263926081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474940 + }, + { + "epoch": 2.3034216687500684, + "grad_norm": 6.978206101848627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474950 + }, + { + "epoch": 2.3034701669429043, + "grad_norm": 6.904978278043927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474960 + }, + { + "epoch": 2.30351866513574, + "grad_norm": 6.814353525896877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474970 + }, + { + "epoch": 2.3035671633285766, + "grad_norm": 8.46143493049567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474980 + }, + { + "epoch": 2.3036156615214125, + "grad_norm": 6.964150145449821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 474990 + }, + { + "epoch": 2.3036641597142484, + "grad_norm": 6.937516872085325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475000 + }, + { + "epoch": 2.303712657907085, + "grad_norm": 8.092850833918419e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475010 + }, + { + "epoch": 2.3037611560999207, + "grad_norm": 6.720151191075274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475020 + }, + { + "epoch": 2.303809654292757, + "grad_norm": 8.059215872435743e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475030 + }, + { + "epoch": 2.303858152485593, + "grad_norm": 6.84770569137072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475040 + }, + { + "epoch": 2.303906650678429, + "grad_norm": 6.891671233688612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475050 + }, + { + "epoch": 2.3039551488712653, + "grad_norm": 6.713123923418607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475060 + }, + { + "epoch": 2.304003647064101, + "grad_norm": 6.678212827182506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475070 + }, + { + "epoch": 2.3040521452569376, + "grad_norm": 8.05901692046973e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475080 + }, + { + "epoch": 2.3041006434497735, + "grad_norm": 6.582797595910961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475090 + }, + { + "epoch": 2.3041491416426094, + "grad_norm": 6.588493306480814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475100 + }, + { + "epoch": 2.304197639835446, + "grad_norm": 6.867018953471415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475110 + }, + { + "epoch": 2.3042461380282817, + "grad_norm": 6.525763751596969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475120 + }, + { + "epoch": 2.3042946362211176, + "grad_norm": 7.782739430695074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475130 + }, + { + "epoch": 2.304343134413954, + "grad_norm": 6.479248071400434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475140 + }, + { + "epoch": 2.30439163260679, + "grad_norm": 6.590085632751652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475150 + }, + { + "epoch": 2.304440130799626, + "grad_norm": 6.691946197179277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475160 + }, + { + "epoch": 2.304488628992462, + "grad_norm": 6.535092467174763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475170 + }, + { + "epoch": 2.304537127185298, + "grad_norm": 7.931777190606226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475180 + }, + { + "epoch": 2.3045856253781345, + "grad_norm": 6.492519588618961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475190 + }, + { + "epoch": 2.3046341235709704, + "grad_norm": 6.318641965208371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475200 + }, + { + "epoch": 2.3046826217638063, + "grad_norm": 6.32199217420748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475210 + }, + { + "epoch": 2.3047311199566427, + "grad_norm": 6.533327479019135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475220 + }, + { + "epoch": 2.3047796181494786, + "grad_norm": 7.818158564987243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475230 + }, + { + "epoch": 2.304828116342315, + "grad_norm": 6.34757739703673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475240 + }, + { + "epoch": 2.304876614535151, + "grad_norm": 6.606672542375236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475250 + }, + { + "epoch": 2.304925112727987, + "grad_norm": 6.525744566943104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475260 + }, + { + "epoch": 2.304973610920823, + "grad_norm": 6.551795905807012e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475270 + }, + { + "epoch": 2.305022109113659, + "grad_norm": 7.617087760536378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475280 + }, + { + "epoch": 2.3050706073064955, + "grad_norm": 6.265935326155159e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475290 + }, + { + "epoch": 2.3051191054993314, + "grad_norm": 6.387679007957558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475300 + }, + { + "epoch": 2.3051676036921673, + "grad_norm": 6.897097648561612e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475310 + }, + { + "epoch": 2.3052161018850037, + "grad_norm": 6.243519834470135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475320 + }, + { + "epoch": 2.3052646000778396, + "grad_norm": 7.349334651962636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475330 + }, + { + "epoch": 2.3053130982706755, + "grad_norm": 6.296337318190126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475340 + }, + { + "epoch": 2.305361596463512, + "grad_norm": 6.336296110021067e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475350 + }, + { + "epoch": 2.305410094656348, + "grad_norm": 6.217715053935535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475360 + }, + { + "epoch": 2.3054585928491838, + "grad_norm": 6.184729528513344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475370 + }, + { + "epoch": 2.30550709104202, + "grad_norm": 7.19065127441354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475380 + }, + { + "epoch": 2.305555589234856, + "grad_norm": 6.110154515681643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475390 + }, + { + "epoch": 2.3056040874276924, + "grad_norm": 6.207528713275678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475400 + }, + { + "epoch": 2.3056525856205283, + "grad_norm": 6.249504025390706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475410 + }, + { + "epoch": 2.3057010838133642, + "grad_norm": 6.092835747040226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475420 + }, + { + "epoch": 2.3057495820062006, + "grad_norm": 7.126370604737531e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475430 + }, + { + "epoch": 2.3057980801990365, + "grad_norm": 6.227666204949855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475440 + }, + { + "epoch": 2.305846578391873, + "grad_norm": 6.159583421094794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475450 + }, + { + "epoch": 2.305895076584709, + "grad_norm": 6.001717167691822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475460 + }, + { + "epoch": 2.3059435747775447, + "grad_norm": 5.857710760892587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475470 + }, + { + "epoch": 2.305992072970381, + "grad_norm": 7.028307180689808e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475480 + }, + { + "epoch": 2.306040571163217, + "grad_norm": 6.467588065106611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475490 + }, + { + "epoch": 2.306089069356053, + "grad_norm": 5.8825150972552365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475500 + }, + { + "epoch": 2.3061375675488893, + "grad_norm": 6.007343245073571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475510 + }, + { + "epoch": 2.3061860657417252, + "grad_norm": 5.9163625110159046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475520 + }, + { + "epoch": 2.306234563934561, + "grad_norm": 7.060243945034017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475530 + }, + { + "epoch": 2.3062830621273975, + "grad_norm": 5.8875887276599315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475540 + }, + { + "epoch": 2.3063315603202335, + "grad_norm": 5.844977835067766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475550 + }, + { + "epoch": 2.30638005851307, + "grad_norm": 6.016931308749918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475560 + }, + { + "epoch": 2.3064285567059057, + "grad_norm": 8.563745268475031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475570 + }, + { + "epoch": 2.3064770548987417, + "grad_norm": 7.265668244826884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475580 + }, + { + "epoch": 2.306525553091578, + "grad_norm": 5.7238789707980686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475590 + }, + { + "epoch": 2.306574051284414, + "grad_norm": 5.8946817205196567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475600 + }, + { + "epoch": 2.3066225494772503, + "grad_norm": 5.837803840336164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475610 + }, + { + "epoch": 2.3066710476700862, + "grad_norm": 5.8556480553306756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475620 + }, + { + "epoch": 2.306719545862922, + "grad_norm": 6.901252902480337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475630 + }, + { + "epoch": 2.3067680440557585, + "grad_norm": 5.660983504185424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475640 + }, + { + "epoch": 2.3068165422485944, + "grad_norm": 5.69595073329765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475650 + }, + { + "epoch": 2.3068650404414304, + "grad_norm": 5.959926241416724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475660 + }, + { + "epoch": 2.3069135386342667, + "grad_norm": 5.623075338689887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475670 + }, + { + "epoch": 2.3069620368271027, + "grad_norm": 6.653610284956812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475680 + }, + { + "epoch": 2.3070105350199386, + "grad_norm": 5.5712948920927374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475690 + }, + { + "epoch": 2.307059033212775, + "grad_norm": 5.4889703449134686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475700 + }, + { + "epoch": 2.307107531405611, + "grad_norm": 5.738598574112075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475710 + }, + { + "epoch": 2.3071560295984472, + "grad_norm": 5.6807348158827153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475720 + }, + { + "epoch": 2.307204527791283, + "grad_norm": 6.806638452871994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475730 + }, + { + "epoch": 2.307253025984119, + "grad_norm": 5.6543765225569587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475740 + }, + { + "epoch": 2.3073015241769554, + "grad_norm": 5.6293021799547205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475750 + }, + { + "epoch": 2.3073500223697914, + "grad_norm": 5.558954185858056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475760 + }, + { + "epoch": 2.3073985205626277, + "grad_norm": 5.6321219688015844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475770 + }, + { + "epoch": 2.3074470187554637, + "grad_norm": 6.531865182068941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475780 + }, + { + "epoch": 2.3074955169482996, + "grad_norm": 5.76408396568695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475790 + }, + { + "epoch": 2.307544015141136, + "grad_norm": 5.585196660717884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475800 + }, + { + "epoch": 2.307592513333972, + "grad_norm": 5.543538605934373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475810 + }, + { + "epoch": 2.3076410115268082, + "grad_norm": 5.4013334249702893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475820 + }, + { + "epoch": 2.307689509719644, + "grad_norm": 6.68427375671854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475830 + }, + { + "epoch": 2.30773800791248, + "grad_norm": 5.635017430449807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475840 + }, + { + "epoch": 2.3077865061053164, + "grad_norm": 5.703811822854732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475850 + }, + { + "epoch": 2.3078350042981524, + "grad_norm": 5.31801020997591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475860 + }, + { + "epoch": 2.3078835024909883, + "grad_norm": 5.3128626831266956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475870 + }, + { + "epoch": 2.3079320006838246, + "grad_norm": 6.451086420611318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475880 + }, + { + "epoch": 2.3079804988766606, + "grad_norm": 5.554810655894471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475890 + }, + { + "epoch": 2.3080289970694965, + "grad_norm": 5.332611507924412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475900 + }, + { + "epoch": 2.308077495262333, + "grad_norm": 5.412731951537353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475910 + }, + { + "epoch": 2.308125993455169, + "grad_norm": 5.563292759802607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475920 + }, + { + "epoch": 2.308174491648005, + "grad_norm": 6.59315944062655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475930 + }, + { + "epoch": 2.308222989840841, + "grad_norm": 5.440175598891983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475940 + }, + { + "epoch": 2.308271488033677, + "grad_norm": 5.312180206828998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475950 + }, + { + "epoch": 2.3083199862265134, + "grad_norm": 5.4318515907425535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475960 + }, + { + "epoch": 2.3083684844193493, + "grad_norm": 5.252452339732372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475970 + }, + { + "epoch": 2.3084169826121856, + "grad_norm": 6.50596518880775e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475980 + }, + { + "epoch": 2.3084654808050216, + "grad_norm": 5.165518857097595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 475990 + }, + { + "epoch": 2.3085139789978575, + "grad_norm": 5.402806735332888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476000 + }, + { + "epoch": 2.308562477190694, + "grad_norm": 5.192431373757245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476010 + }, + { + "epoch": 2.3086109753835298, + "grad_norm": 5.2649351545142054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476020 + }, + { + "epoch": 2.3086594735763657, + "grad_norm": 6.343698544242216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476030 + }, + { + "epoch": 2.308707971769202, + "grad_norm": 5.315686735229974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476040 + }, + { + "epoch": 2.308756469962038, + "grad_norm": 5.167122552052206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476050 + }, + { + "epoch": 2.308804968154874, + "grad_norm": 5.143110826111297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476060 + }, + { + "epoch": 2.3088534663477103, + "grad_norm": 5.228648802813041e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476070 + }, + { + "epoch": 2.308901964540546, + "grad_norm": 6.364202675968045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476080 + }, + { + "epoch": 2.3089504627333826, + "grad_norm": 5.247292023113914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476090 + }, + { + "epoch": 2.3089989609262185, + "grad_norm": 5.241554745794019e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476100 + }, + { + "epoch": 2.3090474591190544, + "grad_norm": 5.0451280486640826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476110 + }, + { + "epoch": 2.3090959573118908, + "grad_norm": 5.177816220225395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476120 + }, + { + "epoch": 2.3091444555047267, + "grad_norm": 6.046416700655755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476130 + }, + { + "epoch": 2.309192953697563, + "grad_norm": 5.1670969725137184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476140 + }, + { + "epoch": 2.309241451890399, + "grad_norm": 4.9841681004636484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476150 + }, + { + "epoch": 2.309289950083235, + "grad_norm": 4.9331802642882394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476160 + }, + { + "epoch": 2.3093384482760713, + "grad_norm": 4.883395376964472e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476170 + }, + { + "epoch": 2.309386946468907, + "grad_norm": 6.236481198129695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476180 + }, + { + "epoch": 2.309435444661743, + "grad_norm": 5.026497262861085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476190 + }, + { + "epoch": 2.3094839428545795, + "grad_norm": 5.3434497715443285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476200 + }, + { + "epoch": 2.3095324410474154, + "grad_norm": 4.984569557109353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476210 + }, + { + "epoch": 2.3095809392402513, + "grad_norm": 5.039395745143338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476220 + }, + { + "epoch": 2.3096294374330877, + "grad_norm": 5.907489963874468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476230 + }, + { + "epoch": 2.3096779356259236, + "grad_norm": 4.922469187818024e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476240 + }, + { + "epoch": 2.30972643381876, + "grad_norm": 5.159575167112962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476250 + }, + { + "epoch": 2.309774932011596, + "grad_norm": 4.8987043754777915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476260 + }, + { + "epoch": 2.309823430204432, + "grad_norm": 6.641067784585175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476270 + }, + { + "epoch": 2.309871928397268, + "grad_norm": 5.724571749965435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476280 + }, + { + "epoch": 2.309920426590104, + "grad_norm": 5.0103331261652784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476290 + }, + { + "epoch": 2.3099689247829405, + "grad_norm": 4.839643352738676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476300 + }, + { + "epoch": 2.3100174229757764, + "grad_norm": 4.873464831689489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476310 + }, + { + "epoch": 2.3100659211686123, + "grad_norm": 4.8375991212878944e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476320 + }, + { + "epoch": 2.3101144193614487, + "grad_norm": 5.810315784060549e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476330 + }, + { + "epoch": 2.3101629175542846, + "grad_norm": 4.938080522265409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476340 + }, + { + "epoch": 2.310211415747121, + "grad_norm": 5.0383306415824336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476350 + }, + { + "epoch": 2.310259913939957, + "grad_norm": 4.787303353737116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476360 + }, + { + "epoch": 2.310308412132793, + "grad_norm": 4.7358625465676596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476370 + }, + { + "epoch": 2.310356910325629, + "grad_norm": 5.808361080994473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476380 + }, + { + "epoch": 2.310405408518465, + "grad_norm": 4.676132903114194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476390 + }, + { + "epoch": 2.310453906711301, + "grad_norm": 4.767538186456477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476400 + }, + { + "epoch": 2.3105024049041374, + "grad_norm": 4.8017017917345584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476410 + }, + { + "epoch": 2.3105509030969733, + "grad_norm": 4.746561899082735e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476420 + }, + { + "epoch": 2.3105994012898092, + "grad_norm": 5.656840684764575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476430 + }, + { + "epoch": 2.3106478994826456, + "grad_norm": 4.602729930525129e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476440 + }, + { + "epoch": 2.3106963976754815, + "grad_norm": 4.8122572593456425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476450 + }, + { + "epoch": 2.310744895868318, + "grad_norm": 4.678745568753584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476460 + }, + { + "epoch": 2.310793394061154, + "grad_norm": 4.6758419358639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476470 + }, + { + "epoch": 2.3108418922539897, + "grad_norm": 5.465878771815369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476480 + }, + { + "epoch": 2.310890390446826, + "grad_norm": 4.557878341415744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476490 + }, + { + "epoch": 2.310938888639662, + "grad_norm": 4.640171269443272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476500 + }, + { + "epoch": 2.3109873868324984, + "grad_norm": 4.543128895306836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476510 + }, + { + "epoch": 2.3110358850253343, + "grad_norm": 4.627312577554221e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476520 + }, + { + "epoch": 2.3110843832181702, + "grad_norm": 5.7935885422466527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476530 + }, + { + "epoch": 2.3111328814110066, + "grad_norm": 4.806106801424903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476540 + }, + { + "epoch": 2.3111813796038425, + "grad_norm": 4.5951967564406004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476550 + }, + { + "epoch": 2.3112298777966784, + "grad_norm": 4.54293385132587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476560 + }, + { + "epoch": 2.311278375989515, + "grad_norm": 4.557402988325521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476570 + }, + { + "epoch": 2.3113268741823507, + "grad_norm": 5.449571460758307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476580 + }, + { + "epoch": 2.3113753723751866, + "grad_norm": 4.5022442662912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476590 + }, + { + "epoch": 2.311423870568023, + "grad_norm": 4.446763313126212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476600 + }, + { + "epoch": 2.311472368760859, + "grad_norm": 4.481665172306748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476610 + }, + { + "epoch": 2.3115208669536953, + "grad_norm": 4.512150653113167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476620 + }, + { + "epoch": 2.311569365146531, + "grad_norm": 5.1363315378694097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476630 + }, + { + "epoch": 2.311617863339367, + "grad_norm": 4.544920173543687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476640 + }, + { + "epoch": 2.3116663615322035, + "grad_norm": 4.670235753678753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476650 + }, + { + "epoch": 2.3117148597250394, + "grad_norm": 4.418024701635659e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476660 + }, + { + "epoch": 2.311763357917876, + "grad_norm": 4.412316911839298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476670 + }, + { + "epoch": 2.3118118561107117, + "grad_norm": 5.3088527351974335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476680 + }, + { + "epoch": 2.3118603543035476, + "grad_norm": 4.39148415409818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476690 + }, + { + "epoch": 2.311908852496384, + "grad_norm": 4.517648832802479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476700 + }, + { + "epoch": 2.31195735068922, + "grad_norm": 4.352602545054651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476710 + }, + { + "epoch": 2.312005848882056, + "grad_norm": 4.4790098741032125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476720 + }, + { + "epoch": 2.312054347074892, + "grad_norm": 5.163391847418097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476730 + }, + { + "epoch": 2.312102845267728, + "grad_norm": 4.348380500118765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476740 + }, + { + "epoch": 2.312151343460564, + "grad_norm": 4.3597488286195585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476750 + }, + { + "epoch": 2.3121998416534004, + "grad_norm": 4.320359892062697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476760 + }, + { + "epoch": 2.3122483398462363, + "grad_norm": 4.313614709872127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476770 + }, + { + "epoch": 2.3122968380390727, + "grad_norm": 5.0412182872605626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476780 + }, + { + "epoch": 2.3123453362319086, + "grad_norm": 4.246426854592755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476790 + }, + { + "epoch": 2.3123938344247446, + "grad_norm": 4.193207203684324e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476800 + }, + { + "epoch": 2.312442332617581, + "grad_norm": 4.310318146849568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476810 + }, + { + "epoch": 2.312490830810417, + "grad_norm": 4.2987824855345025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476820 + }, + { + "epoch": 2.312539329003253, + "grad_norm": 5.00205246112273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476830 + }, + { + "epoch": 2.312587827196089, + "grad_norm": 4.334224712465584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476840 + }, + { + "epoch": 2.312636325388925, + "grad_norm": 4.163127087508656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476850 + }, + { + "epoch": 2.3126848235817614, + "grad_norm": 4.2891326046401446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476860 + }, + { + "epoch": 2.3127333217745973, + "grad_norm": 4.134038533720741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476870 + }, + { + "epoch": 2.3127818199674337, + "grad_norm": 7.33716731815548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476880 + }, + { + "epoch": 2.3128303181602696, + "grad_norm": 4.216731852579869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476890 + }, + { + "epoch": 2.3128788163531055, + "grad_norm": 4.2869945815482424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476900 + }, + { + "epoch": 2.312927314545942, + "grad_norm": 4.154479782414455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476910 + }, + { + "epoch": 2.312975812738778, + "grad_norm": 4.181939061709272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476920 + }, + { + "epoch": 2.3130243109316138, + "grad_norm": 4.788449459169897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476930 + }, + { + "epoch": 2.31307280912445, + "grad_norm": 4.174697920689141e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476940 + }, + { + "epoch": 2.313121307317286, + "grad_norm": 4.030977507341049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476950 + }, + { + "epoch": 2.313169805510122, + "grad_norm": 4.179389634373365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476960 + }, + { + "epoch": 2.3132183037029583, + "grad_norm": 4.229793404419979e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476970 + }, + { + "epoch": 2.3132668018957943, + "grad_norm": 4.899653660572767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476980 + }, + { + "epoch": 2.3133153000886306, + "grad_norm": 4.024183297701711e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 476990 + }, + { + "epoch": 2.3133637982814665, + "grad_norm": 3.977516271902459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477000 + }, + { + "epoch": 2.3134122964743025, + "grad_norm": 3.990989583257942e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477010 + }, + { + "epoch": 2.313460794667139, + "grad_norm": 3.911379664600645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477020 + }, + { + "epoch": 2.3135092928599748, + "grad_norm": 4.9121869238888394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477030 + }, + { + "epoch": 2.313557791052811, + "grad_norm": 4.0824176039677695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477040 + }, + { + "epoch": 2.313606289245647, + "grad_norm": 4.0156255209922165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477050 + }, + { + "epoch": 2.313654787438483, + "grad_norm": 4.0380225385661106e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477060 + }, + { + "epoch": 2.3137032856313193, + "grad_norm": 3.9333901469262855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477070 + }, + { + "epoch": 2.3137517838241553, + "grad_norm": 4.7273239545120305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477080 + }, + { + "epoch": 2.313800282016991, + "grad_norm": 3.946880156036059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477090 + }, + { + "epoch": 2.3138487802098275, + "grad_norm": 5.955068331786606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477100 + }, + { + "epoch": 2.3138972784026635, + "grad_norm": 3.918541935377107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477110 + }, + { + "epoch": 2.3139457765954994, + "grad_norm": 4.015236854115756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477120 + }, + { + "epoch": 2.3139942747883357, + "grad_norm": 4.629351835205853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477130 + }, + { + "epoch": 2.3140427729811717, + "grad_norm": 3.860758823748256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477140 + }, + { + "epoch": 2.314091271174008, + "grad_norm": 3.9121179185030996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477150 + }, + { + "epoch": 2.314139769366844, + "grad_norm": 3.9646170790774704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477160 + }, + { + "epoch": 2.31418826755968, + "grad_norm": 3.921407554230427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477170 + }, + { + "epoch": 2.3142367657525162, + "grad_norm": 5.0141505170131495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477180 + }, + { + "epoch": 2.314285263945352, + "grad_norm": 3.883207000399125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477190 + }, + { + "epoch": 2.3143337621381885, + "grad_norm": 3.876489174103881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477200 + }, + { + "epoch": 2.3143822603310245, + "grad_norm": 3.804339598900697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477210 + }, + { + "epoch": 2.3144307585238604, + "grad_norm": 3.809824278278029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477220 + }, + { + "epoch": 2.3144792567166967, + "grad_norm": 4.5971454198934225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477230 + }, + { + "epoch": 2.3145277549095327, + "grad_norm": 3.830037798024932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477240 + }, + { + "epoch": 2.3145762531023686, + "grad_norm": 3.8152457193518785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477250 + }, + { + "epoch": 2.314624751295205, + "grad_norm": 3.621643784867956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477260 + }, + { + "epoch": 2.314673249488041, + "grad_norm": 3.884998633907344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477270 + }, + { + "epoch": 2.314721747680877, + "grad_norm": 4.688649468675976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477280 + }, + { + "epoch": 2.314770245873713, + "grad_norm": 3.892358435564347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477290 + }, + { + "epoch": 2.314818744066549, + "grad_norm": 3.749391552787529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477300 + }, + { + "epoch": 2.3148672422593854, + "grad_norm": 3.81057994047751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477310 + }, + { + "epoch": 2.3149157404522214, + "grad_norm": 3.778191981496093e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477320 + }, + { + "epoch": 2.3149642386450573, + "grad_norm": 4.532960673486741e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477330 + }, + { + "epoch": 2.3150127368378937, + "grad_norm": 3.823797811719487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477340 + }, + { + "epoch": 2.3150612350307296, + "grad_norm": 3.7896999316444635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477350 + }, + { + "epoch": 2.315109733223566, + "grad_norm": 3.749293142618626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477360 + }, + { + "epoch": 2.315158231416402, + "grad_norm": 3.657294911363351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477370 + }, + { + "epoch": 2.315206729609238, + "grad_norm": 4.4316294633972575e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477380 + }, + { + "epoch": 2.315255227802074, + "grad_norm": 3.843637230716013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477390 + }, + { + "epoch": 2.31530372599491, + "grad_norm": 3.7548716136370786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477400 + }, + { + "epoch": 2.3153522241877464, + "grad_norm": 3.748784394019822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477410 + }, + { + "epoch": 2.3154007223805824, + "grad_norm": 3.582061935958336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477420 + }, + { + "epoch": 2.3154492205734183, + "grad_norm": 4.407057474509202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477430 + }, + { + "epoch": 2.3154977187662547, + "grad_norm": 3.767959455558412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477440 + }, + { + "epoch": 2.3155462169590906, + "grad_norm": 3.8487378617446666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477450 + }, + { + "epoch": 2.3155947151519265, + "grad_norm": 3.4811719729077595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477460 + }, + { + "epoch": 2.315643213344763, + "grad_norm": 3.6403523751005196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477470 + }, + { + "epoch": 2.315691711537599, + "grad_norm": 4.425810828934118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477480 + }, + { + "epoch": 2.3157402097304347, + "grad_norm": 3.5027905909146284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477490 + }, + { + "epoch": 2.315788707923271, + "grad_norm": 3.7488298687549104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477500 + }, + { + "epoch": 2.315837206116107, + "grad_norm": 3.609023124795385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477510 + }, + { + "epoch": 2.3158857043089434, + "grad_norm": 3.631116030078374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477520 + }, + { + "epoch": 2.3159342025017793, + "grad_norm": 4.366657435639354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477530 + }, + { + "epoch": 2.315982700694615, + "grad_norm": 3.538503534628035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477540 + }, + { + "epoch": 2.3160311988874516, + "grad_norm": 3.553080674123521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477550 + }, + { + "epoch": 2.3160796970802875, + "grad_norm": 3.5566362299732646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477560 + }, + { + "epoch": 2.316128195273124, + "grad_norm": 3.714570695478869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477570 + }, + { + "epoch": 2.31617669346596, + "grad_norm": 4.20309866910884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477580 + }, + { + "epoch": 2.3162251916587957, + "grad_norm": 3.5024850575382516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477590 + }, + { + "epoch": 2.316273689851632, + "grad_norm": 3.568672113374305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477600 + }, + { + "epoch": 2.316322188044468, + "grad_norm": 3.407146209610801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477610 + }, + { + "epoch": 2.316370686237304, + "grad_norm": 3.6740466669016314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477620 + }, + { + "epoch": 2.3164191844301403, + "grad_norm": 4.159986843887964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477630 + }, + { + "epoch": 2.316467682622976, + "grad_norm": 3.8191728890524246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477640 + }, + { + "epoch": 2.316516180815812, + "grad_norm": 3.6192258079381645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477650 + }, + { + "epoch": 2.3165646790086485, + "grad_norm": 3.36070016260237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477660 + }, + { + "epoch": 2.3166131772014844, + "grad_norm": 3.3348428019053244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477670 + }, + { + "epoch": 2.3166616753943208, + "grad_norm": 4.477831794247322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477680 + }, + { + "epoch": 2.3167101735871567, + "grad_norm": 3.336613119131471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477690 + }, + { + "epoch": 2.3167586717799926, + "grad_norm": 3.3826818679472126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477700 + }, + { + "epoch": 2.316807169972829, + "grad_norm": 3.773535794948657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477710 + }, + { + "epoch": 2.316855668165665, + "grad_norm": 3.386335478694491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477720 + }, + { + "epoch": 2.3169041663585013, + "grad_norm": 4.134856013138233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477730 + }, + { + "epoch": 2.316952664551337, + "grad_norm": 3.6035551431723434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477740 + }, + { + "epoch": 2.317001162744173, + "grad_norm": 3.658546177121025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477750 + }, + { + "epoch": 2.3170496609370095, + "grad_norm": 3.3753334349739816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477760 + }, + { + "epoch": 2.3170981591298454, + "grad_norm": 3.326811892634396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477770 + }, + { + "epoch": 2.3171466573226813, + "grad_norm": 3.935538117616488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477780 + }, + { + "epoch": 2.3171951555155177, + "grad_norm": 3.289637717784899e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477790 + }, + { + "epoch": 2.3172436537083536, + "grad_norm": 3.428883132983174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477800 + }, + { + "epoch": 2.3172921519011895, + "grad_norm": 3.233028422755524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477810 + }, + { + "epoch": 2.317340650094026, + "grad_norm": 3.2773979086186955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477820 + }, + { + "epoch": 2.317389148286862, + "grad_norm": 3.9139489871331534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477830 + }, + { + "epoch": 2.317437646479698, + "grad_norm": 3.2681988670901774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477840 + }, + { + "epoch": 2.317486144672534, + "grad_norm": 3.197439468749508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477850 + }, + { + "epoch": 2.31753464286537, + "grad_norm": 3.27396669774771e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477860 + }, + { + "epoch": 2.3175831410582064, + "grad_norm": 3.094737266451375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477870 + }, + { + "epoch": 2.3176316392510423, + "grad_norm": 4.156879995775853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477880 + }, + { + "epoch": 2.3176801374438787, + "grad_norm": 3.539403792274243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477890 + }, + { + "epoch": 2.3177286356367146, + "grad_norm": 3.242240254053286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477900 + }, + { + "epoch": 2.3177771338295505, + "grad_norm": 3.1890522222965956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477910 + }, + { + "epoch": 2.317825632022387, + "grad_norm": 3.167971129869329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477920 + }, + { + "epoch": 2.317874130215223, + "grad_norm": 3.847025098480117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477930 + }, + { + "epoch": 2.317922628408059, + "grad_norm": 3.788686342431902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477940 + }, + { + "epoch": 2.317971126600895, + "grad_norm": 3.216356958546385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477950 + }, + { + "epoch": 2.318019624793731, + "grad_norm": 3.333342846190135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477960 + }, + { + "epoch": 2.3180681229865674, + "grad_norm": 3.1266939259921855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477970 + }, + { + "epoch": 2.3181166211794033, + "grad_norm": 3.729184427925247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477980 + }, + { + "epoch": 2.3181651193722392, + "grad_norm": 3.088858235855696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 477990 + }, + { + "epoch": 2.3182136175650756, + "grad_norm": 3.0940814355062685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478000 + }, + { + "epoch": 2.3182621157579115, + "grad_norm": 3.0982484133801336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478010 + }, + { + "epoch": 2.3183106139507474, + "grad_norm": 3.058218212004249e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478020 + }, + { + "epoch": 2.318359112143584, + "grad_norm": 3.8525765688746105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478030 + }, + { + "epoch": 2.3184076103364197, + "grad_norm": 3.101058254628697e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478040 + }, + { + "epoch": 2.318456108529256, + "grad_norm": 3.1470509753717124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478050 + }, + { + "epoch": 2.318504606722092, + "grad_norm": 2.9861407568887444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478060 + }, + { + "epoch": 2.318553104914928, + "grad_norm": 3.604240816912352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478070 + }, + { + "epoch": 2.3186016031077643, + "grad_norm": 3.6196755814899007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478080 + }, + { + "epoch": 2.3186501013006002, + "grad_norm": 3.1158268853914706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478090 + }, + { + "epoch": 2.3186985994934366, + "grad_norm": 3.027921735565542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478100 + }, + { + "epoch": 2.3187470976862725, + "grad_norm": 3.151549066160442e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478110 + }, + { + "epoch": 2.3187955958791084, + "grad_norm": 2.9781519472749096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478120 + }, + { + "epoch": 2.318844094071945, + "grad_norm": 3.7244866746277694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478130 + }, + { + "epoch": 2.3188925922647807, + "grad_norm": 3.1988857784881475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478140 + }, + { + "epoch": 2.3189410904576166, + "grad_norm": 2.9371522103360803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478150 + }, + { + "epoch": 2.318989588650453, + "grad_norm": 3.083179933582869e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478160 + }, + { + "epoch": 2.319038086843289, + "grad_norm": 3.129135350832257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478170 + }, + { + "epoch": 2.319086585036125, + "grad_norm": 3.5205140136440605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478180 + }, + { + "epoch": 2.3191350832289612, + "grad_norm": 2.9413197211169972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478190 + }, + { + "epoch": 2.319183581421797, + "grad_norm": 2.9261492784371512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478200 + }, + { + "epoch": 2.3192320796146335, + "grad_norm": 3.151784966348714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478210 + }, + { + "epoch": 2.3192805778074694, + "grad_norm": 2.9015115643460376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478220 + }, + { + "epoch": 2.3193290760003054, + "grad_norm": 3.70028061524863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478230 + }, + { + "epoch": 2.3193775741931417, + "grad_norm": 2.955789213388016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478240 + }, + { + "epoch": 2.3194260723859776, + "grad_norm": 2.8404558705119598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478250 + }, + { + "epoch": 2.319474570578814, + "grad_norm": 2.995291126239863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478260 + }, + { + "epoch": 2.31952306877165, + "grad_norm": 2.9226569608908903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478270 + }, + { + "epoch": 2.319571566964486, + "grad_norm": 3.7940626640420305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478280 + }, + { + "epoch": 2.319620065157322, + "grad_norm": 2.9904235532285384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478290 + }, + { + "epoch": 2.319668563350158, + "grad_norm": 2.9723883798737916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478300 + }, + { + "epoch": 2.319717061542994, + "grad_norm": 2.8950637442903826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478310 + }, + { + "epoch": 2.3197655597358304, + "grad_norm": 2.9773829623991332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478320 + }, + { + "epoch": 2.3198140579286664, + "grad_norm": 3.692307259939298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478330 + }, + { + "epoch": 2.3198625561215023, + "grad_norm": 2.96338349414782e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478340 + }, + { + "epoch": 2.3199110543143386, + "grad_norm": 2.8308482669103796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478350 + }, + { + "epoch": 2.3199595525071746, + "grad_norm": 2.9687724278915084e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478360 + }, + { + "epoch": 2.320008050700011, + "grad_norm": 2.8548985397947035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478370 + }, + { + "epoch": 2.320056548892847, + "grad_norm": 3.39148051864413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478380 + }, + { + "epoch": 2.3201050470856828, + "grad_norm": 2.8862706002996674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478390 + }, + { + "epoch": 2.320153545278519, + "grad_norm": 2.817480471151157e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478400 + }, + { + "epoch": 2.320202043471355, + "grad_norm": 3.049175845148966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478410 + }, + { + "epoch": 2.3202505416641914, + "grad_norm": 2.8663432516395915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478420 + }, + { + "epoch": 2.3202990398570273, + "grad_norm": 3.872704468221855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478430 + }, + { + "epoch": 2.3203475380498633, + "grad_norm": 2.7499996946289684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478440 + }, + { + "epoch": 2.3203960362426996, + "grad_norm": 2.770970475296508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478450 + }, + { + "epoch": 2.3204445344355356, + "grad_norm": 2.8819670205848524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478460 + }, + { + "epoch": 2.320493032628372, + "grad_norm": 2.8417415975923177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478470 + }, + { + "epoch": 2.320541530821208, + "grad_norm": 3.448948504569671e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478480 + }, + { + "epoch": 2.3205900290140438, + "grad_norm": 2.755471584237057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478490 + }, + { + "epoch": 2.32063852720688, + "grad_norm": 3.017760263901437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478500 + }, + { + "epoch": 2.320687025399716, + "grad_norm": 2.8058545709086502e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478510 + }, + { + "epoch": 2.320735523592552, + "grad_norm": 2.671719379065962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478520 + }, + { + "epoch": 2.3207840217853883, + "grad_norm": 3.469480347462195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478530 + }, + { + "epoch": 2.3208325199782243, + "grad_norm": 2.6691733268080498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478540 + }, + { + "epoch": 2.32088101817106, + "grad_norm": 2.7382046852153508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478550 + }, + { + "epoch": 2.3209295163638965, + "grad_norm": 2.8881192548624313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478560 + }, + { + "epoch": 2.3209780145567325, + "grad_norm": 2.6726038271362995e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478570 + }, + { + "epoch": 2.321026512749569, + "grad_norm": 3.432759498878113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478580 + }, + { + "epoch": 2.3210750109424048, + "grad_norm": 2.700088330698236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478590 + }, + { + "epoch": 2.3211235091352407, + "grad_norm": 2.7225794951846183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478600 + }, + { + "epoch": 2.321172007328077, + "grad_norm": 2.693906608897123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478610 + }, + { + "epoch": 2.321220505520913, + "grad_norm": 2.7183517659068457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478620 + }, + { + "epoch": 2.3212690037137493, + "grad_norm": 3.190717734469217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478630 + }, + { + "epoch": 2.3213175019065853, + "grad_norm": 2.7940380675772758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478640 + }, + { + "epoch": 2.321366000099421, + "grad_norm": 2.7904690114155528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478650 + }, + { + "epoch": 2.3214144982922575, + "grad_norm": 2.7222354148648265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478660 + }, + { + "epoch": 2.3214629964850935, + "grad_norm": 2.911737340127729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478670 + }, + { + "epoch": 2.3215114946779294, + "grad_norm": 3.3038975999488684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478680 + }, + { + "epoch": 2.3215599928707658, + "grad_norm": 2.673561816379788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478690 + }, + { + "epoch": 2.3216084910636017, + "grad_norm": 2.5256534641471262e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478700 + }, + { + "epoch": 2.3216569892564376, + "grad_norm": 2.6975623512726088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478710 + }, + { + "epoch": 2.321705487449274, + "grad_norm": 2.736651971702031e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478720 + }, + { + "epoch": 2.32175398564211, + "grad_norm": 3.102313073100049e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478730 + }, + { + "epoch": 2.3218024838349463, + "grad_norm": 2.5817049831289296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478740 + }, + { + "epoch": 2.321850982027782, + "grad_norm": 2.6135003494687226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478750 + }, + { + "epoch": 2.321899480220618, + "grad_norm": 2.5421227789479417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478760 + }, + { + "epoch": 2.3219479784134545, + "grad_norm": 2.7463357810120215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478770 + }, + { + "epoch": 2.3219964766062904, + "grad_norm": 3.592893449422263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478780 + }, + { + "epoch": 2.3220449747991267, + "grad_norm": 2.5766059508214312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478790 + }, + { + "epoch": 2.3220934729919627, + "grad_norm": 2.519804631617717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478800 + }, + { + "epoch": 2.3221419711847986, + "grad_norm": 2.7018778325782478e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478810 + }, + { + "epoch": 2.322190469377635, + "grad_norm": 2.749470695562195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478820 + }, + { + "epoch": 2.322238967570471, + "grad_norm": 3.0076225954189795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478830 + }, + { + "epoch": 2.322287465763307, + "grad_norm": 2.4726622527282416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478840 + }, + { + "epoch": 2.322335963956143, + "grad_norm": 2.74443898717891e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478850 + }, + { + "epoch": 2.322384462148979, + "grad_norm": 2.8721224509808962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478860 + }, + { + "epoch": 2.3224329603418155, + "grad_norm": 2.5549269366820226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478870 + }, + { + "epoch": 2.3224814585346514, + "grad_norm": 2.992205239138457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478880 + }, + { + "epoch": 2.3225299567274873, + "grad_norm": 2.453323055817691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478890 + }, + { + "epoch": 2.3225784549203237, + "grad_norm": 2.5033372708094248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478900 + }, + { + "epoch": 2.3226269531131596, + "grad_norm": 2.464969028892483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478910 + }, + { + "epoch": 2.3226754513059955, + "grad_norm": 2.381964137043724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478920 + }, + { + "epoch": 2.322723949498832, + "grad_norm": 2.9881991991942414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478930 + }, + { + "epoch": 2.322772447691668, + "grad_norm": 2.4769104101096673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478940 + }, + { + "epoch": 2.322820945884504, + "grad_norm": 2.8858558209776675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478950 + }, + { + "epoch": 2.32286944407734, + "grad_norm": 2.3972983598241626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478960 + }, + { + "epoch": 2.322917942270176, + "grad_norm": 2.388515341067432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478970 + }, + { + "epoch": 2.3229664404630124, + "grad_norm": 3.103252055325356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478980 + }, + { + "epoch": 2.3230149386558483, + "grad_norm": 2.551342070944429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 478990 + }, + { + "epoch": 2.3230634368486847, + "grad_norm": 2.3607459098684558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479000 + }, + { + "epoch": 2.3231119350415206, + "grad_norm": 2.454786063310621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479010 + }, + { + "epoch": 2.3231604332343565, + "grad_norm": 2.3621858247224736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479020 + }, + { + "epoch": 2.323208931427193, + "grad_norm": 2.8990598366362974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479030 + }, + { + "epoch": 2.323257429620029, + "grad_norm": 2.3961991502119417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479040 + }, + { + "epoch": 2.3233059278128647, + "grad_norm": 2.532807386046443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479050 + }, + { + "epoch": 2.323354426005701, + "grad_norm": 2.3993507625164057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479060 + }, + { + "epoch": 2.323402924198537, + "grad_norm": 2.8015682218551774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479070 + }, + { + "epoch": 2.323451422391373, + "grad_norm": 2.8002855145814465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479080 + }, + { + "epoch": 2.3234999205842093, + "grad_norm": 2.5673644543644514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479090 + }, + { + "epoch": 2.323548418777045, + "grad_norm": 2.6069200131928483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479100 + }, + { + "epoch": 2.3235969169698816, + "grad_norm": 2.6255907670247325e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479110 + }, + { + "epoch": 2.3236454151627175, + "grad_norm": 2.4054520153526937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479120 + }, + { + "epoch": 2.3236939133555534, + "grad_norm": 3.16445678549826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479130 + }, + { + "epoch": 2.32374241154839, + "grad_norm": 2.4175196955411593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479140 + }, + { + "epoch": 2.3237909097412257, + "grad_norm": 2.414173572162781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479150 + }, + { + "epoch": 2.323839407934062, + "grad_norm": 2.344144078847421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479160 + }, + { + "epoch": 2.323887906126898, + "grad_norm": 2.284382993877898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479170 + }, + { + "epoch": 2.323936404319734, + "grad_norm": 3.078592669680802e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479180 + }, + { + "epoch": 2.3239849025125703, + "grad_norm": 2.466775939069521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479190 + }, + { + "epoch": 2.324033400705406, + "grad_norm": 2.4569269285734663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479200 + }, + { + "epoch": 2.324081898898242, + "grad_norm": 2.339968041553675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479210 + }, + { + "epoch": 2.3241303970910785, + "grad_norm": 2.6122556562313548e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479220 + }, + { + "epoch": 2.3241788952839144, + "grad_norm": 2.9142780633151233e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479230 + }, + { + "epoch": 2.3242273934767503, + "grad_norm": 2.3116108138765412e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479240 + }, + { + "epoch": 2.3242758916695867, + "grad_norm": 2.3081392797053013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479250 + }, + { + "epoch": 2.3243243898624226, + "grad_norm": 2.2889345530074934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479260 + }, + { + "epoch": 2.324372888055259, + "grad_norm": 2.194412118683431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479270 + }, + { + "epoch": 2.324421386248095, + "grad_norm": 3.1606038675136006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479280 + }, + { + "epoch": 2.324469884440931, + "grad_norm": 2.2415783007545542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479290 + }, + { + "epoch": 2.324518382633767, + "grad_norm": 2.3852077646324688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479300 + }, + { + "epoch": 2.324566880826603, + "grad_norm": 2.2492633533488515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479310 + }, + { + "epoch": 2.3246153790194395, + "grad_norm": 2.415658073573468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479320 + }, + { + "epoch": 2.3246638772122754, + "grad_norm": 2.8742633162437414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479330 + }, + { + "epoch": 2.3247123754051113, + "grad_norm": 2.347923278023245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479340 + }, + { + "epoch": 2.3247608735979477, + "grad_norm": 2.1485915269181532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479350 + }, + { + "epoch": 2.3248093717907836, + "grad_norm": 2.324601666714443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479360 + }, + { + "epoch": 2.32485786998362, + "grad_norm": 2.2704222502056837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479370 + }, + { + "epoch": 2.324906368176456, + "grad_norm": 2.7253660661585855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479380 + }, + { + "epoch": 2.324954866369292, + "grad_norm": 2.2779103048264915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479390 + }, + { + "epoch": 2.325003364562128, + "grad_norm": 2.1980348208217038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479400 + }, + { + "epoch": 2.325051862754964, + "grad_norm": 2.2596188031798192e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479410 + }, + { + "epoch": 2.3251003609478, + "grad_norm": 2.4317559521591647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479420 + }, + { + "epoch": 2.3251488591406364, + "grad_norm": 2.5905723788355317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479430 + }, + { + "epoch": 2.3251973573334723, + "grad_norm": 2.3421677042279043e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479440 + }, + { + "epoch": 2.3252458555263082, + "grad_norm": 2.1188597543186916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479450 + }, + { + "epoch": 2.3252943537191446, + "grad_norm": 2.227732487369849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479460 + }, + { + "epoch": 2.3253428519119805, + "grad_norm": 2.762639184084037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479470 + }, + { + "epoch": 2.325391350104817, + "grad_norm": 2.621223949006435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479480 + }, + { + "epoch": 2.325439848297653, + "grad_norm": 2.1190384558167352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479490 + }, + { + "epoch": 2.3254883464904887, + "grad_norm": 2.352478745137887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479500 + }, + { + "epoch": 2.325536844683325, + "grad_norm": 2.1936847005576965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479510 + }, + { + "epoch": 2.325585342876161, + "grad_norm": 2.2756889705988215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479520 + }, + { + "epoch": 2.3256338410689974, + "grad_norm": 2.6526830509965293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479530 + }, + { + "epoch": 2.3256823392618333, + "grad_norm": 2.310207669609099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479540 + }, + { + "epoch": 2.3257308374546692, + "grad_norm": 2.107354646341264e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479550 + }, + { + "epoch": 2.3257793356475056, + "grad_norm": 2.2461156490294343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479560 + }, + { + "epoch": 2.3258278338403415, + "grad_norm": 2.389107933709056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479570 + }, + { + "epoch": 2.3258763320331775, + "grad_norm": 2.6551260745577565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479580 + }, + { + "epoch": 2.325924830226014, + "grad_norm": 2.200063420332299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479590 + }, + { + "epoch": 2.3259733284188497, + "grad_norm": 2.0157321145575224e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479600 + }, + { + "epoch": 2.3260218266116857, + "grad_norm": 2.136964027954491e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479610 + }, + { + "epoch": 2.326070324804522, + "grad_norm": 2.2389292197999566e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479620 + }, + { + "epoch": 2.326118822997358, + "grad_norm": 2.485618821879143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479630 + }, + { + "epoch": 2.3261673211901943, + "grad_norm": 2.133348075972208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479640 + }, + { + "epoch": 2.3262158193830302, + "grad_norm": 2.025432976893171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479650 + }, + { + "epoch": 2.326264317575866, + "grad_norm": 2.1990153697970527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479660 + }, + { + "epoch": 2.3263128157687025, + "grad_norm": 2.0002717704414863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479670 + }, + { + "epoch": 2.3263613139615384, + "grad_norm": 2.6315207790617023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479680 + }, + { + "epoch": 2.326409812154375, + "grad_norm": 1.990981779442791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479690 + }, + { + "epoch": 2.3264583103472107, + "grad_norm": 2.536651599882589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479700 + }, + { + "epoch": 2.3265068085400467, + "grad_norm": 2.1735658606303332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479710 + }, + { + "epoch": 2.326555306732883, + "grad_norm": 2.1680291339976066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479720 + }, + { + "epoch": 2.326603804925719, + "grad_norm": 2.4141867172033926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479730 + }, + { + "epoch": 2.326652303118555, + "grad_norm": 2.0695955171845526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479740 + }, + { + "epoch": 2.3267008013113912, + "grad_norm": 1.9723172428598446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479750 + }, + { + "epoch": 2.326749299504227, + "grad_norm": 2.0456029758975092e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479760 + }, + { + "epoch": 2.326797797697063, + "grad_norm": 2.1210194489640344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479770 + }, + { + "epoch": 2.3268462958898994, + "grad_norm": 2.6051164780938052e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479780 + }, + { + "epoch": 2.3268947940827354, + "grad_norm": 2.1166970398667218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479790 + }, + { + "epoch": 2.3269432922755717, + "grad_norm": 2.1791237259094487e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479800 + }, + { + "epoch": 2.3269917904684077, + "grad_norm": 1.978535557611849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479810 + }, + { + "epoch": 2.3270402886612436, + "grad_norm": 2.2559632384400174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479820 + }, + { + "epoch": 2.32708878685408, + "grad_norm": 2.6727109414537153e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479830 + }, + { + "epoch": 2.327137285046916, + "grad_norm": 2.5868056141575835e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479840 + }, + { + "epoch": 2.3271857832397522, + "grad_norm": 2.3350620992346194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479850 + }, + { + "epoch": 2.327234281432588, + "grad_norm": 2.1584142473329848e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479860 + }, + { + "epoch": 2.327282779625424, + "grad_norm": 2.2498417351357602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479870 + }, + { + "epoch": 2.3273312778182604, + "grad_norm": 2.5835419137365534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479880 + }, + { + "epoch": 2.3273797760110964, + "grad_norm": 2.1235820213405532e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479890 + }, + { + "epoch": 2.3274282742039327, + "grad_norm": 2.1304371600194827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479900 + }, + { + "epoch": 2.3274767723967686, + "grad_norm": 2.0435070524627008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479910 + }, + { + "epoch": 2.3275252705896046, + "grad_norm": 2.1036694164422443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479920 + }, + { + "epoch": 2.327573768782441, + "grad_norm": 2.4279001920035626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479930 + }, + { + "epoch": 2.327622266975277, + "grad_norm": 2.09790602667681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479940 + }, + { + "epoch": 2.3276707651681128, + "grad_norm": 1.9144660967640448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479950 + }, + { + "epoch": 2.327719263360949, + "grad_norm": 2.0006764245295017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479960 + }, + { + "epoch": 2.327767761553785, + "grad_norm": 2.006118826614056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479970 + }, + { + "epoch": 2.327816259746621, + "grad_norm": 2.389110775879999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479980 + }, + { + "epoch": 2.3278647579394574, + "grad_norm": 1.9031878295550086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 479990 + }, + { + "epoch": 2.3279132561322933, + "grad_norm": 2.6514351603168507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480000 + }, + { + "epoch": 2.3279617543251296, + "grad_norm": 1.8151679270772547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480010 + }, + { + "epoch": 2.3280102525179656, + "grad_norm": 1.769628887871022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480020 + }, + { + "epoch": 2.3280587507108015, + "grad_norm": 2.2836676549786716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480030 + }, + { + "epoch": 2.328107248903638, + "grad_norm": 2.430238765782633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480040 + }, + { + "epoch": 2.3281557470964738, + "grad_norm": 2.0455582117051563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480050 + }, + { + "epoch": 2.32820424528931, + "grad_norm": 2.1581490372568624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480060 + }, + { + "epoch": 2.328252743482146, + "grad_norm": 2.023043244037126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480070 + }, + { + "epoch": 2.328301241674982, + "grad_norm": 2.7658860091150927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480080 + }, + { + "epoch": 2.3283497398678183, + "grad_norm": 2.0757033425411464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480090 + }, + { + "epoch": 2.3283982380606543, + "grad_norm": 2.0263531297359805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480100 + }, + { + "epoch": 2.32844673625349, + "grad_norm": 2.0631150121630526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480110 + }, + { + "epoch": 2.3284952344463266, + "grad_norm": 2.118503417136708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480120 + }, + { + "epoch": 2.3285437326391625, + "grad_norm": 2.3217396005748014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480130 + }, + { + "epoch": 2.3285922308319984, + "grad_norm": 1.9483451296764542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480140 + }, + { + "epoch": 2.3286407290248348, + "grad_norm": 6.222909831876677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480150 + }, + { + "epoch": 2.3286892272176707, + "grad_norm": 1.8253640376997282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480160 + }, + { + "epoch": 2.328737725410507, + "grad_norm": 1.9602294898390937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480170 + }, + { + "epoch": 2.328786223603343, + "grad_norm": 2.1379074510718965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480180 + }, + { + "epoch": 2.328834721796179, + "grad_norm": 1.8909080523599187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480190 + }, + { + "epoch": 2.3288832199890153, + "grad_norm": 2.3296596651789514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480200 + }, + { + "epoch": 2.328931718181851, + "grad_norm": 2.2032844881891833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480210 + }, + { + "epoch": 2.3289802163746876, + "grad_norm": 1.73980918560801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480220 + }, + { + "epoch": 2.3290287145675235, + "grad_norm": 2.350429539887955e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480230 + }, + { + "epoch": 2.3290772127603594, + "grad_norm": 2.0050753946065925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480240 + }, + { + "epoch": 2.3291257109531958, + "grad_norm": 1.936362359344912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480250 + }, + { + "epoch": 2.3291742091460317, + "grad_norm": 1.844575336917842e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480260 + }, + { + "epoch": 2.3292227073388676, + "grad_norm": 1.9660051364667197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480270 + }, + { + "epoch": 2.329271205531704, + "grad_norm": 2.4361131778505296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480280 + }, + { + "epoch": 2.32931970372454, + "grad_norm": 2.0684693069483728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480290 + }, + { + "epoch": 2.329368201917376, + "grad_norm": 1.8822797542838998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480300 + }, + { + "epoch": 2.329416700110212, + "grad_norm": 1.789097403559481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480310 + }, + { + "epoch": 2.329465198303048, + "grad_norm": 2.2259472487462517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480320 + }, + { + "epoch": 2.3295136964958845, + "grad_norm": 2.3704169294092026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480330 + }, + { + "epoch": 2.3295621946887204, + "grad_norm": 1.817950767701859e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480340 + }, + { + "epoch": 2.3296106928815563, + "grad_norm": 1.7266838625573655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480350 + }, + { + "epoch": 2.3296591910743927, + "grad_norm": 1.843425856407066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480360 + }, + { + "epoch": 2.3297076892672286, + "grad_norm": 1.7335946012053682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480370 + }, + { + "epoch": 2.329756187460065, + "grad_norm": 2.3945300853256413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480380 + }, + { + "epoch": 2.329804685652901, + "grad_norm": 1.886763101310862e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480390 + }, + { + "epoch": 2.329853183845737, + "grad_norm": 1.9101031867307938e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480400 + }, + { + "epoch": 2.329901682038573, + "grad_norm": 1.8462376516481527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480410 + }, + { + "epoch": 2.329950180231409, + "grad_norm": 0.03499173745512962, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 480420 + }, + { + "epoch": 2.3299986784242455, + "grad_norm": 1.1382527191017289e-05, + "learning_rate": 0.0002, + "loss": 0.0023, + "step": 480430 + }, + { + "epoch": 2.3300471766170814, + "grad_norm": 0.0013562041567638516, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 480440 + }, + { + "epoch": 2.3300956748099173, + "grad_norm": 0.9533877372741699, + "learning_rate": 0.0002, + "loss": 0.0022, + "step": 480450 + }, + { + "epoch": 2.3301441730027537, + "grad_norm": 0.008801767602562904, + "learning_rate": 0.0002, + "loss": 0.0541, + "step": 480460 + }, + { + "epoch": 2.3301926711955896, + "grad_norm": 0.002505277516320348, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 480470 + }, + { + "epoch": 2.3302411693884255, + "grad_norm": 0.0005820015794597566, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 480480 + }, + { + "epoch": 2.330289667581262, + "grad_norm": 0.00031825521728023887, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480490 + }, + { + "epoch": 2.330338165774098, + "grad_norm": 0.00019020769104827195, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480500 + }, + { + "epoch": 2.3303866639669337, + "grad_norm": 0.0001393839920638129, + "learning_rate": 0.0002, + "loss": 0.0016, + "step": 480510 + }, + { + "epoch": 2.33043516215977, + "grad_norm": 0.00034451077226549387, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 480520 + }, + { + "epoch": 2.330483660352606, + "grad_norm": 7.989761070348322e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480530 + }, + { + "epoch": 2.3305321585454424, + "grad_norm": 0.0017550818156450987, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 480540 + }, + { + "epoch": 2.3305806567382783, + "grad_norm": 7.772276148898527e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 480550 + }, + { + "epoch": 2.330629154931114, + "grad_norm": 6.739457603543997e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480560 + }, + { + "epoch": 2.3306776531239506, + "grad_norm": 6.53644310659729e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480570 + }, + { + "epoch": 2.3307261513167865, + "grad_norm": 5.5332457122858614e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480580 + }, + { + "epoch": 2.330774649509623, + "grad_norm": 5.219239028519951e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480590 + }, + { + "epoch": 2.330823147702459, + "grad_norm": 4.906405592919327e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480600 + }, + { + "epoch": 2.3308716458952947, + "grad_norm": 4.501221337704919e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480610 + }, + { + "epoch": 2.330920144088131, + "grad_norm": 4.194086432107724e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480620 + }, + { + "epoch": 2.330968642280967, + "grad_norm": 7.024603110039607e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480630 + }, + { + "epoch": 2.331017140473803, + "grad_norm": 3.736713188118301e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480640 + }, + { + "epoch": 2.3310656386666393, + "grad_norm": 3.61482598236762e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480650 + }, + { + "epoch": 2.331114136859475, + "grad_norm": 3.361433118698187e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480660 + }, + { + "epoch": 2.331162635052311, + "grad_norm": 3.292081237304956e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480670 + }, + { + "epoch": 2.3312111332451475, + "grad_norm": 2.323978151252959e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480680 + }, + { + "epoch": 2.3312596314379834, + "grad_norm": 3.0544928449671715e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480690 + }, + { + "epoch": 2.33130812963082, + "grad_norm": 2.792921986838337e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480700 + }, + { + "epoch": 2.3313566278236557, + "grad_norm": 3.107726661255583e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480710 + }, + { + "epoch": 2.3314051260164916, + "grad_norm": 2.5997664124588482e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480720 + }, + { + "epoch": 2.331453624209328, + "grad_norm": 1.933287967403885e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480730 + }, + { + "epoch": 2.331502122402164, + "grad_norm": 2.5492607164778747e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480740 + }, + { + "epoch": 2.3315506205950003, + "grad_norm": 2.4215272787841968e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480750 + }, + { + "epoch": 2.331599118787836, + "grad_norm": 2.2541062207892537e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480760 + }, + { + "epoch": 2.331647616980672, + "grad_norm": 2.2779981009080075e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480770 + }, + { + "epoch": 2.3316961151735085, + "grad_norm": 1.607059130037669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480780 + }, + { + "epoch": 2.3317446133663444, + "grad_norm": 2.1766209101770073e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480790 + }, + { + "epoch": 2.3317931115591803, + "grad_norm": 2.0955758373020217e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480800 + }, + { + "epoch": 2.3318416097520167, + "grad_norm": 1.9447998056421056e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480810 + }, + { + "epoch": 2.3318901079448526, + "grad_norm": 1.916522887768224e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480820 + }, + { + "epoch": 2.3319386061376886, + "grad_norm": 1.3952609151601791e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480830 + }, + { + "epoch": 2.331987104330525, + "grad_norm": 1.7966343875741586e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480840 + }, + { + "epoch": 2.332035602523361, + "grad_norm": 1.7575197489350103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480850 + }, + { + "epoch": 2.332084100716197, + "grad_norm": 1.7145923266070895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480860 + }, + { + "epoch": 2.332132598909033, + "grad_norm": 1.714386962703429e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480870 + }, + { + "epoch": 2.332181097101869, + "grad_norm": 1.2385518857627176e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480880 + }, + { + "epoch": 2.3322295952947054, + "grad_norm": 1.694160710030701e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480890 + }, + { + "epoch": 2.3322780934875413, + "grad_norm": 1.6178715668502264e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480900 + }, + { + "epoch": 2.3323265916803777, + "grad_norm": 1.5094672562554479e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480910 + }, + { + "epoch": 2.3323750898732136, + "grad_norm": 1.4368681149790064e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480920 + }, + { + "epoch": 2.3324235880660495, + "grad_norm": 1.091830745281186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480930 + }, + { + "epoch": 2.332472086258886, + "grad_norm": 1.4482539882010315e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480940 + }, + { + "epoch": 2.332520584451722, + "grad_norm": 1.4555410416505765e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480950 + }, + { + "epoch": 2.332569082644558, + "grad_norm": 1.4026171811565291e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480960 + }, + { + "epoch": 2.332617580837394, + "grad_norm": 1.378436263621552e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480970 + }, + { + "epoch": 2.33266607903023, + "grad_norm": 9.92295827018097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480980 + }, + { + "epoch": 2.3327145772230664, + "grad_norm": 1.3128506907378323e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 480990 + }, + { + "epoch": 2.3327630754159023, + "grad_norm": 1.2864708878623787e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481000 + }, + { + "epoch": 2.3328115736087383, + "grad_norm": 1.2295184205868281e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481010 + }, + { + "epoch": 2.3328600718015746, + "grad_norm": 1.211502603837289e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481020 + }, + { + "epoch": 2.3329085699944105, + "grad_norm": 8.961175808508415e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481030 + }, + { + "epoch": 2.3329570681872465, + "grad_norm": 1.1609656212385744e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481040 + }, + { + "epoch": 2.333005566380083, + "grad_norm": 1.1403739335946739e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481050 + }, + { + "epoch": 2.3330540645729188, + "grad_norm": 1.2551122381410096e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481060 + }, + { + "epoch": 2.333102562765755, + "grad_norm": 1.1720006114046555e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481070 + }, + { + "epoch": 2.333151060958591, + "grad_norm": 8.266766599263065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481080 + }, + { + "epoch": 2.333199559151427, + "grad_norm": 1.0351484888815321e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481090 + }, + { + "epoch": 2.3332480573442633, + "grad_norm": 1.0352498975407798e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481100 + }, + { + "epoch": 2.3332965555370992, + "grad_norm": 1.0837412446562666e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481110 + }, + { + "epoch": 2.3333450537299356, + "grad_norm": 9.919501280819532e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481120 + }, + { + "epoch": 2.3333935519227715, + "grad_norm": 7.587369509565178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481130 + }, + { + "epoch": 2.3334420501156075, + "grad_norm": 9.511374628345948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481140 + }, + { + "epoch": 2.333490548308444, + "grad_norm": 9.942937140294816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481150 + }, + { + "epoch": 2.3335390465012797, + "grad_norm": 9.466075425734743e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481160 + }, + { + "epoch": 2.3335875446941157, + "grad_norm": 9.428098564967513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481170 + }, + { + "epoch": 2.333636042886952, + "grad_norm": 6.726861101924442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481180 + }, + { + "epoch": 2.333684541079788, + "grad_norm": 9.098622285819147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481190 + }, + { + "epoch": 2.333733039272624, + "grad_norm": 8.501620868628379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481200 + }, + { + "epoch": 2.3337815374654602, + "grad_norm": 8.49438310979167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481210 + }, + { + "epoch": 2.333830035658296, + "grad_norm": 8.525271368853282e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481220 + }, + { + "epoch": 2.3338785338511325, + "grad_norm": 6.540776666952297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481230 + }, + { + "epoch": 2.3339270320439685, + "grad_norm": 8.379972314287443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481240 + }, + { + "epoch": 2.3339755302368044, + "grad_norm": 8.058795174292754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481250 + }, + { + "epoch": 2.3340240284296407, + "grad_norm": 8.029630407691002e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481260 + }, + { + "epoch": 2.3340725266224767, + "grad_norm": 7.689771337027196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481270 + }, + { + "epoch": 2.334121024815313, + "grad_norm": 6.005066097714007e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481280 + }, + { + "epoch": 2.334169523008149, + "grad_norm": 7.595481747557642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481290 + }, + { + "epoch": 2.334218021200985, + "grad_norm": 7.423378519888502e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481300 + }, + { + "epoch": 2.3342665193938212, + "grad_norm": 7.353010005317628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481310 + }, + { + "epoch": 2.334315017586657, + "grad_norm": 7.163871941884281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481320 + }, + { + "epoch": 2.334363515779493, + "grad_norm": 5.787167538073845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481330 + }, + { + "epoch": 2.3344120139723294, + "grad_norm": 6.789545750507386e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481340 + }, + { + "epoch": 2.3344605121651654, + "grad_norm": 7.146427833504276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481350 + }, + { + "epoch": 2.3345090103580013, + "grad_norm": 6.828270670666825e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481360 + }, + { + "epoch": 2.3345575085508377, + "grad_norm": 7.3757114478212316e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481370 + }, + { + "epoch": 2.3346060067436736, + "grad_norm": 5.395221251092153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481380 + }, + { + "epoch": 2.33465450493651, + "grad_norm": 7.401733910228359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481390 + }, + { + "epoch": 2.334703003129346, + "grad_norm": 9.999033864005469e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481400 + }, + { + "epoch": 2.334751501322182, + "grad_norm": 6.789228336856468e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481410 + }, + { + "epoch": 2.334799999515018, + "grad_norm": 6.409964498743648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481420 + }, + { + "epoch": 2.334848497707854, + "grad_norm": 4.941035513184033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481430 + }, + { + "epoch": 2.3348969959006904, + "grad_norm": 5.960494945611572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481440 + }, + { + "epoch": 2.3349454940935264, + "grad_norm": 6.124309038568754e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481450 + }, + { + "epoch": 2.3349939922863623, + "grad_norm": 6.24450603936566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481460 + }, + { + "epoch": 2.3350424904791987, + "grad_norm": 6.172903795231832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481470 + }, + { + "epoch": 2.3350909886720346, + "grad_norm": 4.8095389502123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481480 + }, + { + "epoch": 2.335139486864871, + "grad_norm": 6.182845936564263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481490 + }, + { + "epoch": 2.335187985057707, + "grad_norm": 5.719145974580897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481500 + }, + { + "epoch": 2.335236483250543, + "grad_norm": 5.8328828345111106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481510 + }, + { + "epoch": 2.335284981443379, + "grad_norm": 5.816916655021487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481520 + }, + { + "epoch": 2.335333479636215, + "grad_norm": 4.346498371887719e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481530 + }, + { + "epoch": 2.335381977829051, + "grad_norm": 5.422197318694089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481540 + }, + { + "epoch": 2.3354304760218874, + "grad_norm": 5.330769909051014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481550 + }, + { + "epoch": 2.3354789742147233, + "grad_norm": 7.696205102547538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481560 + }, + { + "epoch": 2.335527472407559, + "grad_norm": 5.380236871133093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481570 + }, + { + "epoch": 2.3355759706003956, + "grad_norm": 4.309870291763218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481580 + }, + { + "epoch": 2.3356244687932315, + "grad_norm": 5.4134061429067515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481590 + }, + { + "epoch": 2.335672966986068, + "grad_norm": 5.156650786375394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481600 + }, + { + "epoch": 2.3357214651789038, + "grad_norm": 5.18998785992153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481610 + }, + { + "epoch": 2.3357699633717397, + "grad_norm": 5.259106274024816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481620 + }, + { + "epoch": 2.335818461564576, + "grad_norm": 3.847072548524011e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481630 + }, + { + "epoch": 2.335866959757412, + "grad_norm": 4.98502276968793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481640 + }, + { + "epoch": 2.3359154579502484, + "grad_norm": 4.877762421529042e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481650 + }, + { + "epoch": 2.3359639561430843, + "grad_norm": 4.764364803122589e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481660 + }, + { + "epoch": 2.33601245433592, + "grad_norm": 4.7256571633624844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481670 + }, + { + "epoch": 2.3360609525287566, + "grad_norm": 3.8800822039775085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481680 + }, + { + "epoch": 2.3361094507215925, + "grad_norm": 4.7861253733572084e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481690 + }, + { + "epoch": 2.3361579489144284, + "grad_norm": 4.86600401927717e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481700 + }, + { + "epoch": 2.3362064471072648, + "grad_norm": 4.537477252597455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481710 + }, + { + "epoch": 2.3362549453001007, + "grad_norm": 5.2504219638649374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481720 + }, + { + "epoch": 2.3363034434929366, + "grad_norm": 3.6387948512128787e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481730 + }, + { + "epoch": 2.336351941685773, + "grad_norm": 4.4840958253189456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481740 + }, + { + "epoch": 2.336400439878609, + "grad_norm": 4.326320777181536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481750 + }, + { + "epoch": 2.3364489380714453, + "grad_norm": 4.381388407637132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481760 + }, + { + "epoch": 2.336497436264281, + "grad_norm": 4.308908955863444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481770 + }, + { + "epoch": 2.336545934457117, + "grad_norm": 3.4198433240817394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481780 + }, + { + "epoch": 2.3365944326499535, + "grad_norm": 4.288830041332403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481790 + }, + { + "epoch": 2.3366429308427894, + "grad_norm": 4.173899469606113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481800 + }, + { + "epoch": 2.3366914290356258, + "grad_norm": 4.3745667426264845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481810 + }, + { + "epoch": 2.3367399272284617, + "grad_norm": 4.001974048151169e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481820 + }, + { + "epoch": 2.3367884254212976, + "grad_norm": 3.25643827636668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481830 + }, + { + "epoch": 2.336836923614134, + "grad_norm": 3.889615982188843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481840 + }, + { + "epoch": 2.33688542180697, + "grad_norm": 4.0327845454157796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481850 + }, + { + "epoch": 2.336933919999806, + "grad_norm": 3.977324467996368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481860 + }, + { + "epoch": 2.336982418192642, + "grad_norm": 4.044451543450123e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481870 + }, + { + "epoch": 2.337030916385478, + "grad_norm": 3.118279892078135e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481880 + }, + { + "epoch": 2.337079414578314, + "grad_norm": 3.852502231893595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481890 + }, + { + "epoch": 2.3371279127711504, + "grad_norm": 3.7797881304868497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481900 + }, + { + "epoch": 2.3371764109639863, + "grad_norm": 3.7387194424809422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481910 + }, + { + "epoch": 2.3372249091568227, + "grad_norm": 3.561727908163448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481920 + }, + { + "epoch": 2.3372734073496586, + "grad_norm": 3.024994612133014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481930 + }, + { + "epoch": 2.3373219055424945, + "grad_norm": 3.6850942706223577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481940 + }, + { + "epoch": 2.337370403735331, + "grad_norm": 3.51343351212563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481950 + }, + { + "epoch": 2.337418901928167, + "grad_norm": 3.5640073292597663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481960 + }, + { + "epoch": 2.337467400121003, + "grad_norm": 3.568018200894585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481970 + }, + { + "epoch": 2.337515898313839, + "grad_norm": 2.874517576856306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481980 + }, + { + "epoch": 2.337564396506675, + "grad_norm": 3.5117241168336477e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 481990 + }, + { + "epoch": 2.3376128946995114, + "grad_norm": 3.269017270213226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482000 + }, + { + "epoch": 2.3376613928923473, + "grad_norm": 3.2876519071578514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482010 + }, + { + "epoch": 2.3377098910851837, + "grad_norm": 3.3665232876955997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482020 + }, + { + "epoch": 2.3377583892780196, + "grad_norm": 2.643782408995321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482030 + }, + { + "epoch": 2.3378068874708555, + "grad_norm": 3.3814471862569917e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482040 + }, + { + "epoch": 2.337855385663692, + "grad_norm": 3.310555712232599e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482050 + }, + { + "epoch": 2.337903883856528, + "grad_norm": 3.3845280995592475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482060 + }, + { + "epoch": 2.3379523820493637, + "grad_norm": 3.2660263968864456e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482070 + }, + { + "epoch": 2.3380008802422, + "grad_norm": 2.533336555643473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482080 + }, + { + "epoch": 2.338049378435036, + "grad_norm": 3.097869466728298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482090 + }, + { + "epoch": 2.338097876627872, + "grad_norm": 3.245040716137737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482100 + }, + { + "epoch": 2.3381463748207083, + "grad_norm": 3.0392375265364535e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482110 + }, + { + "epoch": 2.3381948730135442, + "grad_norm": 2.974950348288985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482120 + }, + { + "epoch": 2.3382433712063806, + "grad_norm": 2.356937784497859e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482130 + }, + { + "epoch": 2.3382918693992165, + "grad_norm": 3.015379661519546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482140 + }, + { + "epoch": 2.3383403675920524, + "grad_norm": 3.020327312697191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482150 + }, + { + "epoch": 2.338388865784889, + "grad_norm": 2.9360869575612014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482160 + }, + { + "epoch": 2.3384373639777247, + "grad_norm": 2.8808624392695492e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482170 + }, + { + "epoch": 2.338485862170561, + "grad_norm": 2.245397809019778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482180 + }, + { + "epoch": 2.338534360363397, + "grad_norm": 2.783546506179846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482190 + }, + { + "epoch": 2.338582858556233, + "grad_norm": 3.496714271022938e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482200 + }, + { + "epoch": 2.3386313567490693, + "grad_norm": 2.7849200705531985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482210 + }, + { + "epoch": 2.3386798549419052, + "grad_norm": 2.8819983981520636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482220 + }, + { + "epoch": 2.338728353134741, + "grad_norm": 2.102509597534663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482230 + }, + { + "epoch": 2.3387768513275775, + "grad_norm": 3.7275929116731277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482240 + }, + { + "epoch": 2.3388253495204134, + "grad_norm": 5.4117358558869455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482250 + }, + { + "epoch": 2.3388738477132494, + "grad_norm": 2.6010209239757387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482260 + }, + { + "epoch": 2.3389223459060857, + "grad_norm": 2.599483650556067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482270 + }, + { + "epoch": 2.3389708440989216, + "grad_norm": 2.0344391487014946e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482280 + }, + { + "epoch": 2.339019342291758, + "grad_norm": 3.939563157473458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482290 + }, + { + "epoch": 2.339067840484594, + "grad_norm": 2.6295665520592593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482300 + }, + { + "epoch": 2.33911633867743, + "grad_norm": 2.451262389513431e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482310 + }, + { + "epoch": 2.339164836870266, + "grad_norm": 2.492769453965593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482320 + }, + { + "epoch": 2.339213335063102, + "grad_norm": 1.925377091538394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482330 + }, + { + "epoch": 2.3392618332559385, + "grad_norm": 2.3637212507310323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482340 + }, + { + "epoch": 2.3393103314487744, + "grad_norm": 2.4598182335466845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482350 + }, + { + "epoch": 2.3393588296416103, + "grad_norm": 2.3772427084622905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482360 + }, + { + "epoch": 2.3394073278344467, + "grad_norm": 2.2881722543388605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482370 + }, + { + "epoch": 2.3394558260272826, + "grad_norm": 1.891645524665364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482380 + }, + { + "epoch": 2.3395043242201186, + "grad_norm": 2.258941776744905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482390 + }, + { + "epoch": 2.339552822412955, + "grad_norm": 2.3072677777236095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482400 + }, + { + "epoch": 2.339601320605791, + "grad_norm": 2.1986309093335876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482410 + }, + { + "epoch": 2.3396498187986268, + "grad_norm": 2.3562163278256776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482420 + }, + { + "epoch": 2.339698316991463, + "grad_norm": 1.7830457181844395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482430 + }, + { + "epoch": 2.339746815184299, + "grad_norm": 2.3201398562378017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482440 + }, + { + "epoch": 2.3397953133771354, + "grad_norm": 2.217118662883877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482450 + }, + { + "epoch": 2.3398438115699713, + "grad_norm": 2.242394202767173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482460 + }, + { + "epoch": 2.3398923097628073, + "grad_norm": 2.1483635919139488e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482470 + }, + { + "epoch": 2.3399408079556436, + "grad_norm": 1.6372808886444545e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482480 + }, + { + "epoch": 2.3399893061484796, + "grad_norm": 2.0881066120637115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482490 + }, + { + "epoch": 2.340037804341316, + "grad_norm": 1.9978028831246775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482500 + }, + { + "epoch": 2.340086302534152, + "grad_norm": 2.018423401750624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482510 + }, + { + "epoch": 2.3401348007269878, + "grad_norm": 2.0803281586267985e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482520 + }, + { + "epoch": 2.340183298919824, + "grad_norm": 1.5700102267146576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482530 + }, + { + "epoch": 2.34023179711266, + "grad_norm": 2.03020204025961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482540 + }, + { + "epoch": 2.3402802953054964, + "grad_norm": 1.990525788642117e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482550 + }, + { + "epoch": 2.3403287934983323, + "grad_norm": 2.0210454749758355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482560 + }, + { + "epoch": 2.3403772916911683, + "grad_norm": 1.9447279555606656e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482570 + }, + { + "epoch": 2.3404257898840046, + "grad_norm": 1.5124658148124581e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482580 + }, + { + "epoch": 2.3404742880768405, + "grad_norm": 1.9701578821695875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482590 + }, + { + "epoch": 2.3405227862696765, + "grad_norm": 1.9719568626896944e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482600 + }, + { + "epoch": 2.340571284462513, + "grad_norm": 1.91265030480281e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482610 + }, + { + "epoch": 2.3406197826553488, + "grad_norm": 1.8414729083815473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482620 + }, + { + "epoch": 2.3406682808481847, + "grad_norm": 1.4718582406203495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482630 + }, + { + "epoch": 2.340716779041021, + "grad_norm": 1.8581490621727426e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482640 + }, + { + "epoch": 2.340765277233857, + "grad_norm": 1.8672708392841741e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482650 + }, + { + "epoch": 2.3408137754266933, + "grad_norm": 1.8978208800035645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482660 + }, + { + "epoch": 2.3408622736195293, + "grad_norm": 1.828377435231232e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482670 + }, + { + "epoch": 2.340910771812365, + "grad_norm": 1.3766373285761802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482680 + }, + { + "epoch": 2.3409592700052015, + "grad_norm": 1.7522929738333914e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482690 + }, + { + "epoch": 2.3410077681980375, + "grad_norm": 1.9198796508135274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482700 + }, + { + "epoch": 2.341056266390874, + "grad_norm": 1.9102692476735683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482710 + }, + { + "epoch": 2.3411047645837098, + "grad_norm": 1.8139172652809066e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482720 + }, + { + "epoch": 2.3411532627765457, + "grad_norm": 1.3839469374943292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482730 + }, + { + "epoch": 2.341201760969382, + "grad_norm": 1.7208742519869702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482740 + }, + { + "epoch": 2.341250259162218, + "grad_norm": 1.6151992667801096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482750 + }, + { + "epoch": 2.341298757355054, + "grad_norm": 1.7424571296942304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482760 + }, + { + "epoch": 2.3413472555478902, + "grad_norm": 1.7040395050571533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482770 + }, + { + "epoch": 2.341395753740726, + "grad_norm": 1.3132001868143561e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482780 + }, + { + "epoch": 2.341444251933562, + "grad_norm": 1.5798780168552184e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482790 + }, + { + "epoch": 2.3414927501263985, + "grad_norm": 1.686532300482213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482800 + }, + { + "epoch": 2.3415412483192344, + "grad_norm": 1.5429341146955267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482810 + }, + { + "epoch": 2.3415897465120707, + "grad_norm": 2.317211283298093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482820 + }, + { + "epoch": 2.3416382447049067, + "grad_norm": 1.2976888683624566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482830 + }, + { + "epoch": 2.3416867428977426, + "grad_norm": 1.5960947621351806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482840 + }, + { + "epoch": 2.341735241090579, + "grad_norm": 1.5916235724944272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482850 + }, + { + "epoch": 2.341783739283415, + "grad_norm": 1.5472825225515408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482860 + }, + { + "epoch": 2.3418322374762512, + "grad_norm": 1.4830518466624198e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482870 + }, + { + "epoch": 2.341880735669087, + "grad_norm": 1.1953978855672176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482880 + }, + { + "epoch": 2.341929233861923, + "grad_norm": 1.5057755717862165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482890 + }, + { + "epoch": 2.3419777320547595, + "grad_norm": 1.4772105032534455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482900 + }, + { + "epoch": 2.3420262302475954, + "grad_norm": 1.4663424963146099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482910 + }, + { + "epoch": 2.3420747284404313, + "grad_norm": 1.4808994137638365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482920 + }, + { + "epoch": 2.3421232266332677, + "grad_norm": 1.1518925475684227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482930 + }, + { + "epoch": 2.3421717248261036, + "grad_norm": 1.4989600458648056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482940 + }, + { + "epoch": 2.34222022301894, + "grad_norm": 1.390432089465321e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482950 + }, + { + "epoch": 2.342268721211776, + "grad_norm": 1.3560256775235757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482960 + }, + { + "epoch": 2.342317219404612, + "grad_norm": 1.314109681516129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482970 + }, + { + "epoch": 2.342365717597448, + "grad_norm": 1.1831359643110773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482980 + }, + { + "epoch": 2.342414215790284, + "grad_norm": 1.5538336128884112e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 482990 + }, + { + "epoch": 2.34246271398312, + "grad_norm": 1.397152459503559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483000 + }, + { + "epoch": 2.3425112121759564, + "grad_norm": 1.3778965239907848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483010 + }, + { + "epoch": 2.3425597103687923, + "grad_norm": 1.3339046063265414e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483020 + }, + { + "epoch": 2.3426082085616287, + "grad_norm": 1.2180524890936795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483030 + }, + { + "epoch": 2.3426567067544646, + "grad_norm": 1.3143165915607824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483040 + }, + { + "epoch": 2.3427052049473005, + "grad_norm": 1.31904255340487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483050 + }, + { + "epoch": 2.342753703140137, + "grad_norm": 1.3423568816506304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483060 + }, + { + "epoch": 2.342802201332973, + "grad_norm": 1.300364374401397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483070 + }, + { + "epoch": 2.342850699525809, + "grad_norm": 1.058898305927869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483080 + }, + { + "epoch": 2.342899197718645, + "grad_norm": 1.2797386261809152e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483090 + }, + { + "epoch": 2.342947695911481, + "grad_norm": 1.350354409623833e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483100 + }, + { + "epoch": 2.3429961941043174, + "grad_norm": 1.1991671726718778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483110 + }, + { + "epoch": 2.3430446922971533, + "grad_norm": 1.3240617136034416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483120 + }, + { + "epoch": 2.343093190489989, + "grad_norm": 1.0473667089172523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483130 + }, + { + "epoch": 2.3431416886828256, + "grad_norm": 1.2154511068729335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483140 + }, + { + "epoch": 2.3431901868756615, + "grad_norm": 1.9462218006083276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483150 + }, + { + "epoch": 2.3432386850684974, + "grad_norm": 1.2468652812458458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483160 + }, + { + "epoch": 2.343287183261334, + "grad_norm": 1.2829358411181602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483170 + }, + { + "epoch": 2.3433356814541697, + "grad_norm": 9.732751777846715e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483180 + }, + { + "epoch": 2.343384179647006, + "grad_norm": 1.23267807339289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483190 + }, + { + "epoch": 2.343432677839842, + "grad_norm": 1.2292238125155563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483200 + }, + { + "epoch": 2.343481176032678, + "grad_norm": 1.2034400924676447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483210 + }, + { + "epoch": 2.3435296742255143, + "grad_norm": 1.1770339369832072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483220 + }, + { + "epoch": 2.34357817241835, + "grad_norm": 9.358062698083813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483230 + }, + { + "epoch": 2.3436266706111866, + "grad_norm": 1.1828593642349006e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483240 + }, + { + "epoch": 2.3436751688040225, + "grad_norm": 1.140312747338612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483250 + }, + { + "epoch": 2.3437236669968584, + "grad_norm": 1.1769438970077317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483260 + }, + { + "epoch": 2.343772165189695, + "grad_norm": 1.116605062634335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483270 + }, + { + "epoch": 2.3438206633825307, + "grad_norm": 9.489310741628287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483280 + }, + { + "epoch": 2.3438691615753666, + "grad_norm": 1.7924188568940735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483290 + }, + { + "epoch": 2.343917659768203, + "grad_norm": 1.1012456297976314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483300 + }, + { + "epoch": 2.343966157961039, + "grad_norm": 1.1368299510650104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483310 + }, + { + "epoch": 2.344014656153875, + "grad_norm": 1.0526030109758722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483320 + }, + { + "epoch": 2.344063154346711, + "grad_norm": 9.093964195017179e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483330 + }, + { + "epoch": 2.344111652539547, + "grad_norm": 1.1392544365662616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483340 + }, + { + "epoch": 2.3441601507323835, + "grad_norm": 1.0794573199746083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483350 + }, + { + "epoch": 2.3442086489252194, + "grad_norm": 1.0986601637341664e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483360 + }, + { + "epoch": 2.3442571471180553, + "grad_norm": 1.0744025757958298e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483370 + }, + { + "epoch": 2.3443056453108917, + "grad_norm": 8.939231861404551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483380 + }, + { + "epoch": 2.3443541435037276, + "grad_norm": 8.04802493803436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483390 + }, + { + "epoch": 2.344402641696564, + "grad_norm": 1.043762040353613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483400 + }, + { + "epoch": 2.3444511398894, + "grad_norm": 1.0353068091717432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483410 + }, + { + "epoch": 2.344499638082236, + "grad_norm": 1.1033438340746216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483420 + }, + { + "epoch": 2.344548136275072, + "grad_norm": 8.926348300519749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483430 + }, + { + "epoch": 2.344596634467908, + "grad_norm": 1.0202696785199805e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483440 + }, + { + "epoch": 2.344645132660744, + "grad_norm": 9.783832410903415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483450 + }, + { + "epoch": 2.3446936308535804, + "grad_norm": 1.062399292095506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483460 + }, + { + "epoch": 2.3447421290464163, + "grad_norm": 9.503536375632393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483470 + }, + { + "epoch": 2.3447906272392527, + "grad_norm": 8.146220125127002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483480 + }, + { + "epoch": 2.3448391254320886, + "grad_norm": 9.283003237214871e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483490 + }, + { + "epoch": 2.3448876236249245, + "grad_norm": 9.831411489358288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483500 + }, + { + "epoch": 2.344936121817761, + "grad_norm": 9.434745038561232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483510 + }, + { + "epoch": 2.344984620010597, + "grad_norm": 9.64985247264849e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483520 + }, + { + "epoch": 2.3450331182034327, + "grad_norm": 7.697282171648112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483530 + }, + { + "epoch": 2.345081616396269, + "grad_norm": 9.864099865808384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483540 + }, + { + "epoch": 2.345130114589105, + "grad_norm": 9.464686172577785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483550 + }, + { + "epoch": 2.3451786127819414, + "grad_norm": 9.140084671344084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483560 + }, + { + "epoch": 2.3452271109747773, + "grad_norm": 9.444943884773238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483570 + }, + { + "epoch": 2.3452756091676132, + "grad_norm": 7.701756317146646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483580 + }, + { + "epoch": 2.3453241073604496, + "grad_norm": 9.461439276492456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483590 + }, + { + "epoch": 2.3453726055532855, + "grad_norm": 9.002428669191431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483600 + }, + { + "epoch": 2.345421103746122, + "grad_norm": 3.4280856198165566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483610 + }, + { + "epoch": 2.345469601938958, + "grad_norm": 9.308198514190735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483620 + }, + { + "epoch": 2.3455181001317937, + "grad_norm": 7.700754736106319e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483630 + }, + { + "epoch": 2.34556659832463, + "grad_norm": 8.930509807214548e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483640 + }, + { + "epoch": 2.345615096517466, + "grad_norm": 8.423876352026127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483650 + }, + { + "epoch": 2.345663594710302, + "grad_norm": 9.403005378771923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483660 + }, + { + "epoch": 2.3457120929031383, + "grad_norm": 8.644938702673244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483670 + }, + { + "epoch": 2.3457605910959742, + "grad_norm": 7.659629090994713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483680 + }, + { + "epoch": 2.34580908928881, + "grad_norm": 8.938006317293912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483690 + }, + { + "epoch": 2.3458575874816465, + "grad_norm": 8.810351346255629e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483700 + }, + { + "epoch": 2.3459060856744824, + "grad_norm": 8.701755405127187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483710 + }, + { + "epoch": 2.345954583867319, + "grad_norm": 8.286095294351981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483720 + }, + { + "epoch": 2.3460030820601547, + "grad_norm": 7.23370078503649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483730 + }, + { + "epoch": 2.3460515802529907, + "grad_norm": 8.553910220143734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483740 + }, + { + "epoch": 2.346100078445827, + "grad_norm": 8.408000553572492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483750 + }, + { + "epoch": 2.346148576638663, + "grad_norm": 8.526082524440426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483760 + }, + { + "epoch": 2.3461970748314993, + "grad_norm": 8.547073662157345e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483770 + }, + { + "epoch": 2.3462455730243352, + "grad_norm": 6.820756652814453e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483780 + }, + { + "epoch": 2.346294071217171, + "grad_norm": 8.27463622954383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483790 + }, + { + "epoch": 2.3463425694100075, + "grad_norm": 8.373439186470932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483800 + }, + { + "epoch": 2.3463910676028434, + "grad_norm": 7.821707868060912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483810 + }, + { + "epoch": 2.3464395657956794, + "grad_norm": 8.31074942198029e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483820 + }, + { + "epoch": 2.3464880639885157, + "grad_norm": 7.113118840607058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483830 + }, + { + "epoch": 2.3465365621813516, + "grad_norm": 7.964300152707438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483840 + }, + { + "epoch": 2.3465850603741876, + "grad_norm": 8.111888973871828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483850 + }, + { + "epoch": 2.346633558567024, + "grad_norm": 7.573743801003729e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483860 + }, + { + "epoch": 2.34668205675986, + "grad_norm": 7.839532258913096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483870 + }, + { + "epoch": 2.3467305549526962, + "grad_norm": 7.222870976875129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483880 + }, + { + "epoch": 2.346779053145532, + "grad_norm": 7.841418891985086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483890 + }, + { + "epoch": 2.346827551338368, + "grad_norm": 8.314627280014975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483900 + }, + { + "epoch": 2.3468760495312044, + "grad_norm": 1.0689333294067183e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483910 + }, + { + "epoch": 2.3469245477240404, + "grad_norm": 7.272040534189728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483920 + }, + { + "epoch": 2.3469730459168767, + "grad_norm": 6.526123002004169e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483930 + }, + { + "epoch": 2.3470215441097126, + "grad_norm": 7.55477003622218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483940 + }, + { + "epoch": 2.3470700423025486, + "grad_norm": 7.651604505554133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483950 + }, + { + "epoch": 2.347118540495385, + "grad_norm": 7.456833941432706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483960 + }, + { + "epoch": 2.347167038688221, + "grad_norm": 7.47128865441482e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483970 + }, + { + "epoch": 2.347215536881057, + "grad_norm": 6.991361942709773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483980 + }, + { + "epoch": 2.347264035073893, + "grad_norm": 6.985077334320522e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 483990 + }, + { + "epoch": 2.347312533266729, + "grad_norm": 7.713655350016779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484000 + }, + { + "epoch": 2.3473610314595654, + "grad_norm": 7.6105101243229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484010 + }, + { + "epoch": 2.3474095296524013, + "grad_norm": 7.180257171057747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484020 + }, + { + "epoch": 2.3474580278452373, + "grad_norm": 5.867140089321765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484030 + }, + { + "epoch": 2.3475065260380736, + "grad_norm": 6.94145910529187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484040 + }, + { + "epoch": 2.3475550242309096, + "grad_norm": 6.849387546026264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484050 + }, + { + "epoch": 2.3476035224237455, + "grad_norm": 6.943060952835367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484060 + }, + { + "epoch": 2.347652020616582, + "grad_norm": 6.96752977091819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484070 + }, + { + "epoch": 2.3477005188094178, + "grad_norm": 5.751192588832055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484080 + }, + { + "epoch": 2.347749017002254, + "grad_norm": 6.688396751997061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484090 + }, + { + "epoch": 2.34779751519509, + "grad_norm": 6.559345138157369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484100 + }, + { + "epoch": 2.347846013387926, + "grad_norm": 1.0546211797191063e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484110 + }, + { + "epoch": 2.3478945115807623, + "grad_norm": 6.447720011237834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484120 + }, + { + "epoch": 2.3479430097735983, + "grad_norm": 5.576829948950035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484130 + }, + { + "epoch": 2.3479915079664346, + "grad_norm": 6.722515877299884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484140 + }, + { + "epoch": 2.3480400061592706, + "grad_norm": 6.657352855654608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484150 + }, + { + "epoch": 2.3480885043521065, + "grad_norm": 6.408653234757367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484160 + }, + { + "epoch": 2.348137002544943, + "grad_norm": 6.816511586293927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484170 + }, + { + "epoch": 2.3481855007377788, + "grad_norm": 6.557054348377278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484180 + }, + { + "epoch": 2.3482339989306147, + "grad_norm": 6.4493633544771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484190 + }, + { + "epoch": 2.348282497123451, + "grad_norm": 6.334670388241648e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484200 + }, + { + "epoch": 2.348330995316287, + "grad_norm": 6.518635586871824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484210 + }, + { + "epoch": 2.348379493509123, + "grad_norm": 7.616408765898086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484220 + }, + { + "epoch": 2.3484279917019593, + "grad_norm": 5.142230179444596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484230 + }, + { + "epoch": 2.348476489894795, + "grad_norm": 6.572238362423377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484240 + }, + { + "epoch": 2.3485249880876315, + "grad_norm": 6.07279503128666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484250 + }, + { + "epoch": 2.3485734862804675, + "grad_norm": 6.353227490762947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484260 + }, + { + "epoch": 2.3486219844733034, + "grad_norm": 5.924375159338524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484270 + }, + { + "epoch": 2.3486704826661398, + "grad_norm": 5.028879854762636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484280 + }, + { + "epoch": 2.3487189808589757, + "grad_norm": 6.114551638347621e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484290 + }, + { + "epoch": 2.348767479051812, + "grad_norm": 6.133874421720975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484300 + }, + { + "epoch": 2.348815977244648, + "grad_norm": 6.226716209312144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484310 + }, + { + "epoch": 2.348864475437484, + "grad_norm": 6.930977178853936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484320 + }, + { + "epoch": 2.3489129736303203, + "grad_norm": 4.7661518465247354e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484330 + }, + { + "epoch": 2.348961471823156, + "grad_norm": 5.815493864247401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484340 + }, + { + "epoch": 2.349009970015992, + "grad_norm": 6.05890591032221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484350 + }, + { + "epoch": 2.3490584682088285, + "grad_norm": 1.2007819805148756e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484360 + }, + { + "epoch": 2.3491069664016644, + "grad_norm": 5.644562861562008e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484370 + }, + { + "epoch": 2.3491554645945003, + "grad_norm": 4.912522513222939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484380 + }, + { + "epoch": 2.3492039627873367, + "grad_norm": 6.563680017279694e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484390 + }, + { + "epoch": 2.3492524609801726, + "grad_norm": 5.410209382716857e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484400 + }, + { + "epoch": 2.349300959173009, + "grad_norm": 5.666755100719456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484410 + }, + { + "epoch": 2.349349457365845, + "grad_norm": 5.59899092422711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484420 + }, + { + "epoch": 2.349397955558681, + "grad_norm": 4.5422240191328456e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484430 + }, + { + "epoch": 2.349446453751517, + "grad_norm": 5.613572398033284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484440 + }, + { + "epoch": 2.349494951944353, + "grad_norm": 5.507364448931185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484450 + }, + { + "epoch": 2.3495434501371895, + "grad_norm": 5.258622763903986e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484460 + }, + { + "epoch": 2.3495919483300254, + "grad_norm": 5.534120646188967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484470 + }, + { + "epoch": 2.3496404465228613, + "grad_norm": 4.444934802450007e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484480 + }, + { + "epoch": 2.3496889447156977, + "grad_norm": 5.37665982847102e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484490 + }, + { + "epoch": 2.3497374429085336, + "grad_norm": 5.688698365702294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484500 + }, + { + "epoch": 2.34978594110137, + "grad_norm": 5.304132741912326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484510 + }, + { + "epoch": 2.349834439294206, + "grad_norm": 5.188153977542243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484520 + }, + { + "epoch": 2.349882937487042, + "grad_norm": 4.310446968247561e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484530 + }, + { + "epoch": 2.349931435679878, + "grad_norm": 5.06956098433875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484540 + }, + { + "epoch": 2.349979933872714, + "grad_norm": 5.312819553182635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484550 + }, + { + "epoch": 2.35002843206555, + "grad_norm": 5.3603491778631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484560 + }, + { + "epoch": 2.3500769302583864, + "grad_norm": 5.242273459771241e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484570 + }, + { + "epoch": 2.3501254284512223, + "grad_norm": 4.1080244272961863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484580 + }, + { + "epoch": 2.350173926644058, + "grad_norm": 5.042232373853039e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484590 + }, + { + "epoch": 2.3502224248368946, + "grad_norm": 4.897145231552713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484600 + }, + { + "epoch": 2.3502709230297305, + "grad_norm": 5.014995849705883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484610 + }, + { + "epoch": 2.350319421222567, + "grad_norm": 4.7621864496250055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484620 + }, + { + "epoch": 2.350367919415403, + "grad_norm": 3.777049073505623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484630 + }, + { + "epoch": 2.3504164176082387, + "grad_norm": 4.883726205662242e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484640 + }, + { + "epoch": 2.350464915801075, + "grad_norm": 4.918625222671835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484650 + }, + { + "epoch": 2.350513413993911, + "grad_norm": 5.060052217231714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484660 + }, + { + "epoch": 2.3505619121867474, + "grad_norm": 4.875656713920762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484670 + }, + { + "epoch": 2.3506104103795833, + "grad_norm": 3.635379925981397e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484680 + }, + { + "epoch": 2.350658908572419, + "grad_norm": 4.969733140569588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484690 + }, + { + "epoch": 2.3507074067652556, + "grad_norm": 1.1750528301490704e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484700 + }, + { + "epoch": 2.3507559049580915, + "grad_norm": 4.7280269654947915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484710 + }, + { + "epoch": 2.3508044031509274, + "grad_norm": 5.504210776052787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484720 + }, + { + "epoch": 2.350852901343764, + "grad_norm": 3.524828855461237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484730 + }, + { + "epoch": 2.3509013995365997, + "grad_norm": 4.7158493998722406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484740 + }, + { + "epoch": 2.3509498977294356, + "grad_norm": 4.6032002387619286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484750 + }, + { + "epoch": 2.350998395922272, + "grad_norm": 4.926910150970798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484760 + }, + { + "epoch": 2.351046894115108, + "grad_norm": 4.53420369694868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484770 + }, + { + "epoch": 2.3510953923079443, + "grad_norm": 3.655187583717634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484780 + }, + { + "epoch": 2.35114389050078, + "grad_norm": 4.738533618819929e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484790 + }, + { + "epoch": 2.351192388693616, + "grad_norm": 4.4089952666581667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484800 + }, + { + "epoch": 2.3512408868864525, + "grad_norm": 4.337308325830236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484810 + }, + { + "epoch": 2.3512893850792884, + "grad_norm": 4.605029175763775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484820 + }, + { + "epoch": 2.351337883272125, + "grad_norm": 3.4278482985428127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484830 + }, + { + "epoch": 2.3513863814649607, + "grad_norm": 5.002447096558171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484840 + }, + { + "epoch": 2.3514348796577966, + "grad_norm": 4.325479494582396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484850 + }, + { + "epoch": 2.351483377850633, + "grad_norm": 4.4161191681268974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484860 + }, + { + "epoch": 2.351531876043469, + "grad_norm": 4.537776590041176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484870 + }, + { + "epoch": 2.351580374236305, + "grad_norm": 3.276653615102987e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484880 + }, + { + "epoch": 2.351628872429141, + "grad_norm": 4.104911397462274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484890 + }, + { + "epoch": 2.351677370621977, + "grad_norm": 4.260669470568246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484900 + }, + { + "epoch": 2.351725868814813, + "grad_norm": 4.232694834627182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484910 + }, + { + "epoch": 2.3517743670076494, + "grad_norm": 4.5497145606532285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484920 + }, + { + "epoch": 2.3518228652004853, + "grad_norm": 3.2946473993433756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484930 + }, + { + "epoch": 2.3518713633933217, + "grad_norm": 4.075455706242792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484940 + }, + { + "epoch": 2.3519198615861576, + "grad_norm": 4.0643570287102193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484950 + }, + { + "epoch": 2.3519683597789935, + "grad_norm": 4.253884071658831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484960 + }, + { + "epoch": 2.35201685797183, + "grad_norm": 3.9734018741910404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484970 + }, + { + "epoch": 2.352065356164666, + "grad_norm": 3.003807478307863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484980 + }, + { + "epoch": 2.352113854357502, + "grad_norm": 6.064587978471536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 484990 + }, + { + "epoch": 2.352162352550338, + "grad_norm": 3.873950618071831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485000 + }, + { + "epoch": 2.352210850743174, + "grad_norm": 4.1629422753430845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485010 + }, + { + "epoch": 2.3522593489360104, + "grad_norm": 4.066642418365518e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485020 + }, + { + "epoch": 2.3523078471288463, + "grad_norm": 3.069792455789866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485030 + }, + { + "epoch": 2.3523563453216827, + "grad_norm": 4.03237436330528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485040 + }, + { + "epoch": 2.3524048435145186, + "grad_norm": 3.86911153782421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485050 + }, + { + "epoch": 2.3524533417073545, + "grad_norm": 7.427752848343516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485060 + }, + { + "epoch": 2.352501839900191, + "grad_norm": 3.8952506997702585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485070 + }, + { + "epoch": 2.352550338093027, + "grad_norm": 2.91002379526617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485080 + }, + { + "epoch": 2.3525988362858627, + "grad_norm": 3.9900740489429154e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485090 + }, + { + "epoch": 2.352647334478699, + "grad_norm": 3.739034752925363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485100 + }, + { + "epoch": 2.352695832671535, + "grad_norm": 3.7725806123489747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485110 + }, + { + "epoch": 2.352744330864371, + "grad_norm": 3.759519984214421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485120 + }, + { + "epoch": 2.3527928290572073, + "grad_norm": 2.842839705863298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485130 + }, + { + "epoch": 2.3528413272500432, + "grad_norm": 3.590848223211651e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485140 + }, + { + "epoch": 2.3528898254428796, + "grad_norm": 5.00431383443356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485150 + }, + { + "epoch": 2.3529383236357155, + "grad_norm": 9.730073315949994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485160 + }, + { + "epoch": 2.3529868218285515, + "grad_norm": 4.0718046534493624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485170 + }, + { + "epoch": 2.353035320021388, + "grad_norm": 2.5798979663704813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485180 + }, + { + "epoch": 2.3530838182142237, + "grad_norm": 3.7601213875859685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485190 + }, + { + "epoch": 2.35313231640706, + "grad_norm": 3.3383295772182464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485200 + }, + { + "epoch": 2.353180814599896, + "grad_norm": 3.68217712320984e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485210 + }, + { + "epoch": 2.353229312792732, + "grad_norm": 3.3619079431446153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485220 + }, + { + "epoch": 2.3532778109855683, + "grad_norm": 2.589410144082649e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485230 + }, + { + "epoch": 2.3533263091784042, + "grad_norm": 3.4649698932298634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485240 + }, + { + "epoch": 2.35337480737124, + "grad_norm": 3.397197190224688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485250 + }, + { + "epoch": 2.3534233055640765, + "grad_norm": 3.257339074025367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485260 + }, + { + "epoch": 2.3534718037569125, + "grad_norm": 3.264102019784332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485270 + }, + { + "epoch": 2.3535203019497484, + "grad_norm": 2.4783216190371604e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485280 + }, + { + "epoch": 2.3535688001425847, + "grad_norm": 3.3126869425359473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485290 + }, + { + "epoch": 2.3536172983354207, + "grad_norm": 3.164191468840727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485300 + }, + { + "epoch": 2.353665796528257, + "grad_norm": 4.6039338030823274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485310 + }, + { + "epoch": 2.353714294721093, + "grad_norm": 3.6893501942358853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485320 + }, + { + "epoch": 2.353762792913929, + "grad_norm": 2.5055689434339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485330 + }, + { + "epoch": 2.3538112911067652, + "grad_norm": 3.166835256251943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485340 + }, + { + "epoch": 2.353859789299601, + "grad_norm": 3.2125439020092017e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485350 + }, + { + "epoch": 2.3539082874924375, + "grad_norm": 3.145893856526527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485360 + }, + { + "epoch": 2.3539567856852734, + "grad_norm": 3.3372856478308677e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485370 + }, + { + "epoch": 2.3540052838781094, + "grad_norm": 2.4412364041381807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485380 + }, + { + "epoch": 2.3540537820709457, + "grad_norm": 3.1041673764775624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485390 + }, + { + "epoch": 2.3541022802637817, + "grad_norm": 3.2479323408551863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485400 + }, + { + "epoch": 2.3541507784566176, + "grad_norm": 2.88263578340775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485410 + }, + { + "epoch": 2.354199276649454, + "grad_norm": 3.2166269647859735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485420 + }, + { + "epoch": 2.35424777484229, + "grad_norm": 2.2468486804427812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485430 + }, + { + "epoch": 2.354296273035126, + "grad_norm": 2.9486287189683935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485440 + }, + { + "epoch": 2.354344771227962, + "grad_norm": 3.059764139834442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485450 + }, + { + "epoch": 2.354393269420798, + "grad_norm": 2.908382441546564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485460 + }, + { + "epoch": 2.3544417676136344, + "grad_norm": 2.909097247538739e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485470 + }, + { + "epoch": 2.3544902658064704, + "grad_norm": 2.3195717346879974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485480 + }, + { + "epoch": 2.3545387639993063, + "grad_norm": 3.3251714626203466e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485490 + }, + { + "epoch": 2.3545872621921426, + "grad_norm": 3.22984561762496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485500 + }, + { + "epoch": 2.3546357603849786, + "grad_norm": 3.0181712418198003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485510 + }, + { + "epoch": 2.354684258577815, + "grad_norm": 3.187127788351063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485520 + }, + { + "epoch": 2.354732756770651, + "grad_norm": 2.2386346643088473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485530 + }, + { + "epoch": 2.354781254963487, + "grad_norm": 2.823359466219699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485540 + }, + { + "epoch": 2.354829753156323, + "grad_norm": 2.8208478397573344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485550 + }, + { + "epoch": 2.354878251349159, + "grad_norm": 2.7462647267384455e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485560 + }, + { + "epoch": 2.3549267495419954, + "grad_norm": 2.819711824031401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485570 + }, + { + "epoch": 2.3549752477348314, + "grad_norm": 2.1174544428959052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485580 + }, + { + "epoch": 2.3550237459276673, + "grad_norm": 2.68503015377064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485590 + }, + { + "epoch": 2.3550722441205036, + "grad_norm": 2.8897324000354274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485600 + }, + { + "epoch": 2.3551207423133396, + "grad_norm": 2.866785280275508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485610 + }, + { + "epoch": 2.3551692405061755, + "grad_norm": 2.7326231588631344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485620 + }, + { + "epoch": 2.355217738699012, + "grad_norm": 2.0696838021194708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485630 + }, + { + "epoch": 2.3552662368918478, + "grad_norm": 2.5725177010826883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485640 + }, + { + "epoch": 2.3553147350846837, + "grad_norm": 2.69814933062662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485650 + }, + { + "epoch": 2.35536323327752, + "grad_norm": 8.073225217231084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485660 + }, + { + "epoch": 2.355411731470356, + "grad_norm": 2.7344393060957373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485670 + }, + { + "epoch": 2.3554602296631924, + "grad_norm": 2.0449287774226832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485680 + }, + { + "epoch": 2.3555087278560283, + "grad_norm": 2.5945820425476995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485690 + }, + { + "epoch": 2.355557226048864, + "grad_norm": 2.5131129177680123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485700 + }, + { + "epoch": 2.3556057242417006, + "grad_norm": 2.8527992412819003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485710 + }, + { + "epoch": 2.3556542224345365, + "grad_norm": 2.639507101775962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485720 + }, + { + "epoch": 2.355702720627373, + "grad_norm": 1.9358124347945704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485730 + }, + { + "epoch": 2.3557512188202088, + "grad_norm": 2.598423805011407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485740 + }, + { + "epoch": 2.3557997170130447, + "grad_norm": 2.4961914846244326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485750 + }, + { + "epoch": 2.355848215205881, + "grad_norm": 2.549975874899246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485760 + }, + { + "epoch": 2.355896713398717, + "grad_norm": 2.6321060886402847e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485770 + }, + { + "epoch": 2.355945211591553, + "grad_norm": 1.8243483168589592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485780 + }, + { + "epoch": 2.3559937097843893, + "grad_norm": 2.458019991991023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485790 + }, + { + "epoch": 2.356042207977225, + "grad_norm": 2.510874992367462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485800 + }, + { + "epoch": 2.356090706170061, + "grad_norm": 2.738061652962642e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485810 + }, + { + "epoch": 2.3561392043628975, + "grad_norm": 2.31843941378429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485820 + }, + { + "epoch": 2.3561877025557334, + "grad_norm": 1.9213827329167543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485830 + }, + { + "epoch": 2.3562362007485698, + "grad_norm": 2.36563195699091e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485840 + }, + { + "epoch": 2.3562846989414057, + "grad_norm": 2.3647780267310736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485850 + }, + { + "epoch": 2.3563331971342416, + "grad_norm": 2.4539971832382435e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485860 + }, + { + "epoch": 2.356381695327078, + "grad_norm": 3.107707300387119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485870 + }, + { + "epoch": 2.356430193519914, + "grad_norm": 1.8110961264028447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485880 + }, + { + "epoch": 2.3564786917127503, + "grad_norm": 2.29018766617628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485890 + }, + { + "epoch": 2.356527189905586, + "grad_norm": 2.3864495801717567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485900 + }, + { + "epoch": 2.356575688098422, + "grad_norm": 2.4290071110044664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485910 + }, + { + "epoch": 2.3566241862912585, + "grad_norm": 2.2878742811371922e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485920 + }, + { + "epoch": 2.3566726844840944, + "grad_norm": 1.8539876123213617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485930 + }, + { + "epoch": 2.3567211826769303, + "grad_norm": 2.2182825887284707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485940 + }, + { + "epoch": 2.3567696808697667, + "grad_norm": 2.1356508739245328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485950 + }, + { + "epoch": 2.3568181790626026, + "grad_norm": 2.2904927732270153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485960 + }, + { + "epoch": 2.3568666772554385, + "grad_norm": 2.1576579456450418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485970 + }, + { + "epoch": 2.356915175448275, + "grad_norm": 1.7478615177424217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485980 + }, + { + "epoch": 2.356963673641111, + "grad_norm": 2.3009194194401061e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 485990 + }, + { + "epoch": 2.357012171833947, + "grad_norm": 2.1389979565356043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486000 + }, + { + "epoch": 2.357060670026783, + "grad_norm": 2.146469455510669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486010 + }, + { + "epoch": 2.357109168219619, + "grad_norm": 2.2898103679835913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486020 + }, + { + "epoch": 2.3571576664124554, + "grad_norm": 1.7149653785963892e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486030 + }, + { + "epoch": 2.3572061646052913, + "grad_norm": 4.945982254866976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486040 + }, + { + "epoch": 2.3572546627981277, + "grad_norm": 2.1357200807869958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486050 + }, + { + "epoch": 2.3573031609909636, + "grad_norm": 2.0662025690398877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486060 + }, + { + "epoch": 2.3573516591837995, + "grad_norm": 2.266922365379287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486070 + }, + { + "epoch": 2.357400157376636, + "grad_norm": 1.7195969803651678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486080 + }, + { + "epoch": 2.357448655569472, + "grad_norm": 2.1859634102838754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486090 + }, + { + "epoch": 2.357497153762308, + "grad_norm": 2.0610191597825178e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486100 + }, + { + "epoch": 2.357545651955144, + "grad_norm": 2.9402082191154477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486110 + }, + { + "epoch": 2.35759415014798, + "grad_norm": 2.129542053808109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486120 + }, + { + "epoch": 2.3576426483408164, + "grad_norm": 1.703352126014579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486130 + }, + { + "epoch": 2.3576911465336523, + "grad_norm": 1.964097293694067e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486140 + }, + { + "epoch": 2.3577396447264882, + "grad_norm": 1.951793393573098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486150 + }, + { + "epoch": 2.3577881429193246, + "grad_norm": 1.996503016243878e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486160 + }, + { + "epoch": 2.3578366411121605, + "grad_norm": 2.078548817507908e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486170 + }, + { + "epoch": 2.3578851393049964, + "grad_norm": 1.69022939644492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486180 + }, + { + "epoch": 2.357933637497833, + "grad_norm": 2.02119295522607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486190 + }, + { + "epoch": 2.3579821356906687, + "grad_norm": 1.9316412647185643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486200 + }, + { + "epoch": 2.358030633883505, + "grad_norm": 1.8673463841878402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486210 + }, + { + "epoch": 2.358079132076341, + "grad_norm": 1.995609295590839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486220 + }, + { + "epoch": 2.358127630269177, + "grad_norm": 1.6177924067051208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486230 + }, + { + "epoch": 2.3581761284620133, + "grad_norm": 2.0295944125336973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486240 + }, + { + "epoch": 2.358224626654849, + "grad_norm": 1.9206397894322436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486250 + }, + { + "epoch": 2.3582731248476856, + "grad_norm": 1.9279002572147874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486260 + }, + { + "epoch": 2.3583216230405215, + "grad_norm": 1.9288859220978338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486270 + }, + { + "epoch": 2.3583701212333574, + "grad_norm": 1.715890647346896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486280 + }, + { + "epoch": 2.358418619426194, + "grad_norm": 1.985089994605005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486290 + }, + { + "epoch": 2.3584671176190297, + "grad_norm": 1.9307624654629763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486300 + }, + { + "epoch": 2.3585156158118656, + "grad_norm": 1.895073324931218e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486310 + }, + { + "epoch": 2.358564114004702, + "grad_norm": 1.9680460638937802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486320 + }, + { + "epoch": 2.358612612197538, + "grad_norm": 1.624209033934676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486330 + }, + { + "epoch": 2.358661110390374, + "grad_norm": 1.9294181186069181e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486340 + }, + { + "epoch": 2.35870960858321, + "grad_norm": 1.9856709343457624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486350 + }, + { + "epoch": 2.358758106776046, + "grad_norm": 1.9848728527449566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486360 + }, + { + "epoch": 2.3588066049688825, + "grad_norm": 1.7411248620646802e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486370 + }, + { + "epoch": 2.3588551031617184, + "grad_norm": 1.5661329655358713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486380 + }, + { + "epoch": 2.3589036013545543, + "grad_norm": 1.8483392238977103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486390 + }, + { + "epoch": 2.3589520995473907, + "grad_norm": 1.8212224972558033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486400 + }, + { + "epoch": 2.3590005977402266, + "grad_norm": 1.7594156531686167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486410 + }, + { + "epoch": 2.359049095933063, + "grad_norm": 1.9224307834520005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486420 + }, + { + "epoch": 2.359097594125899, + "grad_norm": 1.523524417734734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486430 + }, + { + "epoch": 2.359146092318735, + "grad_norm": 1.8122783274066023e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486440 + }, + { + "epoch": 2.359194590511571, + "grad_norm": 1.8068510598823195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486450 + }, + { + "epoch": 2.359243088704407, + "grad_norm": 1.7370444993503042e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486460 + }, + { + "epoch": 2.359291586897243, + "grad_norm": 1.7788175910027348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486470 + }, + { + "epoch": 2.3593400850900794, + "grad_norm": 1.8000324075728713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486480 + }, + { + "epoch": 2.3593885832829153, + "grad_norm": 1.7526818396618182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486490 + }, + { + "epoch": 2.3594370814757513, + "grad_norm": 1.6277591896596277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486500 + }, + { + "epoch": 2.3594855796685876, + "grad_norm": 1.7183924683195073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486510 + }, + { + "epoch": 2.3595340778614236, + "grad_norm": 1.64997217666496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486520 + }, + { + "epoch": 2.35958257605426, + "grad_norm": 1.5257292318437976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486530 + }, + { + "epoch": 2.359631074247096, + "grad_norm": 1.6535041424958763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486540 + }, + { + "epoch": 2.3596795724399318, + "grad_norm": 1.6320092299793032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486550 + }, + { + "epoch": 2.359728070632768, + "grad_norm": 1.7648531525082944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486560 + }, + { + "epoch": 2.359776568825604, + "grad_norm": 1.7355249326556077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486570 + }, + { + "epoch": 2.3598250670184404, + "grad_norm": 1.4415948612622742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486580 + }, + { + "epoch": 2.3598735652112763, + "grad_norm": 1.7583012379418506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486590 + }, + { + "epoch": 2.3599220634041123, + "grad_norm": 1.778991673972996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486600 + }, + { + "epoch": 2.3599705615969486, + "grad_norm": 1.7209467273460177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486610 + }, + { + "epoch": 2.3600190597897845, + "grad_norm": 1.7092880000291188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486620 + }, + { + "epoch": 2.360067557982621, + "grad_norm": 1.4739309506239806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486630 + }, + { + "epoch": 2.360116056175457, + "grad_norm": 1.6092805310563563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486640 + }, + { + "epoch": 2.3601645543682928, + "grad_norm": 1.6822333748223173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486650 + }, + { + "epoch": 2.360213052561129, + "grad_norm": 1.5998867297639663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486660 + }, + { + "epoch": 2.360261550753965, + "grad_norm": 1.7076207825539313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486670 + }, + { + "epoch": 2.360310048946801, + "grad_norm": 1.4274573345574026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486680 + }, + { + "epoch": 2.3603585471396373, + "grad_norm": 1.4865393893614964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486690 + }, + { + "epoch": 2.3604070453324733, + "grad_norm": 1.5296690492050402e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486700 + }, + { + "epoch": 2.360455543525309, + "grad_norm": 1.6819322468109021e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486710 + }, + { + "epoch": 2.3605040417181455, + "grad_norm": 1.580705344395028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486720 + }, + { + "epoch": 2.3605525399109815, + "grad_norm": 1.471196924285323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486730 + }, + { + "epoch": 2.360601038103818, + "grad_norm": 1.6952637338363274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486740 + }, + { + "epoch": 2.3606495362966537, + "grad_norm": 1.4259560998652887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486750 + }, + { + "epoch": 2.3606980344894897, + "grad_norm": 1.6481024545100809e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486760 + }, + { + "epoch": 2.360746532682326, + "grad_norm": 1.5440332390426192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486770 + }, + { + "epoch": 2.360795030875162, + "grad_norm": 1.4083580879287183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486780 + }, + { + "epoch": 2.3608435290679983, + "grad_norm": 1.551903068275351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486790 + }, + { + "epoch": 2.3608920272608342, + "grad_norm": 1.5953116871969542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486800 + }, + { + "epoch": 2.36094052545367, + "grad_norm": 1.488654532977307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486810 + }, + { + "epoch": 2.3609890236465065, + "grad_norm": 1.5920018370252365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486820 + }, + { + "epoch": 2.3610375218393425, + "grad_norm": 1.360599668487339e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486830 + }, + { + "epoch": 2.3610860200321784, + "grad_norm": 1.485478833274101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486840 + }, + { + "epoch": 2.3611345182250147, + "grad_norm": 1.5740351955173537e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486850 + }, + { + "epoch": 2.3611830164178507, + "grad_norm": 1.4836483330782357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486860 + }, + { + "epoch": 2.3612315146106866, + "grad_norm": 1.457102030144597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486870 + }, + { + "epoch": 2.361280012803523, + "grad_norm": 1.294891660563735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486880 + }, + { + "epoch": 2.361328510996359, + "grad_norm": 1.3767538575848448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486890 + }, + { + "epoch": 2.3613770091891952, + "grad_norm": 1.472108550615303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486900 + }, + { + "epoch": 2.361425507382031, + "grad_norm": 1.5155359278651304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486910 + }, + { + "epoch": 2.361474005574867, + "grad_norm": 1.5979796330611862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486920 + }, + { + "epoch": 2.3615225037677035, + "grad_norm": 1.3041943702774006e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486930 + }, + { + "epoch": 2.3615710019605394, + "grad_norm": 1.4792108515848668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486940 + }, + { + "epoch": 2.3616195001533757, + "grad_norm": 1.5254731522418297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486950 + }, + { + "epoch": 2.3616679983462117, + "grad_norm": 1.47006588235854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486960 + }, + { + "epoch": 2.3617164965390476, + "grad_norm": 1.4496967537525052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486970 + }, + { + "epoch": 2.361764994731884, + "grad_norm": 1.1939519595216552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486980 + }, + { + "epoch": 2.36181349292472, + "grad_norm": 1.4100945122663688e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 486990 + }, + { + "epoch": 2.361861991117556, + "grad_norm": 1.543620840038784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487000 + }, + { + "epoch": 2.361910489310392, + "grad_norm": 1.446730379939254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487010 + }, + { + "epoch": 2.361958987503228, + "grad_norm": 1.499486899092517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487020 + }, + { + "epoch": 2.362007485696064, + "grad_norm": 1.1568118907234748e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487030 + }, + { + "epoch": 2.3620559838889004, + "grad_norm": 1.4401169323718932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487040 + }, + { + "epoch": 2.3621044820817363, + "grad_norm": 1.370760998042897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487050 + }, + { + "epoch": 2.3621529802745727, + "grad_norm": 1.345167959243554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487060 + }, + { + "epoch": 2.3622014784674086, + "grad_norm": 1.3021215750086412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487070 + }, + { + "epoch": 2.3622499766602445, + "grad_norm": 1.156304065830227e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487080 + }, + { + "epoch": 2.362298474853081, + "grad_norm": 1.3699754219942406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487090 + }, + { + "epoch": 2.362346973045917, + "grad_norm": 1.3357785633161257e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487100 + }, + { + "epoch": 2.362395471238753, + "grad_norm": 1.283976871491177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487110 + }, + { + "epoch": 2.362443969431589, + "grad_norm": 1.417920003632389e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487120 + }, + { + "epoch": 2.362492467624425, + "grad_norm": 1.1999686932995246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487130 + }, + { + "epoch": 2.3625409658172614, + "grad_norm": 1.3815889587931451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487140 + }, + { + "epoch": 2.3625894640100973, + "grad_norm": 1.3222856409811357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487150 + }, + { + "epoch": 2.3626379622029337, + "grad_norm": 2.112974755164032e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487160 + }, + { + "epoch": 2.3626864603957696, + "grad_norm": 1.2457242348773434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487170 + }, + { + "epoch": 2.3627349585886055, + "grad_norm": 1.0884506451702691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487180 + }, + { + "epoch": 2.362783456781442, + "grad_norm": 1.183190363462927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487190 + }, + { + "epoch": 2.362831954974278, + "grad_norm": 1.2859223375016882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487200 + }, + { + "epoch": 2.3628804531671137, + "grad_norm": 1.2908080293527746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487210 + }, + { + "epoch": 2.36292895135995, + "grad_norm": 1.2637438828733139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487220 + }, + { + "epoch": 2.362977449552786, + "grad_norm": 1.0296004404608539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487230 + }, + { + "epoch": 2.363025947745622, + "grad_norm": 1.2219520328926592e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487240 + }, + { + "epoch": 2.3630744459384583, + "grad_norm": 1.3266482312701555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487250 + }, + { + "epoch": 2.363122944131294, + "grad_norm": 1.1736999994127473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487260 + }, + { + "epoch": 2.3631714423241306, + "grad_norm": 1.2072712252120255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487270 + }, + { + "epoch": 2.3632199405169665, + "grad_norm": 9.270078749068489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487280 + }, + { + "epoch": 2.3632684387098024, + "grad_norm": 1.1174829950277854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487290 + }, + { + "epoch": 2.3633169369026388, + "grad_norm": 1.2211847888465854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487300 + }, + { + "epoch": 2.3633654350954747, + "grad_norm": 1.1723096093874119e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487310 + }, + { + "epoch": 2.363413933288311, + "grad_norm": 1.1537183297605225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487320 + }, + { + "epoch": 2.363462431481147, + "grad_norm": 9.837344094876244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487330 + }, + { + "epoch": 2.363510929673983, + "grad_norm": 1.1234813968030721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487340 + }, + { + "epoch": 2.3635594278668193, + "grad_norm": 1.1198390126310187e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487350 + }, + { + "epoch": 2.363607926059655, + "grad_norm": 1.1183195880448693e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487360 + }, + { + "epoch": 2.363656424252491, + "grad_norm": 1.1211557904289293e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487370 + }, + { + "epoch": 2.3637049224453275, + "grad_norm": 8.693233866097216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487380 + }, + { + "epoch": 2.3637534206381634, + "grad_norm": 1.0739380229551898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487390 + }, + { + "epoch": 2.3638019188309993, + "grad_norm": 1.1372421937494437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487400 + }, + { + "epoch": 2.3638504170238357, + "grad_norm": 1.2894633982796222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487410 + }, + { + "epoch": 2.3638989152166716, + "grad_norm": 1.0525104698899668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487420 + }, + { + "epoch": 2.363947413409508, + "grad_norm": 8.910193116662413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487430 + }, + { + "epoch": 2.363995911602344, + "grad_norm": 1.0116623627709487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487440 + }, + { + "epoch": 2.36404440979518, + "grad_norm": 1.1460667082019427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487450 + }, + { + "epoch": 2.364092907988016, + "grad_norm": 1.192746736933259e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487460 + }, + { + "epoch": 2.364141406180852, + "grad_norm": 9.79585905724889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487470 + }, + { + "epoch": 2.3641899043736885, + "grad_norm": 8.164838760649218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487480 + }, + { + "epoch": 2.3642384025665244, + "grad_norm": 1.0787751136831503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487490 + }, + { + "epoch": 2.3642869007593603, + "grad_norm": 1.0245907589023773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487500 + }, + { + "epoch": 2.3643353989521967, + "grad_norm": 9.191676042519248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487510 + }, + { + "epoch": 2.3643838971450326, + "grad_norm": 9.389218291744328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487520 + }, + { + "epoch": 2.3644323953378685, + "grad_norm": 7.930059098271158e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487530 + }, + { + "epoch": 2.364480893530705, + "grad_norm": 1.0015970275389918e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487540 + }, + { + "epoch": 2.364529391723541, + "grad_norm": 1.0441725351029163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487550 + }, + { + "epoch": 2.364577889916377, + "grad_norm": 9.57846637561488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487560 + }, + { + "epoch": 2.364626388109213, + "grad_norm": 9.986094084979413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487570 + }, + { + "epoch": 2.364674886302049, + "grad_norm": 7.420470637953258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487580 + }, + { + "epoch": 2.3647233844948854, + "grad_norm": 9.704494630113913e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487590 + }, + { + "epoch": 2.3647718826877213, + "grad_norm": 1.0041063802646022e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487600 + }, + { + "epoch": 2.3648203808805572, + "grad_norm": 9.220261176778877e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487610 + }, + { + "epoch": 2.3648688790733936, + "grad_norm": 9.458941718776259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487620 + }, + { + "epoch": 2.3649173772662295, + "grad_norm": 7.87987559647263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487630 + }, + { + "epoch": 2.364965875459066, + "grad_norm": 9.636615772024015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487640 + }, + { + "epoch": 2.365014373651902, + "grad_norm": 8.878257773403675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487650 + }, + { + "epoch": 2.3650628718447377, + "grad_norm": 8.576846255436976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487660 + }, + { + "epoch": 2.365111370037574, + "grad_norm": 9.707613202181165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487670 + }, + { + "epoch": 2.36515986823041, + "grad_norm": 9.463689565336608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487680 + }, + { + "epoch": 2.3652083664232464, + "grad_norm": 9.5555172663353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487690 + }, + { + "epoch": 2.3652568646160823, + "grad_norm": 8.236218462798206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487700 + }, + { + "epoch": 2.3653053628089182, + "grad_norm": 8.323078048988464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487710 + }, + { + "epoch": 2.3653538610017546, + "grad_norm": 8.527720041229259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487720 + }, + { + "epoch": 2.3654023591945905, + "grad_norm": 8.371718962507657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487730 + }, + { + "epoch": 2.3654508573874264, + "grad_norm": 8.445684329672076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487740 + }, + { + "epoch": 2.365499355580263, + "grad_norm": 8.932124728744384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487750 + }, + { + "epoch": 2.3655478537730987, + "grad_norm": 9.380256216218186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487760 + }, + { + "epoch": 2.3655963519659347, + "grad_norm": 8.252341388015338e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487770 + }, + { + "epoch": 2.365644850158771, + "grad_norm": 6.85218850549063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487780 + }, + { + "epoch": 2.365693348351607, + "grad_norm": 8.368331094743553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487790 + }, + { + "epoch": 2.3657418465444433, + "grad_norm": 8.087577896276343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487800 + }, + { + "epoch": 2.3657903447372792, + "grad_norm": 7.74949455717433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487810 + }, + { + "epoch": 2.365838842930115, + "grad_norm": 7.721732941945447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487820 + }, + { + "epoch": 2.3658873411229515, + "grad_norm": 6.691874432362965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487830 + }, + { + "epoch": 2.3659358393157874, + "grad_norm": 8.126325212742813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487840 + }, + { + "epoch": 2.365984337508624, + "grad_norm": 7.439016513899332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487850 + }, + { + "epoch": 2.3660328357014597, + "grad_norm": 8.409615048776686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487860 + }, + { + "epoch": 2.3660813338942956, + "grad_norm": 7.641551746928599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487870 + }, + { + "epoch": 2.366129832087132, + "grad_norm": 7.070071461612315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487880 + }, + { + "epoch": 2.366178330279968, + "grad_norm": 8.450022903616627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487890 + }, + { + "epoch": 2.366226828472804, + "grad_norm": 7.575125238190594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487900 + }, + { + "epoch": 2.36627532666564, + "grad_norm": 8.179321753232216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487910 + }, + { + "epoch": 2.366323824858476, + "grad_norm": 7.984864680565806e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487920 + }, + { + "epoch": 2.366372323051312, + "grad_norm": 6.54991580972819e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487930 + }, + { + "epoch": 2.3664208212441484, + "grad_norm": 7.847759775359009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487940 + }, + { + "epoch": 2.3664693194369844, + "grad_norm": 7.539261304145839e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487950 + }, + { + "epoch": 2.3665178176298207, + "grad_norm": 7.570555027314185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487960 + }, + { + "epoch": 2.3665663158226566, + "grad_norm": 7.279265901161125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487970 + }, + { + "epoch": 2.3666148140154926, + "grad_norm": 5.8376727452014165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487980 + }, + { + "epoch": 2.366663312208329, + "grad_norm": 7.28416011952504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 487990 + }, + { + "epoch": 2.366711810401165, + "grad_norm": 7.113425937177453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488000 + }, + { + "epoch": 2.366760308594001, + "grad_norm": 6.915026062870311e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488010 + }, + { + "epoch": 2.366808806786837, + "grad_norm": 7.831621928744426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488020 + }, + { + "epoch": 2.366857304979673, + "grad_norm": 6.501444005380108e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488030 + }, + { + "epoch": 2.3669058031725094, + "grad_norm": 7.552495873142107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488040 + }, + { + "epoch": 2.3669543013653453, + "grad_norm": 8.40809448732216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488050 + }, + { + "epoch": 2.3670027995581813, + "grad_norm": 8.391782557737315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488060 + }, + { + "epoch": 2.3670512977510176, + "grad_norm": 7.360778653264788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488070 + }, + { + "epoch": 2.3670997959438536, + "grad_norm": 5.893098631304383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488080 + }, + { + "epoch": 2.36714829413669, + "grad_norm": 6.93868926759933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488090 + }, + { + "epoch": 2.367196792329526, + "grad_norm": 7.71895400930589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488100 + }, + { + "epoch": 2.3672452905223618, + "grad_norm": 7.218700659450406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488110 + }, + { + "epoch": 2.367293788715198, + "grad_norm": 6.759438520020922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488120 + }, + { + "epoch": 2.367342286908034, + "grad_norm": 5.682815285013021e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488130 + }, + { + "epoch": 2.36739078510087, + "grad_norm": 6.674130048622828e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488140 + }, + { + "epoch": 2.3674392832937063, + "grad_norm": 7.816026936779963e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488150 + }, + { + "epoch": 2.3674877814865423, + "grad_norm": 6.662995843953468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488160 + }, + { + "epoch": 2.3675362796793786, + "grad_norm": 6.914887507036838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488170 + }, + { + "epoch": 2.3675847778722146, + "grad_norm": 5.6626518585289887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488180 + }, + { + "epoch": 2.3676332760650505, + "grad_norm": 6.934349272569307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488190 + }, + { + "epoch": 2.367681774257887, + "grad_norm": 6.54911644915046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488200 + }, + { + "epoch": 2.3677302724507228, + "grad_norm": 7.613295593955627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488210 + }, + { + "epoch": 2.367778770643559, + "grad_norm": 4.476404171782633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488220 + }, + { + "epoch": 2.367827268836395, + "grad_norm": 6.042359501634564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488230 + }, + { + "epoch": 2.367875767029231, + "grad_norm": 6.493227999726514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488240 + }, + { + "epoch": 2.3679242652220673, + "grad_norm": 6.864495816216731e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488250 + }, + { + "epoch": 2.3679727634149033, + "grad_norm": 6.771262661686706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488260 + }, + { + "epoch": 2.368021261607739, + "grad_norm": 6.434952126710414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488270 + }, + { + "epoch": 2.3680697598005755, + "grad_norm": 1.3217903926943109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488280 + }, + { + "epoch": 2.3681182579934115, + "grad_norm": 6.661812790298427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488290 + }, + { + "epoch": 2.3681667561862474, + "grad_norm": 6.36070254245169e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488300 + }, + { + "epoch": 2.3682152543790838, + "grad_norm": 7.125601086954703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488310 + }, + { + "epoch": 2.3682637525719197, + "grad_norm": 6.982590150528267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488320 + }, + { + "epoch": 2.368312250764756, + "grad_norm": 5.574050021550647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488330 + }, + { + "epoch": 2.368360748957592, + "grad_norm": 6.348930980948353e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488340 + }, + { + "epoch": 2.368409247150428, + "grad_norm": 6.650924433415639e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488350 + }, + { + "epoch": 2.3684577453432643, + "grad_norm": 6.982543965250443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488360 + }, + { + "epoch": 2.3685062435361, + "grad_norm": 6.25893505912245e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488370 + }, + { + "epoch": 2.3685547417289365, + "grad_norm": 5.410075232248346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488380 + }, + { + "epoch": 2.3686032399217725, + "grad_norm": 7.364900511674932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488390 + }, + { + "epoch": 2.3686517381146084, + "grad_norm": 6.439872635155552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488400 + }, + { + "epoch": 2.3687002363074448, + "grad_norm": 6.027775611983088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488410 + }, + { + "epoch": 2.3687487345002807, + "grad_norm": 6.360034632280076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488420 + }, + { + "epoch": 2.3687972326931166, + "grad_norm": 5.193093954858341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488430 + }, + { + "epoch": 2.368845730885953, + "grad_norm": 6.025911147844454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488440 + }, + { + "epoch": 2.368894229078789, + "grad_norm": 7.05491771668676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488450 + }, + { + "epoch": 2.368942727271625, + "grad_norm": 7.535656720847328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488460 + }, + { + "epoch": 2.368991225464461, + "grad_norm": 6.270538932540148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488470 + }, + { + "epoch": 2.369039723657297, + "grad_norm": 5.439467898327166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488480 + }, + { + "epoch": 2.3690882218501335, + "grad_norm": 6.488495785106352e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488490 + }, + { + "epoch": 2.3691367200429694, + "grad_norm": 9.51969099105554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488500 + }, + { + "epoch": 2.3691852182358053, + "grad_norm": 6.092493975984326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488510 + }, + { + "epoch": 2.3692337164286417, + "grad_norm": 6.404856378594559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488520 + }, + { + "epoch": 2.3692822146214776, + "grad_norm": 5.213621889765818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488530 + }, + { + "epoch": 2.369330712814314, + "grad_norm": 5.7635617167761666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488540 + }, + { + "epoch": 2.36937921100715, + "grad_norm": 6.292879817237917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488550 + }, + { + "epoch": 2.369427709199986, + "grad_norm": 6.068724900387679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488560 + }, + { + "epoch": 2.369476207392822, + "grad_norm": 5.971633498802476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488570 + }, + { + "epoch": 2.369524705585658, + "grad_norm": 5.588004725609608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488580 + }, + { + "epoch": 2.3695732037784945, + "grad_norm": 6.133616636816441e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488590 + }, + { + "epoch": 2.3696217019713304, + "grad_norm": 6.025410925758479e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488600 + }, + { + "epoch": 2.3696702001641663, + "grad_norm": 5.878853670537865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488610 + }, + { + "epoch": 2.3697186983570027, + "grad_norm": 5.8622440235467366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488620 + }, + { + "epoch": 2.3697671965498386, + "grad_norm": 5.2239904846373975e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488630 + }, + { + "epoch": 2.3698156947426745, + "grad_norm": 5.964924554291429e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488640 + }, + { + "epoch": 2.369864192935511, + "grad_norm": 6.165056021245618e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488650 + }, + { + "epoch": 2.369912691128347, + "grad_norm": 5.874080954981764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488660 + }, + { + "epoch": 2.3699611893211827, + "grad_norm": 5.704610828161094e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488670 + }, + { + "epoch": 2.370009687514019, + "grad_norm": 6.045537759291619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488680 + }, + { + "epoch": 2.370058185706855, + "grad_norm": 5.440584516236413e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488690 + }, + { + "epoch": 2.3701066838996914, + "grad_norm": 5.902262856238849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488700 + }, + { + "epoch": 2.3701551820925273, + "grad_norm": 5.34581410249757e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488710 + }, + { + "epoch": 2.370203680285363, + "grad_norm": 6.02314997877329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488720 + }, + { + "epoch": 2.3702521784781996, + "grad_norm": 5.857645746232265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488730 + }, + { + "epoch": 2.3703006766710355, + "grad_norm": 6.261838336740766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488740 + }, + { + "epoch": 2.370349174863872, + "grad_norm": 6.041526745548254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488750 + }, + { + "epoch": 2.370397673056708, + "grad_norm": 6.286381193376656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488760 + }, + { + "epoch": 2.3704461712495437, + "grad_norm": 5.899546806631406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488770 + }, + { + "epoch": 2.37049466944238, + "grad_norm": 5.531225255595018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488780 + }, + { + "epoch": 2.370543167635216, + "grad_norm": 5.7580177070803984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488790 + }, + { + "epoch": 2.370591665828052, + "grad_norm": 6.13110131553185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488800 + }, + { + "epoch": 2.3706401640208883, + "grad_norm": 5.4579928132625355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488810 + }, + { + "epoch": 2.370688662213724, + "grad_norm": 5.5487252126340536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488820 + }, + { + "epoch": 2.37073716040656, + "grad_norm": 4.9599520934862085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488830 + }, + { + "epoch": 2.3707856585993965, + "grad_norm": 5.7705101141891646e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488840 + }, + { + "epoch": 2.3708341567922324, + "grad_norm": 5.8064170360694334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488850 + }, + { + "epoch": 2.370882654985069, + "grad_norm": 5.582702300443998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488860 + }, + { + "epoch": 2.3709311531779047, + "grad_norm": 5.036292449744906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488870 + }, + { + "epoch": 2.3709796513707406, + "grad_norm": 5.553294712967727e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488880 + }, + { + "epoch": 2.371028149563577, + "grad_norm": 5.381334133858218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488890 + }, + { + "epoch": 2.371076647756413, + "grad_norm": 5.577616235541427e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488900 + }, + { + "epoch": 2.3711251459492493, + "grad_norm": 5.3341146610819123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488910 + }, + { + "epoch": 2.371173644142085, + "grad_norm": 5.5115844332931374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488920 + }, + { + "epoch": 2.371222142334921, + "grad_norm": 4.998192792982081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488930 + }, + { + "epoch": 2.3712706405277575, + "grad_norm": 5.948954040491117e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488940 + }, + { + "epoch": 2.3713191387205934, + "grad_norm": 5.443276762662208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488950 + }, + { + "epoch": 2.3713676369134293, + "grad_norm": 6.21440108261595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488960 + }, + { + "epoch": 2.3714161351062657, + "grad_norm": 5.39632090124087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488970 + }, + { + "epoch": 2.3714646332991016, + "grad_norm": 5.0628521819362504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488980 + }, + { + "epoch": 2.3715131314919375, + "grad_norm": 5.739892117162526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 488990 + }, + { + "epoch": 2.371561629684774, + "grad_norm": 5.690726823104342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489000 + }, + { + "epoch": 2.37161012787761, + "grad_norm": 5.505093980673337e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489010 + }, + { + "epoch": 2.371658626070446, + "grad_norm": 5.404358915939156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489020 + }, + { + "epoch": 2.371707124263282, + "grad_norm": 4.8409816599814803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489030 + }, + { + "epoch": 2.371755622456118, + "grad_norm": 5.4185825604236015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489040 + }, + { + "epoch": 2.3718041206489544, + "grad_norm": 5.194586094603437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489050 + }, + { + "epoch": 2.3718526188417903, + "grad_norm": 5.3279066491995763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489060 + }, + { + "epoch": 2.3719011170346267, + "grad_norm": 5.048364215554102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489070 + }, + { + "epoch": 2.3719496152274626, + "grad_norm": 4.89122200519887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489080 + }, + { + "epoch": 2.3719981134202985, + "grad_norm": 5.1548529000911e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489090 + }, + { + "epoch": 2.372046611613135, + "grad_norm": 4.348567017586902e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489100 + }, + { + "epoch": 2.372095109805971, + "grad_norm": 5.449807005675211e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489110 + }, + { + "epoch": 2.372143607998807, + "grad_norm": 5.310010209313987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489120 + }, + { + "epoch": 2.372192106191643, + "grad_norm": 5.179786910503026e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489130 + }, + { + "epoch": 2.372240604384479, + "grad_norm": 5.174307560196212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489140 + }, + { + "epoch": 2.3722891025773154, + "grad_norm": 8.66258034193379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489150 + }, + { + "epoch": 2.3723376007701513, + "grad_norm": 5.105027867102763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489160 + }, + { + "epoch": 2.3723860989629872, + "grad_norm": 4.9637165489002655e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489170 + }, + { + "epoch": 2.3724345971558236, + "grad_norm": 4.9524910394893595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489180 + }, + { + "epoch": 2.3724830953486595, + "grad_norm": 5.536789871030123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489190 + }, + { + "epoch": 2.3725315935414955, + "grad_norm": 5.108211809101704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489200 + }, + { + "epoch": 2.372580091734332, + "grad_norm": 5.138010550354011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489210 + }, + { + "epoch": 2.3726285899271677, + "grad_norm": 7.6549817151772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489220 + }, + { + "epoch": 2.372677088120004, + "grad_norm": 4.723122515315481e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489230 + }, + { + "epoch": 2.37272558631284, + "grad_norm": 5.536843161735305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489240 + }, + { + "epoch": 2.372774084505676, + "grad_norm": 5.293926008675953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489250 + }, + { + "epoch": 2.3728225826985123, + "grad_norm": 4.9194515128192506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489260 + }, + { + "epoch": 2.3728710808913482, + "grad_norm": 5.155662918809867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489270 + }, + { + "epoch": 2.3729195790841846, + "grad_norm": 4.65858747134007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489280 + }, + { + "epoch": 2.3729680772770205, + "grad_norm": 5.0026493170207686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489290 + }, + { + "epoch": 2.3730165754698564, + "grad_norm": 5.526294089008843e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489300 + }, + { + "epoch": 2.373065073662693, + "grad_norm": 5.0123404093938007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489310 + }, + { + "epoch": 2.3731135718555287, + "grad_norm": 5.040974571102197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489320 + }, + { + "epoch": 2.3731620700483647, + "grad_norm": 5.648445977612937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489330 + }, + { + "epoch": 2.373210568241201, + "grad_norm": 5.3402274602376565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489340 + }, + { + "epoch": 2.373259066434037, + "grad_norm": 5.0780723626075996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489350 + }, + { + "epoch": 2.373307564626873, + "grad_norm": 5.2373273717876145e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489360 + }, + { + "epoch": 2.3733560628197092, + "grad_norm": 4.959989752251204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489370 + }, + { + "epoch": 2.373404561012545, + "grad_norm": 4.76907437985119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489380 + }, + { + "epoch": 2.3734530592053815, + "grad_norm": 5.473289022006611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489390 + }, + { + "epoch": 2.3735015573982174, + "grad_norm": 5.353112797479298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489400 + }, + { + "epoch": 2.3735500555910534, + "grad_norm": 5.081158249709006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489410 + }, + { + "epoch": 2.3735985537838897, + "grad_norm": 5.0930832884432675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489420 + }, + { + "epoch": 2.3736470519767257, + "grad_norm": 4.967594691152044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489430 + }, + { + "epoch": 2.373695550169562, + "grad_norm": 4.840340395162457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489440 + }, + { + "epoch": 2.373744048362398, + "grad_norm": 4.9283666925248326e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489450 + }, + { + "epoch": 2.373792546555234, + "grad_norm": 4.695544220112424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489460 + }, + { + "epoch": 2.3738410447480702, + "grad_norm": 4.640662965016418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489470 + }, + { + "epoch": 2.373889542940906, + "grad_norm": 4.634506822753792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489480 + }, + { + "epoch": 2.373938041133742, + "grad_norm": 4.9023118009472455e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489490 + }, + { + "epoch": 2.3739865393265784, + "grad_norm": 4.8847329736645406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489500 + }, + { + "epoch": 2.3740350375194144, + "grad_norm": 5.832414373685424e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489510 + }, + { + "epoch": 2.3740835357122503, + "grad_norm": 4.7952742221468725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489520 + }, + { + "epoch": 2.3741320339050866, + "grad_norm": 4.499811367963957e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489530 + }, + { + "epoch": 2.3741805320979226, + "grad_norm": 5.012306658613852e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489540 + }, + { + "epoch": 2.374229030290759, + "grad_norm": 5.22475929187749e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489550 + }, + { + "epoch": 2.374277528483595, + "grad_norm": 4.901649219846149e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489560 + }, + { + "epoch": 2.374326026676431, + "grad_norm": 4.8565905075292903e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489570 + }, + { + "epoch": 2.374374524869267, + "grad_norm": 4.537173126095695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489580 + }, + { + "epoch": 2.374423023062103, + "grad_norm": 4.852341817240813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489590 + }, + { + "epoch": 2.3744715212549394, + "grad_norm": 4.7434095762355355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489600 + }, + { + "epoch": 2.3745200194477754, + "grad_norm": 4.0889173646974086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489610 + }, + { + "epoch": 2.3745685176406113, + "grad_norm": 5.049519913313816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489620 + }, + { + "epoch": 2.3746170158334476, + "grad_norm": 4.5012505722752394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489630 + }, + { + "epoch": 2.3746655140262836, + "grad_norm": 4.843588286007616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489640 + }, + { + "epoch": 2.37471401221912, + "grad_norm": 4.7401336189523136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489650 + }, + { + "epoch": 2.374762510411956, + "grad_norm": 4.695687749745048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489660 + }, + { + "epoch": 2.3748110086047918, + "grad_norm": 1.1984754166860512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489670 + }, + { + "epoch": 2.374859506797628, + "grad_norm": 5.355567367359981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489680 + }, + { + "epoch": 2.374908004990464, + "grad_norm": 4.945028564407039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489690 + }, + { + "epoch": 2.3749565031833, + "grad_norm": 5.128860891545628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489700 + }, + { + "epoch": 2.3750050013761363, + "grad_norm": 4.414706822331027e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489710 + }, + { + "epoch": 2.3750534995689723, + "grad_norm": 4.8523929763177875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489720 + }, + { + "epoch": 2.375101997761808, + "grad_norm": 4.1557612462383986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489730 + }, + { + "epoch": 2.3751504959546446, + "grad_norm": 4.750652493612506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489740 + }, + { + "epoch": 2.3751989941474805, + "grad_norm": 4.759926142128279e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489750 + }, + { + "epoch": 2.375247492340317, + "grad_norm": 4.771204942244367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489760 + }, + { + "epoch": 2.3752959905331528, + "grad_norm": 5.060437047177402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489770 + }, + { + "epoch": 2.3753444887259887, + "grad_norm": 4.418048149545939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489780 + }, + { + "epoch": 2.375392986918825, + "grad_norm": 4.568419242900745e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489790 + }, + { + "epoch": 2.375441485111661, + "grad_norm": 4.605099945820257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489800 + }, + { + "epoch": 2.3754899833044973, + "grad_norm": 5.012241999224898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489810 + }, + { + "epoch": 2.3755384814973333, + "grad_norm": 4.715836965374365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489820 + }, + { + "epoch": 2.375586979690169, + "grad_norm": 4.534676278922234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489830 + }, + { + "epoch": 2.3756354778830056, + "grad_norm": 6.562735421766774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489840 + }, + { + "epoch": 2.3756839760758415, + "grad_norm": 4.54831834417746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489850 + }, + { + "epoch": 2.3757324742686774, + "grad_norm": 4.5897735390099115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489860 + }, + { + "epoch": 2.3757809724615138, + "grad_norm": 5.045411555215651e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489870 + }, + { + "epoch": 2.3758294706543497, + "grad_norm": 4.3700794094547746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489880 + }, + { + "epoch": 2.3758779688471856, + "grad_norm": 4.654568641626611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489890 + }, + { + "epoch": 2.375926467040022, + "grad_norm": 4.2391523180640434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489900 + }, + { + "epoch": 2.375974965232858, + "grad_norm": 4.2677690714754135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489910 + }, + { + "epoch": 2.3760234634256943, + "grad_norm": 4.4246185382235126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489920 + }, + { + "epoch": 2.37607196161853, + "grad_norm": 4.365295325214902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489930 + }, + { + "epoch": 2.376120459811366, + "grad_norm": 5.0240611670915314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489940 + }, + { + "epoch": 2.3761689580042025, + "grad_norm": 4.276158449556533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489950 + }, + { + "epoch": 2.3762174561970384, + "grad_norm": 4.5282678939884136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489960 + }, + { + "epoch": 2.3762659543898748, + "grad_norm": 4.516232010587373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489970 + }, + { + "epoch": 2.3763144525827107, + "grad_norm": 4.324108004993832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489980 + }, + { + "epoch": 2.3763629507755466, + "grad_norm": 4.597227132308035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 489990 + }, + { + "epoch": 2.376411448968383, + "grad_norm": 4.489353244707672e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490000 + }, + { + "epoch": 2.376459947161219, + "grad_norm": 4.2725435633883535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490010 + }, + { + "epoch": 2.376508445354055, + "grad_norm": 4.650290463814599e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490020 + }, + { + "epoch": 2.376556943546891, + "grad_norm": 4.2441634207079915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490030 + }, + { + "epoch": 2.376605441739727, + "grad_norm": 4.382665252933293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490040 + }, + { + "epoch": 2.376653939932563, + "grad_norm": 4.7110837897434976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490050 + }, + { + "epoch": 2.3767024381253994, + "grad_norm": 4.400036246465788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490060 + }, + { + "epoch": 2.3767509363182353, + "grad_norm": 4.4176225344472186e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490070 + }, + { + "epoch": 2.3767994345110717, + "grad_norm": 4.26927293517565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490080 + }, + { + "epoch": 2.3768479327039076, + "grad_norm": 4.343901238712533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490090 + }, + { + "epoch": 2.3768964308967435, + "grad_norm": 4.976563516834176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490100 + }, + { + "epoch": 2.37694492908958, + "grad_norm": 4.3491162671216443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490110 + }, + { + "epoch": 2.376993427282416, + "grad_norm": 4.518903651273831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490120 + }, + { + "epoch": 2.377041925475252, + "grad_norm": 4.238443196413755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490130 + }, + { + "epoch": 2.377090423668088, + "grad_norm": 4.8375362382557796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490140 + }, + { + "epoch": 2.377138921860924, + "grad_norm": 5.6749023258362286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490150 + }, + { + "epoch": 2.3771874200537604, + "grad_norm": 4.343880632973196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490160 + }, + { + "epoch": 2.3772359182465963, + "grad_norm": 4.075680948290028e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490170 + }, + { + "epoch": 2.3772844164394327, + "grad_norm": 4.207387505061888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490180 + }, + { + "epoch": 2.3773329146322686, + "grad_norm": 4.4546105470999464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490190 + }, + { + "epoch": 2.3773814128251045, + "grad_norm": 4.131478803515165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490200 + }, + { + "epoch": 2.377429911017941, + "grad_norm": 4.224571625854878e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490210 + }, + { + "epoch": 2.377478409210777, + "grad_norm": 4.168418499261861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490220 + }, + { + "epoch": 2.3775269074036127, + "grad_norm": 4.111074147772342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490230 + }, + { + "epoch": 2.377575405596449, + "grad_norm": 4.2331457450472953e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490240 + }, + { + "epoch": 2.377623903789285, + "grad_norm": 5.019699500508068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490250 + }, + { + "epoch": 2.377672401982121, + "grad_norm": 4.3431057861198497e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490260 + }, + { + "epoch": 2.3777209001749573, + "grad_norm": 4.2127471289177265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490270 + }, + { + "epoch": 2.377769398367793, + "grad_norm": 4.133699960107151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490280 + }, + { + "epoch": 2.3778178965606296, + "grad_norm": 4.2501611119405425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490290 + }, + { + "epoch": 2.3778663947534655, + "grad_norm": 4.285262278358459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490300 + }, + { + "epoch": 2.3779148929463014, + "grad_norm": 4.4876586002828844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490310 + }, + { + "epoch": 2.377963391139138, + "grad_norm": 4.2080674944600105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490320 + }, + { + "epoch": 2.3780118893319737, + "grad_norm": 3.9787806827007444e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490330 + }, + { + "epoch": 2.37806038752481, + "grad_norm": 4.17568344346364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490340 + }, + { + "epoch": 2.378108885717646, + "grad_norm": 3.9781259175697414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490350 + }, + { + "epoch": 2.378157383910482, + "grad_norm": 4.2200483107990294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490360 + }, + { + "epoch": 2.3782058821033183, + "grad_norm": 4.0524142264075635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490370 + }, + { + "epoch": 2.378254380296154, + "grad_norm": 3.89612360152114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490380 + }, + { + "epoch": 2.37830287848899, + "grad_norm": 4.269035258630538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490390 + }, + { + "epoch": 2.3783513766818265, + "grad_norm": 6.487034198698893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490400 + }, + { + "epoch": 2.3783998748746624, + "grad_norm": 4.1105014503273196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490410 + }, + { + "epoch": 2.3784483730674983, + "grad_norm": 4.094757599659715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490420 + }, + { + "epoch": 2.3784968712603347, + "grad_norm": 4.0575468318593266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490430 + }, + { + "epoch": 2.3785453694531706, + "grad_norm": 4.062009750782636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490440 + }, + { + "epoch": 2.378593867646007, + "grad_norm": 3.916692392635923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490450 + }, + { + "epoch": 2.378642365838843, + "grad_norm": 4.115194940368383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490460 + }, + { + "epoch": 2.378690864031679, + "grad_norm": 3.953493887820514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490470 + }, + { + "epoch": 2.378739362224515, + "grad_norm": 4.111789664307253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490480 + }, + { + "epoch": 2.378787860417351, + "grad_norm": 8.611161206317774e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490490 + }, + { + "epoch": 2.3788363586101875, + "grad_norm": 4.142340159773994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490500 + }, + { + "epoch": 2.3788848568030234, + "grad_norm": 4.215262805473685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490510 + }, + { + "epoch": 2.3789333549958593, + "grad_norm": 4.8458034029863484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490520 + }, + { + "epoch": 2.3789818531886957, + "grad_norm": 3.9565399845287175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490530 + }, + { + "epoch": 2.3790303513815316, + "grad_norm": 4.1091066549370225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490540 + }, + { + "epoch": 2.3790788495743675, + "grad_norm": 4.725255919879601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490550 + }, + { + "epoch": 2.379127347767204, + "grad_norm": 4.163461397865831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490560 + }, + { + "epoch": 2.37917584596004, + "grad_norm": 3.777678614369506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490570 + }, + { + "epoch": 2.3792243441528758, + "grad_norm": 4.011972620787674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490580 + }, + { + "epoch": 2.379272842345712, + "grad_norm": 6.090370874289874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490590 + }, + { + "epoch": 2.379321340538548, + "grad_norm": 4.0624783537168696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490600 + }, + { + "epoch": 2.3793698387313844, + "grad_norm": 3.853544328080716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490610 + }, + { + "epoch": 2.3794183369242203, + "grad_norm": 4.108803963731589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490620 + }, + { + "epoch": 2.3794668351170563, + "grad_norm": 3.792465363972042e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490630 + }, + { + "epoch": 2.3795153333098926, + "grad_norm": 3.995739206175131e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490640 + }, + { + "epoch": 2.3795638315027285, + "grad_norm": 3.88634830983392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490650 + }, + { + "epoch": 2.379612329695565, + "grad_norm": 4.029322653309464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490660 + }, + { + "epoch": 2.379660827888401, + "grad_norm": 4.056726154999524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490670 + }, + { + "epoch": 2.3797093260812368, + "grad_norm": 3.9025639608780693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490680 + }, + { + "epoch": 2.379757824274073, + "grad_norm": 3.7954425380348766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490690 + }, + { + "epoch": 2.379806322466909, + "grad_norm": 4.207766579611416e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490700 + }, + { + "epoch": 2.3798548206597454, + "grad_norm": 3.9517296102076216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490710 + }, + { + "epoch": 2.3799033188525813, + "grad_norm": 3.9197033174787066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490720 + }, + { + "epoch": 2.3799518170454173, + "grad_norm": 4.134897579888275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490730 + }, + { + "epoch": 2.3800003152382536, + "grad_norm": 4.0508915333248297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490740 + }, + { + "epoch": 2.3800488134310895, + "grad_norm": 3.864454711788312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490750 + }, + { + "epoch": 2.3800973116239255, + "grad_norm": 3.840904838625647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490760 + }, + { + "epoch": 2.380145809816762, + "grad_norm": 3.942634663189892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490770 + }, + { + "epoch": 2.3801943080095977, + "grad_norm": 3.9724326938994636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490780 + }, + { + "epoch": 2.3802428062024337, + "grad_norm": 3.871026876822725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490790 + }, + { + "epoch": 2.38029130439527, + "grad_norm": 4.278070875329831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490800 + }, + { + "epoch": 2.380339802588106, + "grad_norm": 3.864652953211589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490810 + }, + { + "epoch": 2.3803883007809423, + "grad_norm": 3.7773105532323825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490820 + }, + { + "epoch": 2.3804367989737782, + "grad_norm": 3.5584843516289766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490830 + }, + { + "epoch": 2.380485297166614, + "grad_norm": 3.738339771075516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490840 + }, + { + "epoch": 2.3805337953594505, + "grad_norm": 3.726635355860708e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490850 + }, + { + "epoch": 2.3805822935522865, + "grad_norm": 3.5967843814432854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490860 + }, + { + "epoch": 2.380630791745123, + "grad_norm": 4.144987286736068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490870 + }, + { + "epoch": 2.3806792899379587, + "grad_norm": 4.0531165979018624e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490880 + }, + { + "epoch": 2.3807277881307947, + "grad_norm": 3.742117371530185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490890 + }, + { + "epoch": 2.380776286323631, + "grad_norm": 3.7013748510617006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490900 + }, + { + "epoch": 2.380824784516467, + "grad_norm": 4.293431032920125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490910 + }, + { + "epoch": 2.380873282709303, + "grad_norm": 3.789693181488474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490920 + }, + { + "epoch": 2.3809217809021392, + "grad_norm": 3.879892318536804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490930 + }, + { + "epoch": 2.380970279094975, + "grad_norm": 3.809524429243538e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490940 + }, + { + "epoch": 2.381018777287811, + "grad_norm": 4.270530240546577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490950 + }, + { + "epoch": 2.3810672754806474, + "grad_norm": 4.200229852813209e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490960 + }, + { + "epoch": 2.3811157736734834, + "grad_norm": 6.566926913365023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490970 + }, + { + "epoch": 2.3811642718663197, + "grad_norm": 3.953887883767493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490980 + }, + { + "epoch": 2.3812127700591557, + "grad_norm": 3.679888749275051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 490990 + }, + { + "epoch": 2.3812612682519916, + "grad_norm": 3.751374677563035e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491000 + }, + { + "epoch": 2.381309766444828, + "grad_norm": 3.916887436616889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491010 + }, + { + "epoch": 2.381358264637664, + "grad_norm": 3.633400069702475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491020 + }, + { + "epoch": 2.3814067628305002, + "grad_norm": 3.486246669126558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491030 + }, + { + "epoch": 2.381455261023336, + "grad_norm": 3.426639239023643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491040 + }, + { + "epoch": 2.381503759216172, + "grad_norm": 3.880501608932718e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491050 + }, + { + "epoch": 2.3815522574090084, + "grad_norm": 3.903684486772363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491060 + }, + { + "epoch": 2.3816007556018444, + "grad_norm": 3.765635980812476e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491070 + }, + { + "epoch": 2.3816492537946803, + "grad_norm": 3.656981562016881e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491080 + }, + { + "epoch": 2.3816977519875167, + "grad_norm": 3.570033868527389e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491090 + }, + { + "epoch": 2.3817462501803526, + "grad_norm": 3.73080446536278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491100 + }, + { + "epoch": 2.3817947483731885, + "grad_norm": 3.84528000552109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491110 + }, + { + "epoch": 2.381843246566025, + "grad_norm": 3.933549308499096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491120 + }, + { + "epoch": 2.381891744758861, + "grad_norm": 3.6520187762789647e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491130 + }, + { + "epoch": 2.381940242951697, + "grad_norm": 3.675843629480369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491140 + }, + { + "epoch": 2.381988741144533, + "grad_norm": 3.591626551724403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491150 + }, + { + "epoch": 2.382037239337369, + "grad_norm": 3.713060436894011e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491160 + }, + { + "epoch": 2.3820857375302054, + "grad_norm": 3.698553285857997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491170 + }, + { + "epoch": 2.3821342357230413, + "grad_norm": 3.4427777961809625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491180 + }, + { + "epoch": 2.3821827339158776, + "grad_norm": 9.943387624389288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491190 + }, + { + "epoch": 2.3822312321087136, + "grad_norm": 3.523842195818361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491200 + }, + { + "epoch": 2.3822797303015495, + "grad_norm": 3.9477139779364734e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491210 + }, + { + "epoch": 2.382328228494386, + "grad_norm": 3.9093368542353346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491220 + }, + { + "epoch": 2.382376726687222, + "grad_norm": 4.003856801659822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491230 + }, + { + "epoch": 2.382425224880058, + "grad_norm": 3.949352489485136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491240 + }, + { + "epoch": 2.382473723072894, + "grad_norm": 6.143878295006289e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491250 + }, + { + "epoch": 2.38252222126573, + "grad_norm": 3.579077656468144e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491260 + }, + { + "epoch": 2.3825707194585664, + "grad_norm": 3.731758724256906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491270 + }, + { + "epoch": 2.3826192176514023, + "grad_norm": 3.7887733839170323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491280 + }, + { + "epoch": 2.382667715844238, + "grad_norm": 3.511538082534571e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491290 + }, + { + "epoch": 2.3827162140370746, + "grad_norm": 6.052412260260098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491300 + }, + { + "epoch": 2.3827647122299105, + "grad_norm": 3.512585777798449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491310 + }, + { + "epoch": 2.3828132104227464, + "grad_norm": 3.53294389299208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491320 + }, + { + "epoch": 2.3828617086155828, + "grad_norm": 4.046441759442132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491330 + }, + { + "epoch": 2.3829102068084187, + "grad_norm": 3.637205736595206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491340 + }, + { + "epoch": 2.382958705001255, + "grad_norm": 3.482037769231283e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491350 + }, + { + "epoch": 2.383007203194091, + "grad_norm": 3.427652828236205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491360 + }, + { + "epoch": 2.383055701386927, + "grad_norm": 3.5361491512730936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491370 + }, + { + "epoch": 2.3831041995797633, + "grad_norm": 3.293872552490029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491380 + }, + { + "epoch": 2.383152697772599, + "grad_norm": 3.348332811015098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491390 + }, + { + "epoch": 2.3832011959654356, + "grad_norm": 3.787470248539648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491400 + }, + { + "epoch": 2.3832496941582715, + "grad_norm": 4.5215770683171286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491410 + }, + { + "epoch": 2.3832981923511074, + "grad_norm": 3.33314922329464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491420 + }, + { + "epoch": 2.3833466905439438, + "grad_norm": 3.445127916279489e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491430 + }, + { + "epoch": 2.3833951887367797, + "grad_norm": 3.649216040457759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491440 + }, + { + "epoch": 2.3834436869296156, + "grad_norm": 5.738587205428303e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491450 + }, + { + "epoch": 2.383492185122452, + "grad_norm": 3.251624747235837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491460 + }, + { + "epoch": 2.383540683315288, + "grad_norm": 3.574815465867687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491470 + }, + { + "epoch": 2.383589181508124, + "grad_norm": 3.5768241701816805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491480 + }, + { + "epoch": 2.38363767970096, + "grad_norm": 1.1074762795715287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491490 + }, + { + "epoch": 2.383686177893796, + "grad_norm": 3.757686428684792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491500 + }, + { + "epoch": 2.3837346760866325, + "grad_norm": 3.464788633777971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491510 + }, + { + "epoch": 2.3837831742794684, + "grad_norm": 3.446503171744553e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491520 + }, + { + "epoch": 2.3838316724723043, + "grad_norm": 4.0263000045115405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491530 + }, + { + "epoch": 2.3838801706651407, + "grad_norm": 3.551896554654377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491540 + }, + { + "epoch": 2.3839286688579766, + "grad_norm": 3.3398958265706824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491550 + }, + { + "epoch": 2.383977167050813, + "grad_norm": 3.2527172066920684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491560 + }, + { + "epoch": 2.384025665243649, + "grad_norm": 4.011377541246475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491570 + }, + { + "epoch": 2.384074163436485, + "grad_norm": 3.499942025086966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491580 + }, + { + "epoch": 2.384122661629321, + "grad_norm": 3.2670111949073544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491590 + }, + { + "epoch": 2.384171159822157, + "grad_norm": 3.437078532897431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491600 + }, + { + "epoch": 2.384219658014993, + "grad_norm": 3.56258453848568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491610 + }, + { + "epoch": 2.3842681562078294, + "grad_norm": 3.507378920630799e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491620 + }, + { + "epoch": 2.3843166544006653, + "grad_norm": 3.812724713725402e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491630 + }, + { + "epoch": 2.3843651525935012, + "grad_norm": 3.149299843130393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491640 + }, + { + "epoch": 2.3844136507863376, + "grad_norm": 3.4075377186582045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491650 + }, + { + "epoch": 2.3844621489791735, + "grad_norm": 3.497559220022595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491660 + }, + { + "epoch": 2.38451064717201, + "grad_norm": 2.466267233103281e-06, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 491670 + }, + { + "epoch": 2.384559145364846, + "grad_norm": 0.0004766569472849369, + "learning_rate": 0.0002, + "loss": 0.0022, + "step": 491680 + }, + { + "epoch": 2.3846076435576817, + "grad_norm": 0.000877421407494694, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 491690 + }, + { + "epoch": 2.384656141750518, + "grad_norm": 0.0001585394493304193, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491700 + }, + { + "epoch": 2.384704639943354, + "grad_norm": 2.4323075194843113e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491710 + }, + { + "epoch": 2.3847531381361904, + "grad_norm": 1.464800789108267e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491720 + }, + { + "epoch": 2.3848016363290263, + "grad_norm": 8.579561108490452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491730 + }, + { + "epoch": 2.3848501345218622, + "grad_norm": 9.484196198172867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491740 + }, + { + "epoch": 2.3848986327146986, + "grad_norm": 8.717665878066327e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491750 + }, + { + "epoch": 2.3849471309075345, + "grad_norm": 9.156221494777128e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491760 + }, + { + "epoch": 2.384995629100371, + "grad_norm": 8.24451035441598e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491770 + }, + { + "epoch": 2.385044127293207, + "grad_norm": 1.6344500181730837e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491780 + }, + { + "epoch": 2.3850926254860427, + "grad_norm": 7.265618478413671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491790 + }, + { + "epoch": 2.385141123678879, + "grad_norm": 6.760067208233522e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491800 + }, + { + "epoch": 2.385189621871715, + "grad_norm": 7.040555829007644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491810 + }, + { + "epoch": 2.385238120064551, + "grad_norm": 5.587167379417224e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491820 + }, + { + "epoch": 2.3852866182573873, + "grad_norm": 9.940396921592765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491830 + }, + { + "epoch": 2.3853351164502232, + "grad_norm": 5.424669780040858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491840 + }, + { + "epoch": 2.385383614643059, + "grad_norm": 5.20367439094116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491850 + }, + { + "epoch": 2.3854321128358955, + "grad_norm": 4.825991254620021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491860 + }, + { + "epoch": 2.3854806110287314, + "grad_norm": 4.3435902625788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491870 + }, + { + "epoch": 2.385529109221568, + "grad_norm": 7.954249667818658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491880 + }, + { + "epoch": 2.3855776074144037, + "grad_norm": 4.658228590415092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491890 + }, + { + "epoch": 2.3856261056072396, + "grad_norm": 3.4415106711094268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491900 + }, + { + "epoch": 2.385674603800076, + "grad_norm": 3.3997027912846534e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491910 + }, + { + "epoch": 2.385723101992912, + "grad_norm": 3.05909111375513e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491920 + }, + { + "epoch": 2.3857716001857483, + "grad_norm": 6.126746939116856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491930 + }, + { + "epoch": 2.385820098378584, + "grad_norm": 3.4398597108520335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491940 + }, + { + "epoch": 2.38586859657142, + "grad_norm": 2.8119022772443714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491950 + }, + { + "epoch": 2.3859170947642565, + "grad_norm": 2.7645774025586434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491960 + }, + { + "epoch": 2.3859655929570924, + "grad_norm": 1.0516271686356049e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491970 + }, + { + "epoch": 2.3860140911499284, + "grad_norm": 6.423075774364406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491980 + }, + { + "epoch": 2.3860625893427647, + "grad_norm": 2.727946139202686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 491990 + }, + { + "epoch": 2.3861110875356006, + "grad_norm": 2.556147364884964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492000 + }, + { + "epoch": 2.3861595857284366, + "grad_norm": 2.55754321187851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492010 + }, + { + "epoch": 2.386208083921273, + "grad_norm": 2.4780995317996712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492020 + }, + { + "epoch": 2.386256582114109, + "grad_norm": 6.767085324099753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492030 + }, + { + "epoch": 2.386305080306945, + "grad_norm": 2.0068277990503702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492040 + }, + { + "epoch": 2.386353578499781, + "grad_norm": 2.9688392260140972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492050 + }, + { + "epoch": 2.386402076692617, + "grad_norm": 1.8408208006803761e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492060 + }, + { + "epoch": 2.3864505748854534, + "grad_norm": 2.121851821357268e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492070 + }, + { + "epoch": 2.3864990730782893, + "grad_norm": 8.334235644724686e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492080 + }, + { + "epoch": 2.3865475712711257, + "grad_norm": 1.9162637272529537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492090 + }, + { + "epoch": 2.3865960694639616, + "grad_norm": 1.891330725811713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492100 + }, + { + "epoch": 2.3866445676567976, + "grad_norm": 1.6657768355798908e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492110 + }, + { + "epoch": 2.386693065849634, + "grad_norm": 1.0294174899172504e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492120 + }, + { + "epoch": 2.38674156404247, + "grad_norm": 3.1356246381619712e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492130 + }, + { + "epoch": 2.3867900622353058, + "grad_norm": 1.7789758430808433e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492140 + }, + { + "epoch": 2.386838560428142, + "grad_norm": 1.5975707583493204e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492150 + }, + { + "epoch": 2.386887058620978, + "grad_norm": 1.7909028429130558e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492160 + }, + { + "epoch": 2.3869355568138144, + "grad_norm": 1.863734723883681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492170 + }, + { + "epoch": 2.3869840550066503, + "grad_norm": 2.982043952215463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492180 + }, + { + "epoch": 2.3870325531994863, + "grad_norm": 1.5420600902871229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492190 + }, + { + "epoch": 2.3870810513923226, + "grad_norm": 1.5011200957815163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492200 + }, + { + "epoch": 2.3871295495851585, + "grad_norm": 1.31764591060346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492210 + }, + { + "epoch": 2.3871780477779945, + "grad_norm": 1.5109689002201776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492220 + }, + { + "epoch": 2.387226545970831, + "grad_norm": 2.7434318781160982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492230 + }, + { + "epoch": 2.3872750441636668, + "grad_norm": 1.3735241282120114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492240 + }, + { + "epoch": 2.387323542356503, + "grad_norm": 1.3012561339564854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492250 + }, + { + "epoch": 2.387372040549339, + "grad_norm": 4.010132215626072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492260 + }, + { + "epoch": 2.387420538742175, + "grad_norm": 1.4511620065604802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492270 + }, + { + "epoch": 2.3874690369350113, + "grad_norm": 2.51380311055982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492280 + }, + { + "epoch": 2.3875175351278473, + "grad_norm": 1.2584974911078461e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492290 + }, + { + "epoch": 2.3875660333206836, + "grad_norm": 1.2589356401804253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492300 + }, + { + "epoch": 2.3876145315135195, + "grad_norm": 1.1667630133160856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492310 + }, + { + "epoch": 2.3876630297063555, + "grad_norm": 1.0729553423516336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492320 + }, + { + "epoch": 2.387711527899192, + "grad_norm": 2.3067254915076774e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492330 + }, + { + "epoch": 2.3877600260920278, + "grad_norm": 1.0490955446584849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492340 + }, + { + "epoch": 2.3878085242848637, + "grad_norm": 1.1528295544849243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492350 + }, + { + "epoch": 2.3878570224777, + "grad_norm": 1.1083315030191443e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492360 + }, + { + "epoch": 2.387905520670536, + "grad_norm": 1.0502885743335355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492370 + }, + { + "epoch": 2.387954018863372, + "grad_norm": 1.9509013782226248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492380 + }, + { + "epoch": 2.3880025170562083, + "grad_norm": 1.0272942745359614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492390 + }, + { + "epoch": 2.388051015249044, + "grad_norm": 1.0017121212513302e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492400 + }, + { + "epoch": 2.3880995134418805, + "grad_norm": 9.832593832470593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492410 + }, + { + "epoch": 2.3881480116347165, + "grad_norm": 1.0524429399083601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492420 + }, + { + "epoch": 2.3881965098275524, + "grad_norm": 1.8203871832156437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492430 + }, + { + "epoch": 2.3882450080203887, + "grad_norm": 9.598867336535477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492440 + }, + { + "epoch": 2.3882935062132247, + "grad_norm": 9.473675390836434e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492450 + }, + { + "epoch": 2.388342004406061, + "grad_norm": 9.237680842488771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492460 + }, + { + "epoch": 2.388390502598897, + "grad_norm": 8.808960387796105e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492470 + }, + { + "epoch": 2.388439000791733, + "grad_norm": 2.2286853891273495e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492480 + }, + { + "epoch": 2.3884874989845692, + "grad_norm": 8.785363547758607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492490 + }, + { + "epoch": 2.388535997177405, + "grad_norm": 8.42865347294719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492500 + }, + { + "epoch": 2.388584495370241, + "grad_norm": 8.433726179646328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492510 + }, + { + "epoch": 2.3886329935630775, + "grad_norm": 1.8632644014360267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492520 + }, + { + "epoch": 2.3886814917559134, + "grad_norm": 1.6992837572615827e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492530 + }, + { + "epoch": 2.3887299899487493, + "grad_norm": 8.367929353880754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492540 + }, + { + "epoch": 2.3887784881415857, + "grad_norm": 8.138830480675097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492550 + }, + { + "epoch": 2.3888269863344216, + "grad_norm": 7.826262731214229e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492560 + }, + { + "epoch": 2.388875484527258, + "grad_norm": 8.117223728731915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492570 + }, + { + "epoch": 2.388923982720094, + "grad_norm": 1.5193742228802876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492580 + }, + { + "epoch": 2.38897248091293, + "grad_norm": 5.939005404798081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492590 + }, + { + "epoch": 2.389020979105766, + "grad_norm": 8.035071914491709e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492600 + }, + { + "epoch": 2.389069477298602, + "grad_norm": 7.090347935445607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492610 + }, + { + "epoch": 2.3891179754914385, + "grad_norm": 7.45777128940972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492620 + }, + { + "epoch": 2.3891664736842744, + "grad_norm": 1.4284620419857674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492630 + }, + { + "epoch": 2.3892149718771103, + "grad_norm": 7.008493980720232e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492640 + }, + { + "epoch": 2.3892634700699467, + "grad_norm": 8.632210324321932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492650 + }, + { + "epoch": 2.3893119682627826, + "grad_norm": 7.883516559559212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492660 + }, + { + "epoch": 2.3893604664556185, + "grad_norm": 6.892857982165879e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492670 + }, + { + "epoch": 2.389408964648455, + "grad_norm": 1.4524059679388301e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492680 + }, + { + "epoch": 2.389457462841291, + "grad_norm": 6.823360081398278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492690 + }, + { + "epoch": 2.389505961034127, + "grad_norm": 6.81654739764781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492700 + }, + { + "epoch": 2.389554459226963, + "grad_norm": 6.954479658816126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492710 + }, + { + "epoch": 2.389602957419799, + "grad_norm": 6.880184741930861e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492720 + }, + { + "epoch": 2.3896514556126354, + "grad_norm": 1.2880670965387253e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492730 + }, + { + "epoch": 2.3896999538054713, + "grad_norm": 7.137851412153395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492740 + }, + { + "epoch": 2.389748451998307, + "grad_norm": 7.828563184375525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492750 + }, + { + "epoch": 2.3897969501911436, + "grad_norm": 7.604936058669409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492760 + }, + { + "epoch": 2.3898454483839795, + "grad_norm": 7.091989573382307e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492770 + }, + { + "epoch": 2.389893946576816, + "grad_norm": 1.2547308188004536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492780 + }, + { + "epoch": 2.389942444769652, + "grad_norm": 6.702563268845552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492790 + }, + { + "epoch": 2.3899909429624877, + "grad_norm": 6.288645408858429e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492800 + }, + { + "epoch": 2.390039441155324, + "grad_norm": 8.639104862595559e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492810 + }, + { + "epoch": 2.39008793934816, + "grad_norm": 6.075141527617234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492820 + }, + { + "epoch": 2.3901364375409964, + "grad_norm": 1.1994023907391238e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492830 + }, + { + "epoch": 2.3901849357338323, + "grad_norm": 6.973328368076182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492840 + }, + { + "epoch": 2.390233433926668, + "grad_norm": 5.811641585751204e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492850 + }, + { + "epoch": 2.3902819321195046, + "grad_norm": 5.962882596577401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492860 + }, + { + "epoch": 2.3903304303123405, + "grad_norm": 6.567793207068462e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492870 + }, + { + "epoch": 2.3903789285051764, + "grad_norm": 1.13187559236394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492880 + }, + { + "epoch": 2.390427426698013, + "grad_norm": 7.232612233565305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492890 + }, + { + "epoch": 2.3904759248908487, + "grad_norm": 5.61377817120956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492900 + }, + { + "epoch": 2.3905244230836846, + "grad_norm": 5.871258963452419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492910 + }, + { + "epoch": 2.390572921276521, + "grad_norm": 7.485787136829458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492920 + }, + { + "epoch": 2.390621419469357, + "grad_norm": 9.82053961706697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492930 + }, + { + "epoch": 2.3906699176621933, + "grad_norm": 5.811979235659237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492940 + }, + { + "epoch": 2.390718415855029, + "grad_norm": 5.840876156071317e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492950 + }, + { + "epoch": 2.390766914047865, + "grad_norm": 5.899911457163398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492960 + }, + { + "epoch": 2.3908154122407015, + "grad_norm": 5.738029358326457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492970 + }, + { + "epoch": 2.3908639104335374, + "grad_norm": 9.918554724208661e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492980 + }, + { + "epoch": 2.3909124086263738, + "grad_norm": 5.40371900115133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 492990 + }, + { + "epoch": 2.3909609068192097, + "grad_norm": 1.8735212279352709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493000 + }, + { + "epoch": 2.3910094050120456, + "grad_norm": 5.269550911179977e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493010 + }, + { + "epoch": 2.391057903204882, + "grad_norm": 5.835768206452485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493020 + }, + { + "epoch": 2.391106401397718, + "grad_norm": 9.81746893558011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493030 + }, + { + "epoch": 2.391154899590554, + "grad_norm": 5.501408963937138e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493040 + }, + { + "epoch": 2.39120339778339, + "grad_norm": 5.12122142026783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493050 + }, + { + "epoch": 2.391251895976226, + "grad_norm": 4.920178753309301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493060 + }, + { + "epoch": 2.391300394169062, + "grad_norm": 4.722206199403445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493070 + }, + { + "epoch": 2.3913488923618984, + "grad_norm": 1.0214404255748377e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493080 + }, + { + "epoch": 2.3913973905547343, + "grad_norm": 6.737615763086069e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493090 + }, + { + "epoch": 2.3914458887475707, + "grad_norm": 4.964173854205001e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493100 + }, + { + "epoch": 2.3914943869404066, + "grad_norm": 4.791261858372309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493110 + }, + { + "epoch": 2.3915428851332425, + "grad_norm": 3.391274276509648e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493120 + }, + { + "epoch": 2.391591383326079, + "grad_norm": 8.709740768608754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493130 + }, + { + "epoch": 2.391639881518915, + "grad_norm": 4.6284657173600863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493140 + }, + { + "epoch": 2.391688379711751, + "grad_norm": 4.7235849365279137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493150 + }, + { + "epoch": 2.391736877904587, + "grad_norm": 4.5626026690115395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493160 + }, + { + "epoch": 2.391785376097423, + "grad_norm": 6.75041007980326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493170 + }, + { + "epoch": 2.3918338742902594, + "grad_norm": 8.372833235625876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493180 + }, + { + "epoch": 2.3918823724830953, + "grad_norm": 4.683481336087425e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493190 + }, + { + "epoch": 2.3919308706759317, + "grad_norm": 4.273702813861746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493200 + }, + { + "epoch": 2.3919793688687676, + "grad_norm": 4.821997094950348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493210 + }, + { + "epoch": 2.3920278670616035, + "grad_norm": 4.60889452824631e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493220 + }, + { + "epoch": 2.39207636525444, + "grad_norm": 8.827616397866223e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493230 + }, + { + "epoch": 2.392124863447276, + "grad_norm": 4.734122285299236e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493240 + }, + { + "epoch": 2.3921733616401117, + "grad_norm": 4.5331628939493385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493250 + }, + { + "epoch": 2.392221859832948, + "grad_norm": 4.969298288415303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493260 + }, + { + "epoch": 2.392270358025784, + "grad_norm": 5.16109992076963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493270 + }, + { + "epoch": 2.39231885621862, + "grad_norm": 7.503616075155151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493280 + }, + { + "epoch": 2.3923673544114563, + "grad_norm": 4.9796722123574e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493290 + }, + { + "epoch": 2.3924158526042922, + "grad_norm": 4.2990896531591716e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493300 + }, + { + "epoch": 2.3924643507971286, + "grad_norm": 4.2721615045593353e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493310 + }, + { + "epoch": 2.3925128489899645, + "grad_norm": 7.379006274277344e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493320 + }, + { + "epoch": 2.3925613471828004, + "grad_norm": 1.0840557251867722e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493330 + }, + { + "epoch": 2.392609845375637, + "grad_norm": 4.2258909616066376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493340 + }, + { + "epoch": 2.3926583435684727, + "grad_norm": 4.278001881630189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493350 + }, + { + "epoch": 2.392706841761309, + "grad_norm": 4.316567867590493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493360 + }, + { + "epoch": 2.392755339954145, + "grad_norm": 4.019698565116414e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493370 + }, + { + "epoch": 2.392803838146981, + "grad_norm": 7.678287943235773e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493380 + }, + { + "epoch": 2.3928523363398173, + "grad_norm": 4.0809544543662923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493390 + }, + { + "epoch": 2.3929008345326532, + "grad_norm": 4.351203131136572e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493400 + }, + { + "epoch": 2.392949332725489, + "grad_norm": 4.0550222024648974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493410 + }, + { + "epoch": 2.3929978309183255, + "grad_norm": 4.1527829353071866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493420 + }, + { + "epoch": 2.3930463291111614, + "grad_norm": 7.608838927808392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493430 + }, + { + "epoch": 2.3930948273039974, + "grad_norm": 3.8321863371493237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493440 + }, + { + "epoch": 2.3931433254968337, + "grad_norm": 4.101601120964915e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493450 + }, + { + "epoch": 2.3931918236896697, + "grad_norm": 4.399557838041801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493460 + }, + { + "epoch": 2.393240321882506, + "grad_norm": 3.928504099803831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493470 + }, + { + "epoch": 2.393288820075342, + "grad_norm": 7.219920803436253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493480 + }, + { + "epoch": 2.393337318268178, + "grad_norm": 7.255658829308231e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493490 + }, + { + "epoch": 2.3933858164610142, + "grad_norm": 3.7964582588756457e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493500 + }, + { + "epoch": 2.39343431465385, + "grad_norm": 4.4928160036761255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493510 + }, + { + "epoch": 2.3934828128466865, + "grad_norm": 3.879025598507724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493520 + }, + { + "epoch": 2.3935313110395224, + "grad_norm": 7.030814117570117e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493530 + }, + { + "epoch": 2.3935798092323584, + "grad_norm": 3.824740986146935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493540 + }, + { + "epoch": 2.3936283074251947, + "grad_norm": 3.7897183347013197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493550 + }, + { + "epoch": 2.3936768056180306, + "grad_norm": 3.4914387470053043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493560 + }, + { + "epoch": 2.3937253038108666, + "grad_norm": 4.283461123577581e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493570 + }, + { + "epoch": 2.393773802003703, + "grad_norm": 6.590208272427844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493580 + }, + { + "epoch": 2.393822300196539, + "grad_norm": 3.5945888043897867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493590 + }, + { + "epoch": 2.3938707983893748, + "grad_norm": 3.804219375069806e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493600 + }, + { + "epoch": 2.393919296582211, + "grad_norm": 3.521678308970877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493610 + }, + { + "epoch": 2.393967794775047, + "grad_norm": 3.210827799193794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493620 + }, + { + "epoch": 2.3940162929678834, + "grad_norm": 6.113485255809792e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493630 + }, + { + "epoch": 2.3940647911607194, + "grad_norm": 3.5690217714545724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493640 + }, + { + "epoch": 2.3941132893535553, + "grad_norm": 3.608397207699454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493650 + }, + { + "epoch": 2.3941617875463916, + "grad_norm": 3.301614981410239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493660 + }, + { + "epoch": 2.3942102857392276, + "grad_norm": 3.150477141389274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493670 + }, + { + "epoch": 2.394258783932064, + "grad_norm": 6.09110657023848e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493680 + }, + { + "epoch": 2.3943072821249, + "grad_norm": 3.516673245940183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493690 + }, + { + "epoch": 2.3943557803177358, + "grad_norm": 3.571885827113874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493700 + }, + { + "epoch": 2.394404278510572, + "grad_norm": 3.261407073296141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493710 + }, + { + "epoch": 2.394452776703408, + "grad_norm": 3.568971749245975e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493720 + }, + { + "epoch": 2.3945012748962444, + "grad_norm": 5.869227948096523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493730 + }, + { + "epoch": 2.3945497730890803, + "grad_norm": 3.469357352514635e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493740 + }, + { + "epoch": 2.3945982712819163, + "grad_norm": 3.1342781880994153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493750 + }, + { + "epoch": 2.3946467694747526, + "grad_norm": 3.301179276604671e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493760 + }, + { + "epoch": 2.3946952676675886, + "grad_norm": 3.1759947205500794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493770 + }, + { + "epoch": 2.3947437658604245, + "grad_norm": 5.979822503832111e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493780 + }, + { + "epoch": 2.394792264053261, + "grad_norm": 3.31888713844819e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493790 + }, + { + "epoch": 2.3948407622460968, + "grad_norm": 3.586243337849737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493800 + }, + { + "epoch": 2.3948892604389327, + "grad_norm": 3.478679104773619e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493810 + }, + { + "epoch": 2.394937758631769, + "grad_norm": 3.2093655022435996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493820 + }, + { + "epoch": 2.394986256824605, + "grad_norm": 2.7949822651862632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493830 + }, + { + "epoch": 2.3950347550174413, + "grad_norm": 0.000781382666900754, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493840 + }, + { + "epoch": 2.3950832532102773, + "grad_norm": 1.3114842658978887e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493850 + }, + { + "epoch": 2.395131751403113, + "grad_norm": 4.407832534525369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493860 + }, + { + "epoch": 2.3951802495959496, + "grad_norm": 6.795202352805063e-06, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 493870 + }, + { + "epoch": 2.3952287477887855, + "grad_norm": 4.778414222528227e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 493880 + }, + { + "epoch": 2.395277245981622, + "grad_norm": 4.468808401725255e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 493890 + }, + { + "epoch": 2.3953257441744578, + "grad_norm": 0.0003583035140763968, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 493900 + }, + { + "epoch": 2.3953742423672937, + "grad_norm": 1.1442961294960696e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493910 + }, + { + "epoch": 2.39542274056013, + "grad_norm": 5.303747911966639e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493920 + }, + { + "epoch": 2.395471238752966, + "grad_norm": 5.804857210023329e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493930 + }, + { + "epoch": 2.395519736945802, + "grad_norm": 3.926927547581727e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493940 + }, + { + "epoch": 2.3955682351386383, + "grad_norm": 4.11635073760408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493950 + }, + { + "epoch": 2.395616733331474, + "grad_norm": 5.548760327656055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493960 + }, + { + "epoch": 2.39566523152431, + "grad_norm": 3.5501946058502654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493970 + }, + { + "epoch": 2.3957137297171465, + "grad_norm": 3.9315982576226816e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493980 + }, + { + "epoch": 2.3957622279099824, + "grad_norm": 3.203273081453517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 493990 + }, + { + "epoch": 2.3958107261028188, + "grad_norm": 3.206175961167901e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494000 + }, + { + "epoch": 2.3958592242956547, + "grad_norm": 3.2806713079480687e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494010 + }, + { + "epoch": 2.3959077224884906, + "grad_norm": 2.9412583444354823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494020 + }, + { + "epoch": 2.395956220681327, + "grad_norm": 2.7521984975464875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494030 + }, + { + "epoch": 2.396004718874163, + "grad_norm": 2.527663582441164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494040 + }, + { + "epoch": 2.3960532170669993, + "grad_norm": 2.4429366476397263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494050 + }, + { + "epoch": 2.396101715259835, + "grad_norm": 2.2613983219343936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494060 + }, + { + "epoch": 2.396150213452671, + "grad_norm": 2.2101592094259104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494070 + }, + { + "epoch": 2.3961987116455075, + "grad_norm": 3.11383723783365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494080 + }, + { + "epoch": 2.3962472098383434, + "grad_norm": 2.026841457336559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494090 + }, + { + "epoch": 2.3962957080311793, + "grad_norm": 1.925315018524998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494100 + }, + { + "epoch": 2.3963442062240157, + "grad_norm": 1.8000991985900328e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494110 + }, + { + "epoch": 2.3963927044168516, + "grad_norm": 1.899753556244832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494120 + }, + { + "epoch": 2.3964412026096875, + "grad_norm": 2.007950570259709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494130 + }, + { + "epoch": 2.396489700802524, + "grad_norm": 1.702500867395429e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494140 + }, + { + "epoch": 2.39653819899536, + "grad_norm": 0.023173989728093147, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494150 + }, + { + "epoch": 2.396586697188196, + "grad_norm": 1.5083409152794047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494160 + }, + { + "epoch": 2.396635195381032, + "grad_norm": 1.57083741214592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494170 + }, + { + "epoch": 2.396683693573868, + "grad_norm": 1.657509869801288e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494180 + }, + { + "epoch": 2.3967321917667044, + "grad_norm": 1.4971880091252388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494190 + }, + { + "epoch": 2.3967806899595403, + "grad_norm": 0.0903049036860466, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 494200 + }, + { + "epoch": 2.3968291881523767, + "grad_norm": 0.0005943197174929082, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494210 + }, + { + "epoch": 2.3968776863452126, + "grad_norm": 1.2857685760536697e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494220 + }, + { + "epoch": 2.3969261845380485, + "grad_norm": 0.0002787773555610329, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494230 + }, + { + "epoch": 2.396974682730885, + "grad_norm": 1.766293735272484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494240 + }, + { + "epoch": 2.397023180923721, + "grad_norm": 0.01112598180770874, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494250 + }, + { + "epoch": 2.397071679116557, + "grad_norm": 9.979905826185131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494260 + }, + { + "epoch": 2.397120177309393, + "grad_norm": 1.0223353683613823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494270 + }, + { + "epoch": 2.397168675502229, + "grad_norm": 7.581692216263036e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494280 + }, + { + "epoch": 2.3972171736950654, + "grad_norm": 9.139399708146811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494290 + }, + { + "epoch": 2.3972656718879013, + "grad_norm": 9.9655767371587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494300 + }, + { + "epoch": 2.397314170080737, + "grad_norm": 8.61095884374663e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494310 + }, + { + "epoch": 2.3973626682735736, + "grad_norm": 8.607682389083493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494320 + }, + { + "epoch": 2.3974111664664095, + "grad_norm": 7.472251581930323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494330 + }, + { + "epoch": 2.3974596646592454, + "grad_norm": 8.110080784717866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494340 + }, + { + "epoch": 2.397508162852082, + "grad_norm": 8.70286896770267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494350 + }, + { + "epoch": 2.3975566610449177, + "grad_norm": 8.475692538922885e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494360 + }, + { + "epoch": 2.397605159237754, + "grad_norm": 8.604963568359381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494370 + }, + { + "epoch": 2.39765365743059, + "grad_norm": 6.731830239914416e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494380 + }, + { + "epoch": 2.397702155623426, + "grad_norm": 7.846466019145737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494390 + }, + { + "epoch": 2.3977506538162623, + "grad_norm": 7.728893365310796e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494400 + }, + { + "epoch": 2.397799152009098, + "grad_norm": 7.71446366343298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494410 + }, + { + "epoch": 2.3978476502019346, + "grad_norm": 9.039573001246026e-07, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 494420 + }, + { + "epoch": 2.3978961483947705, + "grad_norm": 1.7349077552353265e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 494430 + }, + { + "epoch": 2.3979446465876064, + "grad_norm": 0.00010954958270303905, + "learning_rate": 0.0002, + "loss": 0.0426, + "step": 494440 + }, + { + "epoch": 2.397993144780443, + "grad_norm": 0.0003171212738379836, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494450 + }, + { + "epoch": 2.3980416429732787, + "grad_norm": 0.00029642158187925816, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 494460 + }, + { + "epoch": 2.3980901411661146, + "grad_norm": 0.00018096914573106915, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494470 + }, + { + "epoch": 2.398138639358951, + "grad_norm": 5.782704829471186e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494480 + }, + { + "epoch": 2.398187137551787, + "grad_norm": 6.414909876184538e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494490 + }, + { + "epoch": 2.398235635744623, + "grad_norm": 5.035818321630359e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494500 + }, + { + "epoch": 2.398284133937459, + "grad_norm": 3.916128116543405e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494510 + }, + { + "epoch": 2.398332632130295, + "grad_norm": 3.482376632746309e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494520 + }, + { + "epoch": 2.3983811303231315, + "grad_norm": 1.7034630218404345e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494530 + }, + { + "epoch": 2.3984296285159674, + "grad_norm": 2.6287647415301763e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494540 + }, + { + "epoch": 2.3984781267088033, + "grad_norm": 2.4303824829985388e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494550 + }, + { + "epoch": 2.3985266249016397, + "grad_norm": 2.1816633307025768e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494560 + }, + { + "epoch": 2.3985751230944756, + "grad_norm": 2.0267680156393908e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494570 + }, + { + "epoch": 2.398623621287312, + "grad_norm": 1.1016450116585474e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494580 + }, + { + "epoch": 2.398672119480148, + "grad_norm": 1.757619611453265e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494590 + }, + { + "epoch": 2.398720617672984, + "grad_norm": 1.6671509001753293e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494600 + }, + { + "epoch": 2.39876911586582, + "grad_norm": 1.4924502465873957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494610 + }, + { + "epoch": 2.398817614058656, + "grad_norm": 1.5691466614953242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494620 + }, + { + "epoch": 2.398866112251492, + "grad_norm": 1.0699860467866529e-05, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 494630 + }, + { + "epoch": 2.3989146104443284, + "grad_norm": 6.633777229581028e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494640 + }, + { + "epoch": 2.3989631086371643, + "grad_norm": 9.024200699059293e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494650 + }, + { + "epoch": 2.3990116068300003, + "grad_norm": 0.007041081320494413, + "learning_rate": 0.0002, + "loss": 0.0006, + "step": 494660 + }, + { + "epoch": 2.3990601050228366, + "grad_norm": 0.00018248632841277868, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494670 + }, + { + "epoch": 2.3991086032156725, + "grad_norm": 0.0002299916377523914, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494680 + }, + { + "epoch": 2.399157101408509, + "grad_norm": 9.721479000290856e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494690 + }, + { + "epoch": 2.399205599601345, + "grad_norm": 6.86976927681826e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494700 + }, + { + "epoch": 2.3992540977941808, + "grad_norm": 0.044387511909008026, + "learning_rate": 0.0002, + "loss": 0.0024, + "step": 494710 + }, + { + "epoch": 2.399302595987017, + "grad_norm": 0.0006158098112791777, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494720 + }, + { + "epoch": 2.399351094179853, + "grad_norm": 0.00022342512966133654, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494730 + }, + { + "epoch": 2.3993995923726894, + "grad_norm": 0.00025710902991704643, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494740 + }, + { + "epoch": 2.3994480905655253, + "grad_norm": 0.001339390641078353, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 494750 + }, + { + "epoch": 2.3994965887583612, + "grad_norm": 0.00011203991016373038, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494760 + }, + { + "epoch": 2.3995450869511976, + "grad_norm": 5.6547694839537144e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494770 + }, + { + "epoch": 2.3995935851440335, + "grad_norm": 1.7704944184515625e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494780 + }, + { + "epoch": 2.39964208333687, + "grad_norm": 9.834090451477095e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494790 + }, + { + "epoch": 2.399690581529706, + "grad_norm": 4.7525907575618476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494800 + }, + { + "epoch": 2.3997390797225417, + "grad_norm": 3.216830373276025e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494810 + }, + { + "epoch": 2.399787577915378, + "grad_norm": 2.9235601687105373e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494820 + }, + { + "epoch": 2.399836076108214, + "grad_norm": 1.0490610293345526e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494830 + }, + { + "epoch": 2.39988457430105, + "grad_norm": 2.4798966478556395e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494840 + }, + { + "epoch": 2.3999330724938863, + "grad_norm": 2.3221482479129918e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494850 + }, + { + "epoch": 2.3999815706867222, + "grad_norm": 2.1414931325125508e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494860 + }, + { + "epoch": 2.400030068879558, + "grad_norm": 1.9751547370105982e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494870 + }, + { + "epoch": 2.4000785670723945, + "grad_norm": 7.0821861299918965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494880 + }, + { + "epoch": 2.4001270652652305, + "grad_norm": 1.6500745914527215e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 494890 + }, + { + "epoch": 2.400175563458067, + "grad_norm": 3.3935695682885125e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494900 + }, + { + "epoch": 2.4002240616509027, + "grad_norm": 3.8613688957411796e-05, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 494910 + }, + { + "epoch": 2.4002725598437387, + "grad_norm": 4.817805893253535e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494920 + }, + { + "epoch": 2.400321058036575, + "grad_norm": 1.827502273954451e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494930 + }, + { + "epoch": 2.400369556229411, + "grad_norm": 4.624609209713526e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494940 + }, + { + "epoch": 2.4004180544222473, + "grad_norm": 3.972408740082756e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494950 + }, + { + "epoch": 2.4004665526150832, + "grad_norm": 2.5253977582906373e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494960 + }, + { + "epoch": 2.400515050807919, + "grad_norm": 2.2074469598010182e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494970 + }, + { + "epoch": 2.4005635490007555, + "grad_norm": 6.419838882720796e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494980 + }, + { + "epoch": 2.4006120471935914, + "grad_norm": 1.8185708540841006e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 494990 + }, + { + "epoch": 2.4006605453864274, + "grad_norm": 1.6661204426782206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495000 + }, + { + "epoch": 2.4007090435792637, + "grad_norm": 1.7022832253132947e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495010 + }, + { + "epoch": 2.4007575417720997, + "grad_norm": 1.4933965758245904e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495020 + }, + { + "epoch": 2.4008060399649356, + "grad_norm": 4.6511017899319995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495030 + }, + { + "epoch": 2.400854538157772, + "grad_norm": 4.1921746742445976e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495040 + }, + { + "epoch": 2.400903036350608, + "grad_norm": 1.2098295883333776e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495050 + }, + { + "epoch": 2.4009515345434442, + "grad_norm": 1.1957435162912589e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495060 + }, + { + "epoch": 2.40100003273628, + "grad_norm": 1.1064871614507865e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495070 + }, + { + "epoch": 2.401048530929116, + "grad_norm": 3.997640760644572e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495080 + }, + { + "epoch": 2.4010970291219524, + "grad_norm": 9.778521416592412e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495090 + }, + { + "epoch": 2.4011455273147884, + "grad_norm": 1.1241221727686934e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495100 + }, + { + "epoch": 2.4011940255076247, + "grad_norm": 8.335056918440387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495110 + }, + { + "epoch": 2.4012425237004607, + "grad_norm": 8.620297194283921e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495120 + }, + { + "epoch": 2.4012910218932966, + "grad_norm": 3.4638148918020306e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495130 + }, + { + "epoch": 2.401339520086133, + "grad_norm": 8.005608833627775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495140 + }, + { + "epoch": 2.401388018278969, + "grad_norm": 8.011483259906527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495150 + }, + { + "epoch": 2.401436516471805, + "grad_norm": 7.457051196979592e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495160 + }, + { + "epoch": 2.401485014664641, + "grad_norm": 7.481754892069148e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495170 + }, + { + "epoch": 2.401533512857477, + "grad_norm": 2.9594507395813707e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495180 + }, + { + "epoch": 2.401582011050313, + "grad_norm": 6.654820481344359e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495190 + }, + { + "epoch": 2.4016305092431494, + "grad_norm": 6.5704289227142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495200 + }, + { + "epoch": 2.4016790074359853, + "grad_norm": 6.984310857660603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495210 + }, + { + "epoch": 2.4017275056288216, + "grad_norm": 6.404479336197255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495220 + }, + { + "epoch": 2.4017760038216576, + "grad_norm": 4.6496602408296894e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 495230 + }, + { + "epoch": 2.4018245020144935, + "grad_norm": 0.0014845734694972634, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495240 + }, + { + "epoch": 2.40187300020733, + "grad_norm": 8.624639303889126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495250 + }, + { + "epoch": 2.4019214984001658, + "grad_norm": 9.685641998657957e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 495260 + }, + { + "epoch": 2.401969996593002, + "grad_norm": 6.829430731158936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495270 + }, + { + "epoch": 2.402018494785838, + "grad_norm": 3.0693424832861638e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495280 + }, + { + "epoch": 2.402066992978674, + "grad_norm": 6.7787391344609205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495290 + }, + { + "epoch": 2.4021154911715104, + "grad_norm": 6.36932145425817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495300 + }, + { + "epoch": 2.4021639893643463, + "grad_norm": 5.8046416597790085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495310 + }, + { + "epoch": 2.4022124875571826, + "grad_norm": 6.18380363448523e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495320 + }, + { + "epoch": 2.4022609857500186, + "grad_norm": 3.331645757498336e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495330 + }, + { + "epoch": 2.4023094839428545, + "grad_norm": 5.775174031441566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495340 + }, + { + "epoch": 2.402357982135691, + "grad_norm": 5.5164027799037285e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495350 + }, + { + "epoch": 2.4024064803285268, + "grad_norm": 5.095616870676167e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495360 + }, + { + "epoch": 2.4024549785213627, + "grad_norm": 5.808420610264875e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495370 + }, + { + "epoch": 2.402503476714199, + "grad_norm": 2.791528686429956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495380 + }, + { + "epoch": 2.402551974907035, + "grad_norm": 5.31688101546024e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495390 + }, + { + "epoch": 2.402600473099871, + "grad_norm": 4.885638645646395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495400 + }, + { + "epoch": 2.4026489712927073, + "grad_norm": 4.767774044012185e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495410 + }, + { + "epoch": 2.402697469485543, + "grad_norm": 4.554518000077223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495420 + }, + { + "epoch": 2.4027459676783796, + "grad_norm": 2.26035763262189e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495430 + }, + { + "epoch": 2.4027944658712155, + "grad_norm": 4.695090410677949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495440 + }, + { + "epoch": 2.4028429640640514, + "grad_norm": 4.648391950468067e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495450 + }, + { + "epoch": 2.4028914622568878, + "grad_norm": 4.471332431421615e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495460 + }, + { + "epoch": 2.4029399604497237, + "grad_norm": 4.318992523622001e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495470 + }, + { + "epoch": 2.40298845864256, + "grad_norm": 1.9656890799524263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495480 + }, + { + "epoch": 2.403036956835396, + "grad_norm": 4.354989869170822e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495490 + }, + { + "epoch": 2.403085455028232, + "grad_norm": 4.12064991905936e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495500 + }, + { + "epoch": 2.4031339532210683, + "grad_norm": 4.17151250076131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495510 + }, + { + "epoch": 2.403182451413904, + "grad_norm": 4.00403177991393e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495520 + }, + { + "epoch": 2.40323094960674, + "grad_norm": 1.7214491663253284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495530 + }, + { + "epoch": 2.4032794477995765, + "grad_norm": 3.8029145343898563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495540 + }, + { + "epoch": 2.4033279459924124, + "grad_norm": 3.731822744157398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495550 + }, + { + "epoch": 2.4033764441852483, + "grad_norm": 3.712025318236556e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495560 + }, + { + "epoch": 2.4034249423780847, + "grad_norm": 3.6210487905918853e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495570 + }, + { + "epoch": 2.4034734405709206, + "grad_norm": 1.646882537897909e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495580 + }, + { + "epoch": 2.403521938763757, + "grad_norm": 3.8520247471751645e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495590 + }, + { + "epoch": 2.403570436956593, + "grad_norm": 3.5933212529926095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495600 + }, + { + "epoch": 2.403618935149429, + "grad_norm": 3.3527453524584416e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495610 + }, + { + "epoch": 2.403667433342265, + "grad_norm": 3.4322356441407464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495620 + }, + { + "epoch": 2.403715931535101, + "grad_norm": 1.4420289744521142e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495630 + }, + { + "epoch": 2.4037644297279375, + "grad_norm": 3.2763205126684625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495640 + }, + { + "epoch": 2.4038129279207734, + "grad_norm": 3.147586994600715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495650 + }, + { + "epoch": 2.4038614261136093, + "grad_norm": 3.2024900065152906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495660 + }, + { + "epoch": 2.4039099243064457, + "grad_norm": 2.947241682704771e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495670 + }, + { + "epoch": 2.4039584224992816, + "grad_norm": 1.4419672424992314e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495680 + }, + { + "epoch": 2.4040069206921175, + "grad_norm": 2.7862440674653044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495690 + }, + { + "epoch": 2.404055418884954, + "grad_norm": 0.00013282989675644785, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495700 + }, + { + "epoch": 2.40410391707779, + "grad_norm": 3.511873046591063e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 495710 + }, + { + "epoch": 2.4041524152706257, + "grad_norm": 3.405117695365334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495720 + }, + { + "epoch": 2.404200913463462, + "grad_norm": 1.4515126167680137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495730 + }, + { + "epoch": 2.404249411656298, + "grad_norm": 3.592836264942889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495740 + }, + { + "epoch": 2.4042979098491344, + "grad_norm": 3.547786946000997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495750 + }, + { + "epoch": 2.4043464080419703, + "grad_norm": 3.4065692489093635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495760 + }, + { + "epoch": 2.4043949062348062, + "grad_norm": 3.599394176490023e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495770 + }, + { + "epoch": 2.4044434044276426, + "grad_norm": 1.5699769164712052e-06, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 495780 + }, + { + "epoch": 2.4044919026204785, + "grad_norm": 7.61159890316776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495790 + }, + { + "epoch": 2.404540400813315, + "grad_norm": 1.4531688066199422e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495800 + }, + { + "epoch": 2.404588899006151, + "grad_norm": 0.0005468243616633117, + "learning_rate": 0.0002, + "loss": 0.0025, + "step": 495810 + }, + { + "epoch": 2.4046373971989867, + "grad_norm": 0.0003194583987351507, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495820 + }, + { + "epoch": 2.404685895391823, + "grad_norm": 0.0001794331765267998, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495830 + }, + { + "epoch": 2.404734393584659, + "grad_norm": 3.94154449168127e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495840 + }, + { + "epoch": 2.4047828917774954, + "grad_norm": 0.0003734049678314477, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495850 + }, + { + "epoch": 2.4048313899703313, + "grad_norm": 1.8075272237183526e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495860 + }, + { + "epoch": 2.4048798881631672, + "grad_norm": 1.6612937542959116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495870 + }, + { + "epoch": 2.4049283863560036, + "grad_norm": 2.0274774215067737e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495880 + }, + { + "epoch": 2.4049768845488395, + "grad_norm": 1.373101167700952e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495890 + }, + { + "epoch": 2.4050253827416754, + "grad_norm": 1.2177118151157629e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495900 + }, + { + "epoch": 2.405073880934512, + "grad_norm": 1.3663228855875786e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495910 + }, + { + "epoch": 2.4051223791273477, + "grad_norm": 1.1474883649498224e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495920 + }, + { + "epoch": 2.4051708773201836, + "grad_norm": 7.894409463915508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495930 + }, + { + "epoch": 2.40521937551302, + "grad_norm": 1.0857039342226926e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495940 + }, + { + "epoch": 2.405267873705856, + "grad_norm": 9.2774262157036e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495950 + }, + { + "epoch": 2.4053163718986923, + "grad_norm": 9.012602276925463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495960 + }, + { + "epoch": 2.405364870091528, + "grad_norm": 9.661660442361608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 495970 + }, + { + "epoch": 2.405413368284364, + "grad_norm": 0.05887821689248085, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 495980 + }, + { + "epoch": 2.4054618664772005, + "grad_norm": 0.000904686632566154, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 495990 + }, + { + "epoch": 2.4055103646700364, + "grad_norm": 6.367994956235634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496000 + }, + { + "epoch": 2.405558862862873, + "grad_norm": 6.044313522579614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496010 + }, + { + "epoch": 2.4056073610557087, + "grad_norm": 5.5889108807605226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496020 + }, + { + "epoch": 2.4056558592485446, + "grad_norm": 3.3530714063090272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496030 + }, + { + "epoch": 2.405704357441381, + "grad_norm": 5.401282578532118e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496040 + }, + { + "epoch": 2.405752855634217, + "grad_norm": 5.378416062740143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496050 + }, + { + "epoch": 2.405801353827053, + "grad_norm": 5.268501354294131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496060 + }, + { + "epoch": 2.405849852019889, + "grad_norm": 4.7515914047835395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496070 + }, + { + "epoch": 2.405898350212725, + "grad_norm": 2.971947878904757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496080 + }, + { + "epoch": 2.405946848405561, + "grad_norm": 4.813670329895103e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496090 + }, + { + "epoch": 2.4059953465983974, + "grad_norm": 4.389776222524233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496100 + }, + { + "epoch": 2.4060438447912333, + "grad_norm": 4.761097898153821e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496110 + }, + { + "epoch": 2.4060923429840697, + "grad_norm": 4.036600330437068e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496120 + }, + { + "epoch": 2.4061408411769056, + "grad_norm": 2.4405435397056863e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496130 + }, + { + "epoch": 2.4061893393697416, + "grad_norm": 4.334580808063038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496140 + }, + { + "epoch": 2.406237837562578, + "grad_norm": 4.134569280722644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496150 + }, + { + "epoch": 2.406286335755414, + "grad_norm": 3.7084691939526238e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496160 + }, + { + "epoch": 2.40633483394825, + "grad_norm": 6.709984518238343e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 496170 + }, + { + "epoch": 2.406383332141086, + "grad_norm": 9.892540219880175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496180 + }, + { + "epoch": 2.406431830333922, + "grad_norm": 1.649416117288638e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496190 + }, + { + "epoch": 2.4064803285267584, + "grad_norm": 1.837288618844468e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496200 + }, + { + "epoch": 2.4065288267195943, + "grad_norm": 1.3113309250911698e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496210 + }, + { + "epoch": 2.4065773249124303, + "grad_norm": 6.851432408438995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496220 + }, + { + "epoch": 2.4066258231052666, + "grad_norm": 4.616628757503349e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496230 + }, + { + "epoch": 2.4066743212981025, + "grad_norm": 7.261008249770384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496240 + }, + { + "epoch": 2.4067228194909385, + "grad_norm": 5.281735866446979e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496250 + }, + { + "epoch": 2.406771317683775, + "grad_norm": 9.130543730861973e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496260 + }, + { + "epoch": 2.4068198158766108, + "grad_norm": 6.307207513600588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496270 + }, + { + "epoch": 2.406868314069447, + "grad_norm": 1.0688167094485834e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496280 + }, + { + "epoch": 2.406916812262283, + "grad_norm": 4.973920567863388e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496290 + }, + { + "epoch": 2.406965310455119, + "grad_norm": 1.5733321561128832e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496300 + }, + { + "epoch": 2.4070138086479553, + "grad_norm": 1.0624786227708682e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496310 + }, + { + "epoch": 2.4070623068407913, + "grad_norm": 3.6317503600002965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496320 + }, + { + "epoch": 2.4071108050336276, + "grad_norm": 4.7513790377706755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496330 + }, + { + "epoch": 2.4071593032264635, + "grad_norm": 3.3351850561302854e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496340 + }, + { + "epoch": 2.4072078014192995, + "grad_norm": 3.710372538989759e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496350 + }, + { + "epoch": 2.407256299612136, + "grad_norm": 3.6001285934617044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496360 + }, + { + "epoch": 2.4073047978049718, + "grad_norm": 5.038444669480668e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496370 + }, + { + "epoch": 2.407353295997808, + "grad_norm": 2.1104954157635802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496380 + }, + { + "epoch": 2.407401794190644, + "grad_norm": 2.978326847369317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496390 + }, + { + "epoch": 2.40745029238348, + "grad_norm": 2.5906410883180797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496400 + }, + { + "epoch": 2.4074987905763163, + "grad_norm": 3.7764195894851582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496410 + }, + { + "epoch": 2.4075472887691522, + "grad_norm": 4.351478764874628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496420 + }, + { + "epoch": 2.407595786961988, + "grad_norm": 1.6192622069866047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496430 + }, + { + "epoch": 2.4076442851548245, + "grad_norm": 2.4393027615587926e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496440 + }, + { + "epoch": 2.4076927833476605, + "grad_norm": 2.224948957518791e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496450 + }, + { + "epoch": 2.4077412815404964, + "grad_norm": 2.4126343305397313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496460 + }, + { + "epoch": 2.4077897797333327, + "grad_norm": 2.777995405267575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496470 + }, + { + "epoch": 2.4078382779261687, + "grad_norm": 1.444831355001952e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496480 + }, + { + "epoch": 2.407886776119005, + "grad_norm": 2.049801423709141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496490 + }, + { + "epoch": 2.407935274311841, + "grad_norm": 2.018697159655858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496500 + }, + { + "epoch": 2.407983772504677, + "grad_norm": 4.864443326368928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496510 + }, + { + "epoch": 2.4080322706975132, + "grad_norm": 1.925516244227765e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496520 + }, + { + "epoch": 2.408080768890349, + "grad_norm": 3.0975752451922745e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496530 + }, + { + "epoch": 2.4081292670831855, + "grad_norm": 2.004374664466013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496540 + }, + { + "epoch": 2.4081777652760215, + "grad_norm": 2.043713720922824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496550 + }, + { + "epoch": 2.4082262634688574, + "grad_norm": 2.003004965445143e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496560 + }, + { + "epoch": 2.4082747616616937, + "grad_norm": 1.932232862600358e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496570 + }, + { + "epoch": 2.4083232598545297, + "grad_norm": 0.0007070046267472208, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496580 + }, + { + "epoch": 2.4083717580473656, + "grad_norm": 1.673224346632196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496590 + }, + { + "epoch": 2.408420256240202, + "grad_norm": 1.7251700228371192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496600 + }, + { + "epoch": 2.408468754433038, + "grad_norm": 1.841028733906569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496610 + }, + { + "epoch": 2.408517252625874, + "grad_norm": 2.6726768282969715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496620 + }, + { + "epoch": 2.40856575081871, + "grad_norm": 1.0558214853517711e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496630 + }, + { + "epoch": 2.408614249011546, + "grad_norm": 1.552171170260408e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496640 + }, + { + "epoch": 2.4086627472043824, + "grad_norm": 1.5971313587215263e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496650 + }, + { + "epoch": 2.4087112453972184, + "grad_norm": 2.19054732042423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496660 + }, + { + "epoch": 2.4087597435900543, + "grad_norm": 1.59248168074555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496670 + }, + { + "epoch": 2.4088082417828907, + "grad_norm": 9.546325827614055e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496680 + }, + { + "epoch": 2.4088567399757266, + "grad_norm": 1.3911713949710247e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496690 + }, + { + "epoch": 2.408905238168563, + "grad_norm": 1.415215933775471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496700 + }, + { + "epoch": 2.408953736361399, + "grad_norm": 1.4043142755326699e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496710 + }, + { + "epoch": 2.409002234554235, + "grad_norm": 1.442005327589868e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496720 + }, + { + "epoch": 2.409050732747071, + "grad_norm": 8.545687819605519e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496730 + }, + { + "epoch": 2.409099230939907, + "grad_norm": 1.330376335317851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496740 + }, + { + "epoch": 2.409147729132743, + "grad_norm": 1.688228735474695e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496750 + }, + { + "epoch": 2.4091962273255794, + "grad_norm": 9.03491581993876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496760 + }, + { + "epoch": 2.4092447255184153, + "grad_norm": 1.2822544022128568e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496770 + }, + { + "epoch": 2.4092932237112517, + "grad_norm": 8.567577083340439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496780 + }, + { + "epoch": 2.4093417219040876, + "grad_norm": 1.2276951792955515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496790 + }, + { + "epoch": 2.4093902200969235, + "grad_norm": 1.2554128261399455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496800 + }, + { + "epoch": 2.40943871828976, + "grad_norm": 1.2388097729854053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496810 + }, + { + "epoch": 2.409487216482596, + "grad_norm": 1.227287043548131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496820 + }, + { + "epoch": 2.4095357146754317, + "grad_norm": 8.676358334014367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496830 + }, + { + "epoch": 2.409584212868268, + "grad_norm": 1.1644926871667849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496840 + }, + { + "epoch": 2.409632711061104, + "grad_norm": 1.2589791822392726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496850 + }, + { + "epoch": 2.4096812092539404, + "grad_norm": 1.2419002359820297e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496860 + }, + { + "epoch": 2.4097297074467763, + "grad_norm": 1.134607259700715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496870 + }, + { + "epoch": 2.409778205639612, + "grad_norm": 7.953710223773669e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496880 + }, + { + "epoch": 2.4098267038324486, + "grad_norm": 1.1058134532504482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496890 + }, + { + "epoch": 2.4098752020252845, + "grad_norm": 1.1187684094693395e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496900 + }, + { + "epoch": 2.409923700218121, + "grad_norm": 1.5324683317885501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496910 + }, + { + "epoch": 2.409972198410957, + "grad_norm": 4.24265408582869e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496920 + }, + { + "epoch": 2.4100206966037927, + "grad_norm": 7.17072850875411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496930 + }, + { + "epoch": 2.410069194796629, + "grad_norm": 1.4039314919500612e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496940 + }, + { + "epoch": 2.410117692989465, + "grad_norm": 9.914274414768443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496950 + }, + { + "epoch": 2.410166191182301, + "grad_norm": 1.0696340950744343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496960 + }, + { + "epoch": 2.4102146893751373, + "grad_norm": 9.61821911005245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496970 + }, + { + "epoch": 2.410263187567973, + "grad_norm": 7.041538765406585e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496980 + }, + { + "epoch": 2.410311685760809, + "grad_norm": 9.83591121439531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 496990 + }, + { + "epoch": 2.4103601839536455, + "grad_norm": 9.887414762488334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497000 + }, + { + "epoch": 2.4104086821464814, + "grad_norm": 1.3306606660989928e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497010 + }, + { + "epoch": 2.4104571803393178, + "grad_norm": 9.807046126297791e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497020 + }, + { + "epoch": 2.4105056785321537, + "grad_norm": 8.16105341527873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497030 + }, + { + "epoch": 2.4105541767249896, + "grad_norm": 1.035767581925029e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497040 + }, + { + "epoch": 2.410602674917826, + "grad_norm": 1.8281555185239995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497050 + }, + { + "epoch": 2.410651173110662, + "grad_norm": 9.048235369846225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497060 + }, + { + "epoch": 2.4106996713034983, + "grad_norm": 9.351697940473969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497070 + }, + { + "epoch": 2.410748169496334, + "grad_norm": 1.168680228147423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497080 + }, + { + "epoch": 2.41079666768917, + "grad_norm": 9.288489764003316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497090 + }, + { + "epoch": 2.4108451658820065, + "grad_norm": 1.1050659622924286e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497100 + }, + { + "epoch": 2.4108936640748424, + "grad_norm": 7.900298442109488e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497110 + }, + { + "epoch": 2.4109421622676783, + "grad_norm": 9.692776075098664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497120 + }, + { + "epoch": 2.4109906604605147, + "grad_norm": 6.134167733762297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497130 + }, + { + "epoch": 2.4110391586533506, + "grad_norm": 9.122273354478239e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497140 + }, + { + "epoch": 2.4110876568461865, + "grad_norm": 8.244180094152398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497150 + }, + { + "epoch": 2.411136155039023, + "grad_norm": 8.138404723467829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497160 + }, + { + "epoch": 2.411184653231859, + "grad_norm": 8.548299774702173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497170 + }, + { + "epoch": 2.411233151424695, + "grad_norm": 1.0248243142996216e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497180 + }, + { + "epoch": 2.411281649617531, + "grad_norm": 1.2963089375261916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497190 + }, + { + "epoch": 2.411330147810367, + "grad_norm": 7.868633247198886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497200 + }, + { + "epoch": 2.4113786460032034, + "grad_norm": 8.133080768857326e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497210 + }, + { + "epoch": 2.4114271441960393, + "grad_norm": 7.510139425903617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497220 + }, + { + "epoch": 2.4114756423888757, + "grad_norm": 6.89102364503924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497230 + }, + { + "epoch": 2.4115241405817116, + "grad_norm": 7.805583095432667e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497240 + }, + { + "epoch": 2.4115726387745475, + "grad_norm": 2.222949888164294e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497250 + }, + { + "epoch": 2.411621136967384, + "grad_norm": 7.457598485416383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497260 + }, + { + "epoch": 2.41166963516022, + "grad_norm": 7.521222187278909e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497270 + }, + { + "epoch": 2.411718133353056, + "grad_norm": 5.860679834768234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497280 + }, + { + "epoch": 2.411766631545892, + "grad_norm": 7.564447059849044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497290 + }, + { + "epoch": 2.411815129738728, + "grad_norm": 6.957905043236678e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497300 + }, + { + "epoch": 2.4118636279315644, + "grad_norm": 8.042511581152212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497310 + }, + { + "epoch": 2.4119121261244003, + "grad_norm": 8.115480909509643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497320 + }, + { + "epoch": 2.4119606243172362, + "grad_norm": 5.131124112267571e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497330 + }, + { + "epoch": 2.4120091225100726, + "grad_norm": 1.1409640592319192e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497340 + }, + { + "epoch": 2.4120576207029085, + "grad_norm": 7.157232744248176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497350 + }, + { + "epoch": 2.4121061188957444, + "grad_norm": 7.132094310691173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497360 + }, + { + "epoch": 2.412154617088581, + "grad_norm": 7.893164024608268e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497370 + }, + { + "epoch": 2.4122031152814167, + "grad_norm": 4.811190592590719e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497380 + }, + { + "epoch": 2.412251613474253, + "grad_norm": 7.657305332031683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497390 + }, + { + "epoch": 2.412300111667089, + "grad_norm": 8.297149065583653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497400 + }, + { + "epoch": 2.412348609859925, + "grad_norm": 6.878848353153444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497410 + }, + { + "epoch": 2.4123971080527613, + "grad_norm": 6.808963348703401e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497420 + }, + { + "epoch": 2.4124456062455972, + "grad_norm": 4.607217363172822e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497430 + }, + { + "epoch": 2.4124941044384336, + "grad_norm": 9.871447446130333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497440 + }, + { + "epoch": 2.4125426026312695, + "grad_norm": 6.535720444844628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497450 + }, + { + "epoch": 2.4125911008241054, + "grad_norm": 7.389480742858723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497460 + }, + { + "epoch": 2.412639599016942, + "grad_norm": 7.075756229824037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497470 + }, + { + "epoch": 2.4126880972097777, + "grad_norm": 4.4116117692283296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497480 + }, + { + "epoch": 2.4127365954026136, + "grad_norm": 7.160517725424143e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497490 + }, + { + "epoch": 2.41278509359545, + "grad_norm": 5.551053163799224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497500 + }, + { + "epoch": 2.412833591788286, + "grad_norm": 6.112636015132011e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497510 + }, + { + "epoch": 2.412882089981122, + "grad_norm": 6.507881948891736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497520 + }, + { + "epoch": 2.4129305881739582, + "grad_norm": 7.687725656069233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497530 + }, + { + "epoch": 2.412979086366794, + "grad_norm": 1.1912945865333313e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497540 + }, + { + "epoch": 2.4130275845596305, + "grad_norm": 6.110826689109672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497550 + }, + { + "epoch": 2.4130760827524664, + "grad_norm": 6.568376420545974e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497560 + }, + { + "epoch": 2.4131245809453024, + "grad_norm": 5.606980266747996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497570 + }, + { + "epoch": 2.4131730791381387, + "grad_norm": 4.533536355211254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497580 + }, + { + "epoch": 2.4132215773309746, + "grad_norm": 5.733213015446381e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497590 + }, + { + "epoch": 2.413270075523811, + "grad_norm": 6.285527547333913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497600 + }, + { + "epoch": 2.413318573716647, + "grad_norm": 5.8818233128477e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497610 + }, + { + "epoch": 2.413367071909483, + "grad_norm": 5.802476152894087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497620 + }, + { + "epoch": 2.413415570102319, + "grad_norm": 5.06457467963628e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497630 + }, + { + "epoch": 2.413464068295155, + "grad_norm": 6.016315978740749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497640 + }, + { + "epoch": 2.413512566487991, + "grad_norm": 5.911161906624329e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497650 + }, + { + "epoch": 2.4135610646808274, + "grad_norm": 5.742431312683038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497660 + }, + { + "epoch": 2.4136095628736634, + "grad_norm": 5.932669182584505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497670 + }, + { + "epoch": 2.4136580610664993, + "grad_norm": 3.7778033856739057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497680 + }, + { + "epoch": 2.4137065592593356, + "grad_norm": 4.948720402353501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497690 + }, + { + "epoch": 2.4137550574521716, + "grad_norm": 5.166560868019587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497700 + }, + { + "epoch": 2.413803555645008, + "grad_norm": 5.123555411046254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497710 + }, + { + "epoch": 2.413852053837844, + "grad_norm": 5.388102977121889e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497720 + }, + { + "epoch": 2.4139005520306798, + "grad_norm": 3.4448083852112177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497730 + }, + { + "epoch": 2.413949050223516, + "grad_norm": 5.349600087356521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497740 + }, + { + "epoch": 2.413997548416352, + "grad_norm": 5.248894581200148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497750 + }, + { + "epoch": 2.4140460466091884, + "grad_norm": 5.195323637963156e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497760 + }, + { + "epoch": 2.4140945448020243, + "grad_norm": 1.3797530300507788e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497770 + }, + { + "epoch": 2.4141430429948603, + "grad_norm": 3.48756287849028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497780 + }, + { + "epoch": 2.4141915411876966, + "grad_norm": 4.870586849392566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497790 + }, + { + "epoch": 2.4142400393805326, + "grad_norm": 6.486528718596674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497800 + }, + { + "epoch": 2.414288537573369, + "grad_norm": 6.480491947513656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497810 + }, + { + "epoch": 2.414337035766205, + "grad_norm": 6.448091767197184e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497820 + }, + { + "epoch": 2.4143855339590408, + "grad_norm": 3.1343773798653274e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497830 + }, + { + "epoch": 2.414434032151877, + "grad_norm": 4.549014818167052e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497840 + }, + { + "epoch": 2.414482530344713, + "grad_norm": 4.5666996584259323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497850 + }, + { + "epoch": 2.414531028537549, + "grad_norm": 4.6749087800890265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497860 + }, + { + "epoch": 2.4145795267303853, + "grad_norm": 4.62025781189368e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497870 + }, + { + "epoch": 2.4146280249232213, + "grad_norm": 4.1283095697508543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497880 + }, + { + "epoch": 2.414676523116057, + "grad_norm": 4.263983441887831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497890 + }, + { + "epoch": 2.4147250213088935, + "grad_norm": 4.268816269359377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497900 + }, + { + "epoch": 2.4147735195017295, + "grad_norm": 5.226260100243962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497910 + }, + { + "epoch": 2.414822017694566, + "grad_norm": 6.914384016454278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497920 + }, + { + "epoch": 2.4148705158874018, + "grad_norm": 3.0459398203674937e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497930 + }, + { + "epoch": 2.4149190140802377, + "grad_norm": 4.3172929053980624e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497940 + }, + { + "epoch": 2.414967512273074, + "grad_norm": 5.556672704187804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497950 + }, + { + "epoch": 2.41501601046591, + "grad_norm": 4.039358429963613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497960 + }, + { + "epoch": 2.4150645086587463, + "grad_norm": 5.089763135401881e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497970 + }, + { + "epoch": 2.4151130068515823, + "grad_norm": 3.1747075013299764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497980 + }, + { + "epoch": 2.415161505044418, + "grad_norm": 4.1830497821138124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 497990 + }, + { + "epoch": 2.4152100032372545, + "grad_norm": 4.464394578462816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498000 + }, + { + "epoch": 2.4152585014300905, + "grad_norm": 4.596147391566774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498010 + }, + { + "epoch": 2.4153069996229264, + "grad_norm": 6.726040169269254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498020 + }, + { + "epoch": 2.4153554978157628, + "grad_norm": 4.028400439892721e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498030 + }, + { + "epoch": 2.4154039960085987, + "grad_norm": 1.1334535656715161e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498040 + }, + { + "epoch": 2.4154524942014346, + "grad_norm": 4.1849736476251564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498050 + }, + { + "epoch": 2.415500992394271, + "grad_norm": 4.299012630326615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498060 + }, + { + "epoch": 2.415549490587107, + "grad_norm": 4.366457346804964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498070 + }, + { + "epoch": 2.4155979887799433, + "grad_norm": 7.173349558797781e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498080 + }, + { + "epoch": 2.415646486972779, + "grad_norm": 3.9448718780477066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498090 + }, + { + "epoch": 2.415694985165615, + "grad_norm": 3.7629465055033506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498100 + }, + { + "epoch": 2.4157434833584515, + "grad_norm": 3.9469057355745463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498110 + }, + { + "epoch": 2.4157919815512874, + "grad_norm": 4.489899652071472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498120 + }, + { + "epoch": 2.4158404797441237, + "grad_norm": 2.5690056304483733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498130 + }, + { + "epoch": 2.4158889779369597, + "grad_norm": 4.370921260488103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498140 + }, + { + "epoch": 2.4159374761297956, + "grad_norm": 3.84712848244817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498150 + }, + { + "epoch": 2.415985974322632, + "grad_norm": 3.71326308368225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498160 + }, + { + "epoch": 2.416034472515468, + "grad_norm": 3.8114757217044826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498170 + }, + { + "epoch": 2.416082970708304, + "grad_norm": 2.528717288896587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498180 + }, + { + "epoch": 2.41613146890114, + "grad_norm": 3.3329723692077096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498190 + }, + { + "epoch": 2.416179967093976, + "grad_norm": 3.8052357922424562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498200 + }, + { + "epoch": 2.416228465286812, + "grad_norm": 6.441837854254118e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498210 + }, + { + "epoch": 2.4162769634796484, + "grad_norm": 3.7626182347594295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498220 + }, + { + "epoch": 2.4163254616724843, + "grad_norm": 2.2927027032437763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498230 + }, + { + "epoch": 2.4163739598653207, + "grad_norm": 3.1764508889864373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498240 + }, + { + "epoch": 2.4164224580581566, + "grad_norm": 3.4994727116099966e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498250 + }, + { + "epoch": 2.4164709562509925, + "grad_norm": 3.865355324705888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498260 + }, + { + "epoch": 2.416519454443829, + "grad_norm": 3.397888406198035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498270 + }, + { + "epoch": 2.416567952636665, + "grad_norm": 2.3280811944914603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498280 + }, + { + "epoch": 2.416616450829501, + "grad_norm": 5.62780599011603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498290 + }, + { + "epoch": 2.416664949022337, + "grad_norm": 3.4933822234961553e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498300 + }, + { + "epoch": 2.416713447215173, + "grad_norm": 6.897275284245552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498310 + }, + { + "epoch": 2.4167619454080094, + "grad_norm": 4.651981271308614e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498320 + }, + { + "epoch": 2.4168104436008453, + "grad_norm": 2.0622235297196312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498330 + }, + { + "epoch": 2.4168589417936817, + "grad_norm": 3.567586190911243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498340 + }, + { + "epoch": 2.4169074399865176, + "grad_norm": 2.928412413893966e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498350 + }, + { + "epoch": 2.4169559381793535, + "grad_norm": 3.345180346059351e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498360 + }, + { + "epoch": 2.41700443637219, + "grad_norm": 1.0002581802837085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498370 + }, + { + "epoch": 2.417052934565026, + "grad_norm": 2.1392980897871894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498380 + }, + { + "epoch": 2.4171014327578617, + "grad_norm": 3.2748090461609536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498390 + }, + { + "epoch": 2.417149930950698, + "grad_norm": 3.49438494140486e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498400 + }, + { + "epoch": 2.417198429143534, + "grad_norm": 2.944821915207285e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498410 + }, + { + "epoch": 2.41724692733637, + "grad_norm": 3.116064988262224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498420 + }, + { + "epoch": 2.4172954255292063, + "grad_norm": 2.493963506822183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498430 + }, + { + "epoch": 2.417343923722042, + "grad_norm": 3.010475779774424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498440 + }, + { + "epoch": 2.4173924219148786, + "grad_norm": 3.149610563468741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498450 + }, + { + "epoch": 2.4174409201077145, + "grad_norm": 3.221867643787846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498460 + }, + { + "epoch": 2.4174894183005504, + "grad_norm": 3.0914767989997927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498470 + }, + { + "epoch": 2.417537916493387, + "grad_norm": 2.088957842261152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498480 + }, + { + "epoch": 2.4175864146862227, + "grad_norm": 2.846608708750864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498490 + }, + { + "epoch": 2.417634912879059, + "grad_norm": 3.369273713360599e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498500 + }, + { + "epoch": 2.417683411071895, + "grad_norm": 3.010907505540672e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498510 + }, + { + "epoch": 2.417731909264731, + "grad_norm": 1.1390640111130779e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498520 + }, + { + "epoch": 2.4177804074575673, + "grad_norm": 2.0039433934471163e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498530 + }, + { + "epoch": 2.417828905650403, + "grad_norm": 3.7319156831472355e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498540 + }, + { + "epoch": 2.417877403843239, + "grad_norm": 3.3321458658974734e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498550 + }, + { + "epoch": 2.4179259020360755, + "grad_norm": 2.696915260003152e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498560 + }, + { + "epoch": 2.4179744002289114, + "grad_norm": 8.667256042826921e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498570 + }, + { + "epoch": 2.4180228984217473, + "grad_norm": 1.7921234984896728e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498580 + }, + { + "epoch": 2.4180713966145837, + "grad_norm": 2.819881785853795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498590 + }, + { + "epoch": 2.4181198948074196, + "grad_norm": 2.7564112770050997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498600 + }, + { + "epoch": 2.418168393000256, + "grad_norm": 3.134449855224375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498610 + }, + { + "epoch": 2.418216891193092, + "grad_norm": 3.037531826066697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498620 + }, + { + "epoch": 2.418265389385928, + "grad_norm": 2.22232486635221e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498630 + }, + { + "epoch": 2.418313887578764, + "grad_norm": 2.6626963745002286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498640 + }, + { + "epoch": 2.4183623857716, + "grad_norm": 2.9602892936964054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498650 + }, + { + "epoch": 2.4184108839644365, + "grad_norm": 3.9443233390557e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498660 + }, + { + "epoch": 2.4184593821572724, + "grad_norm": 2.431962684568134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498670 + }, + { + "epoch": 2.4185078803501083, + "grad_norm": 0.2416875660419464, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498680 + }, + { + "epoch": 2.4185563785429447, + "grad_norm": 1.4129837836662773e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 498690 + }, + { + "epoch": 2.4186048767357806, + "grad_norm": 0.0002051424962701276, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498700 + }, + { + "epoch": 2.4186533749286165, + "grad_norm": 3.427567571634427e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498710 + }, + { + "epoch": 2.418701873121453, + "grad_norm": 1.0612126970954705e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498720 + }, + { + "epoch": 2.418750371314289, + "grad_norm": 0.00826122798025608, + "learning_rate": 0.0002, + "loss": 0.0042, + "step": 498730 + }, + { + "epoch": 2.4187988695071247, + "grad_norm": 0.00012929631338920444, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 498740 + }, + { + "epoch": 2.418847367699961, + "grad_norm": 0.0008485518046654761, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498750 + }, + { + "epoch": 2.418895865892797, + "grad_norm": 3.5169221519026905e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 498760 + }, + { + "epoch": 2.4189443640856334, + "grad_norm": 3.9522270526504144e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498770 + }, + { + "epoch": 2.4189928622784693, + "grad_norm": 3.296780778327957e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498780 + }, + { + "epoch": 2.4190413604713052, + "grad_norm": 3.673421451821923e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498790 + }, + { + "epoch": 2.4190898586641416, + "grad_norm": 3.21089755743742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498800 + }, + { + "epoch": 2.4191383568569775, + "grad_norm": 2.6053867259179242e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498810 + }, + { + "epoch": 2.419186855049814, + "grad_norm": 2.3771606720401905e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498820 + }, + { + "epoch": 2.41923535324265, + "grad_norm": 1.838364732975606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498830 + }, + { + "epoch": 2.4192838514354857, + "grad_norm": 1.8123708287021145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498840 + }, + { + "epoch": 2.419332349628322, + "grad_norm": 1.723506829875987e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498850 + }, + { + "epoch": 2.419380847821158, + "grad_norm": 1.6659150787745602e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498860 + }, + { + "epoch": 2.4194293460139944, + "grad_norm": 1.4374218153534457e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498870 + }, + { + "epoch": 2.4194778442068303, + "grad_norm": 1.2636415704037063e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498880 + }, + { + "epoch": 2.4195263423996662, + "grad_norm": 1.3605786989501212e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498890 + }, + { + "epoch": 2.4195748405925026, + "grad_norm": 1.1912999070773367e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498900 + }, + { + "epoch": 2.4196233387853385, + "grad_norm": 1.203280953632202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498910 + }, + { + "epoch": 2.4196718369781745, + "grad_norm": 1.136901028075954e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498920 + }, + { + "epoch": 2.419720335171011, + "grad_norm": 9.626207429391798e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498930 + }, + { + "epoch": 2.4197688333638467, + "grad_norm": 1.0130519513040781e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498940 + }, + { + "epoch": 2.4198173315566827, + "grad_norm": 9.187864634441212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498950 + }, + { + "epoch": 2.419865829749519, + "grad_norm": 9.28726058191387e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498960 + }, + { + "epoch": 2.419914327942355, + "grad_norm": 8.832740604702849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498970 + }, + { + "epoch": 2.4199628261351913, + "grad_norm": 8.139613782986999e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498980 + }, + { + "epoch": 2.4200113243280272, + "grad_norm": 7.761019332974683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 498990 + }, + { + "epoch": 2.420059822520863, + "grad_norm": 7.5453153840499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499000 + }, + { + "epoch": 2.4201083207136995, + "grad_norm": 7.601700872328365e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499010 + }, + { + "epoch": 2.4201568189065354, + "grad_norm": 7.14181260264013e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499020 + }, + { + "epoch": 2.420205317099372, + "grad_norm": 6.176705028337892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499030 + }, + { + "epoch": 2.4202538152922077, + "grad_norm": 6.679001216980396e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499040 + }, + { + "epoch": 2.4203023134850437, + "grad_norm": 6.385859251167858e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499050 + }, + { + "epoch": 2.42035081167788, + "grad_norm": 6.311501238087658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499060 + }, + { + "epoch": 2.420399309870716, + "grad_norm": 6.334800673357677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499070 + }, + { + "epoch": 2.420447808063552, + "grad_norm": 5.260860689304536e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499080 + }, + { + "epoch": 2.4204963062563882, + "grad_norm": 5.820837031933479e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499090 + }, + { + "epoch": 2.420544804449224, + "grad_norm": 5.30088118466665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499100 + }, + { + "epoch": 2.42059330264206, + "grad_norm": 5.571971996687353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499110 + }, + { + "epoch": 2.4206418008348964, + "grad_norm": 5.3375133575173095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499120 + }, + { + "epoch": 2.4206902990277324, + "grad_norm": 4.685293788497802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499130 + }, + { + "epoch": 2.4207387972205687, + "grad_norm": 5.6262970247189514e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499140 + }, + { + "epoch": 2.4207872954134046, + "grad_norm": 4.8368888201366644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499150 + }, + { + "epoch": 2.4208357936062406, + "grad_norm": 4.899024588667089e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499160 + }, + { + "epoch": 2.420884291799077, + "grad_norm": 4.728043222712586e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499170 + }, + { + "epoch": 2.420932789991913, + "grad_norm": 4.245941454428248e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499180 + }, + { + "epoch": 2.4209812881847492, + "grad_norm": 4.546312538877828e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499190 + }, + { + "epoch": 2.421029786377585, + "grad_norm": 3.993844075012021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499200 + }, + { + "epoch": 2.421078284570421, + "grad_norm": 4.0155100577976555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499210 + }, + { + "epoch": 2.4211267827632574, + "grad_norm": 4.021936547360383e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499220 + }, + { + "epoch": 2.4211752809560934, + "grad_norm": 3.53498944605235e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499230 + }, + { + "epoch": 2.4212237791489293, + "grad_norm": 3.923968051822158e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499240 + }, + { + "epoch": 2.4212722773417656, + "grad_norm": 3.683837121570832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499250 + }, + { + "epoch": 2.4213207755346016, + "grad_norm": 3.6488600017037243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499260 + }, + { + "epoch": 2.4213692737274375, + "grad_norm": 3.6030501178174745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499270 + }, + { + "epoch": 2.421417771920274, + "grad_norm": 3.120832388958661e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499280 + }, + { + "epoch": 2.4214662701131098, + "grad_norm": 4.364534106571227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499290 + }, + { + "epoch": 2.421514768305946, + "grad_norm": 3.2627610835334053e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499300 + }, + { + "epoch": 2.421563266498782, + "grad_norm": 3.3108944990090095e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499310 + }, + { + "epoch": 2.421611764691618, + "grad_norm": 3.4311763101868564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499320 + }, + { + "epoch": 2.4216602628844544, + "grad_norm": 3.0178375709510874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499330 + }, + { + "epoch": 2.4217087610772903, + "grad_norm": 3.3323308343824465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499340 + }, + { + "epoch": 2.4217572592701266, + "grad_norm": 2.900368826885824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499350 + }, + { + "epoch": 2.4218057574629626, + "grad_norm": 2.904658458646736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499360 + }, + { + "epoch": 2.4218542556557985, + "grad_norm": 2.8556644338095794e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499370 + }, + { + "epoch": 2.421902753848635, + "grad_norm": 2.510280410206178e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499380 + }, + { + "epoch": 2.4219512520414708, + "grad_norm": 2.6048805921163876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499390 + }, + { + "epoch": 2.421999750234307, + "grad_norm": 2.851440285667195e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499400 + }, + { + "epoch": 2.422048248427143, + "grad_norm": 2.541081812523771e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499410 + }, + { + "epoch": 2.422096746619979, + "grad_norm": 2.661356347744004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499420 + }, + { + "epoch": 2.4221452448128153, + "grad_norm": 2.6009365683421493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499430 + }, + { + "epoch": 2.4221937430056513, + "grad_norm": 2.5296126295870636e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499440 + }, + { + "epoch": 2.422242241198487, + "grad_norm": 2.3936383968248265e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499450 + }, + { + "epoch": 2.4222907393913236, + "grad_norm": 7.401792390737683e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499460 + }, + { + "epoch": 2.4223392375841595, + "grad_norm": 2.3562890874018194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499470 + }, + { + "epoch": 2.4223877357769954, + "grad_norm": 2.1529224341065856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499480 + }, + { + "epoch": 2.4224362339698318, + "grad_norm": 2.2575106868316652e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499490 + }, + { + "epoch": 2.4224847321626677, + "grad_norm": 2.342063226024038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499500 + }, + { + "epoch": 2.422533230355504, + "grad_norm": 2.1807165921927663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499510 + }, + { + "epoch": 2.42258172854834, + "grad_norm": 2.1659286630892893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499520 + }, + { + "epoch": 2.422630226741176, + "grad_norm": 1.929466634464916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499530 + }, + { + "epoch": 2.4226787249340123, + "grad_norm": 2.1402663605840644e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499540 + }, + { + "epoch": 2.422727223126848, + "grad_norm": 2.023110482696211e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499550 + }, + { + "epoch": 2.4227757213196845, + "grad_norm": 2.0076734017493436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499560 + }, + { + "epoch": 2.4228242195125205, + "grad_norm": 2.1964199277135776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499570 + }, + { + "epoch": 2.4228727177053564, + "grad_norm": 2.0717498045996763e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499580 + }, + { + "epoch": 2.4229212158981928, + "grad_norm": 2.0780746581294807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499590 + }, + { + "epoch": 2.4229697140910287, + "grad_norm": 1.9300318854220677e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499600 + }, + { + "epoch": 2.4230182122838646, + "grad_norm": 1.8747236936178524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499610 + }, + { + "epoch": 2.423066710476701, + "grad_norm": 1.912038896989543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499620 + }, + { + "epoch": 2.423115208669537, + "grad_norm": 1.6343035440513631e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499630 + }, + { + "epoch": 2.423163706862373, + "grad_norm": 1.9375204374227906e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499640 + }, + { + "epoch": 2.423212205055209, + "grad_norm": 1.7714860405249055e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499650 + }, + { + "epoch": 2.423260703248045, + "grad_norm": 1.803405780265166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499660 + }, + { + "epoch": 2.4233092014408815, + "grad_norm": 1.6965481108854874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499670 + }, + { + "epoch": 2.4233576996337174, + "grad_norm": 1.7522164625916048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499680 + }, + { + "epoch": 2.4234061978265533, + "grad_norm": 1.7271639762839186e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499690 + }, + { + "epoch": 2.4234546960193897, + "grad_norm": 1.5256118786055595e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499700 + }, + { + "epoch": 2.4235031942122256, + "grad_norm": 1.8301176396562369e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499710 + }, + { + "epoch": 2.423551692405062, + "grad_norm": 1.5577020349155646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499720 + }, + { + "epoch": 2.423600190597898, + "grad_norm": 1.5103114492376335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499730 + }, + { + "epoch": 2.423648688790734, + "grad_norm": 1.6193043848033994e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499740 + }, + { + "epoch": 2.42369718698357, + "grad_norm": 1.3834239780408097e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499750 + }, + { + "epoch": 2.423745685176406, + "grad_norm": 1.6799526747490745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499760 + }, + { + "epoch": 2.423794183369242, + "grad_norm": 1.460960788790544e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499770 + }, + { + "epoch": 2.4238426815620784, + "grad_norm": 1.4570695157090086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499780 + }, + { + "epoch": 2.4238911797549143, + "grad_norm": 1.5128058521440835e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499790 + }, + { + "epoch": 2.4239396779477502, + "grad_norm": 1.4213901522452943e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499800 + }, + { + "epoch": 2.4239881761405866, + "grad_norm": 1.379256787004124e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499810 + }, + { + "epoch": 2.4240366743334225, + "grad_norm": 1.5615615893693757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499820 + }, + { + "epoch": 2.424085172526259, + "grad_norm": 1.3942177474746131e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499830 + }, + { + "epoch": 2.424133670719095, + "grad_norm": 1.3649888614963857e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499840 + }, + { + "epoch": 2.4241821689119307, + "grad_norm": 1.5421690022776602e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499850 + }, + { + "epoch": 2.424230667104767, + "grad_norm": 1.4572423197023454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499860 + }, + { + "epoch": 2.424279165297603, + "grad_norm": 1.330399641119584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499870 + }, + { + "epoch": 2.4243276634904394, + "grad_norm": 1.2236284874234116e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499880 + }, + { + "epoch": 2.4243761616832753, + "grad_norm": 1.3308592770044925e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499890 + }, + { + "epoch": 2.424424659876111, + "grad_norm": 1.3360495358938351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499900 + }, + { + "epoch": 2.4244731580689476, + "grad_norm": 1.2139474847572274e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499910 + }, + { + "epoch": 2.4245216562617835, + "grad_norm": 1.245767862201319e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499920 + }, + { + "epoch": 2.42457015445462, + "grad_norm": 1.181817424367182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499930 + }, + { + "epoch": 2.424618652647456, + "grad_norm": 1.280392098124139e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499940 + }, + { + "epoch": 2.4246671508402917, + "grad_norm": 1.2690212543020607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499950 + }, + { + "epoch": 2.424715649033128, + "grad_norm": 1.2466068710637046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499960 + }, + { + "epoch": 2.424764147225964, + "grad_norm": 1.380661160510499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499970 + }, + { + "epoch": 2.4248126454188, + "grad_norm": 1.2246046026120894e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499980 + }, + { + "epoch": 2.4248611436116363, + "grad_norm": 1.1896163414348848e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 499990 + }, + { + "epoch": 2.424909641804472, + "grad_norm": 1.1839232456622995e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500000 + }, + { + "epoch": 2.424958139997308, + "grad_norm": 1.214775011249003e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500010 + }, + { + "epoch": 2.4250066381901445, + "grad_norm": 1.3549027926273993e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500020 + }, + { + "epoch": 2.4250551363829804, + "grad_norm": 1.0664001592886052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500030 + }, + { + "epoch": 2.425103634575817, + "grad_norm": 1.2034780638714437e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500040 + }, + { + "epoch": 2.4251521327686527, + "grad_norm": 1.090217892851797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500050 + }, + { + "epoch": 2.4252006309614886, + "grad_norm": 1.1199173286513542e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500060 + }, + { + "epoch": 2.425249129154325, + "grad_norm": 1.0069927611766616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500070 + }, + { + "epoch": 2.425297627347161, + "grad_norm": 1.0170023188038613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500080 + }, + { + "epoch": 2.4253461255399973, + "grad_norm": 9.924527830662555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500090 + }, + { + "epoch": 2.425394623732833, + "grad_norm": 1.0629669304762501e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500100 + }, + { + "epoch": 2.425443121925669, + "grad_norm": 1.0145230362468283e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500110 + }, + { + "epoch": 2.4254916201185055, + "grad_norm": 1.0126889264938654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500120 + }, + { + "epoch": 2.4255401183113414, + "grad_norm": 1.0175110674026655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500130 + }, + { + "epoch": 2.4255886165041773, + "grad_norm": 1.7070884723580093e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500140 + }, + { + "epoch": 2.4256371146970137, + "grad_norm": 1.0074919600810972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500150 + }, + { + "epoch": 2.4256856128898496, + "grad_norm": 1.1157383141835453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500160 + }, + { + "epoch": 2.4257341110826856, + "grad_norm": 9.265868925467657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500170 + }, + { + "epoch": 2.425782609275522, + "grad_norm": 1.0550243132456671e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500180 + }, + { + "epoch": 2.425831107468358, + "grad_norm": 9.63180241342343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500190 + }, + { + "epoch": 2.425879605661194, + "grad_norm": 9.870528856481542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500200 + }, + { + "epoch": 2.42592810385403, + "grad_norm": 9.395891424901492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500210 + }, + { + "epoch": 2.425976602046866, + "grad_norm": 8.940360771703126e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500220 + }, + { + "epoch": 2.4260251002397024, + "grad_norm": 8.466274152851838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500230 + }, + { + "epoch": 2.4260735984325383, + "grad_norm": 8.545189302822109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500240 + }, + { + "epoch": 2.4261220966253747, + "grad_norm": 8.321468385474873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500250 + }, + { + "epoch": 2.4261705948182106, + "grad_norm": 8.312431418744382e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500260 + }, + { + "epoch": 2.4262190930110465, + "grad_norm": 8.383080398743914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500270 + }, + { + "epoch": 2.426267591203883, + "grad_norm": 8.276778089566506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500280 + }, + { + "epoch": 2.426316089396719, + "grad_norm": 8.758282774579129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500290 + }, + { + "epoch": 2.4263645875895548, + "grad_norm": 8.766292012296617e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500300 + }, + { + "epoch": 2.426413085782391, + "grad_norm": 8.66595144088933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500310 + }, + { + "epoch": 2.426461583975227, + "grad_norm": 7.767849297124485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500320 + }, + { + "epoch": 2.426510082168063, + "grad_norm": 8.621577762824018e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500330 + }, + { + "epoch": 2.4265585803608993, + "grad_norm": 7.736690577075933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500340 + }, + { + "epoch": 2.4266070785537353, + "grad_norm": 8.217827485168527e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500350 + }, + { + "epoch": 2.4266555767465716, + "grad_norm": 7.832946948838071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500360 + }, + { + "epoch": 2.4267040749394075, + "grad_norm": 7.499139087485673e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500370 + }, + { + "epoch": 2.4267525731322435, + "grad_norm": 7.381221962532436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500380 + }, + { + "epoch": 2.42680107132508, + "grad_norm": 7.146616098907543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500390 + }, + { + "epoch": 2.4268495695179158, + "grad_norm": 7.22692163890315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500400 + }, + { + "epoch": 2.426898067710752, + "grad_norm": 7.520981739617127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500410 + }, + { + "epoch": 2.426946565903588, + "grad_norm": 8.303429694933584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500420 + }, + { + "epoch": 2.426995064096424, + "grad_norm": 7.130033736757468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500430 + }, + { + "epoch": 2.4270435622892603, + "grad_norm": 1.0565328238953953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500440 + }, + { + "epoch": 2.4270920604820962, + "grad_norm": 7.586197057207755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500450 + }, + { + "epoch": 2.4271405586749326, + "grad_norm": 7.547222367065842e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500460 + }, + { + "epoch": 2.4271890568677685, + "grad_norm": 6.90035960815294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500470 + }, + { + "epoch": 2.4272375550606045, + "grad_norm": 8.923901759771979e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500480 + }, + { + "epoch": 2.427286053253441, + "grad_norm": 7.403871222777525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500490 + }, + { + "epoch": 2.4273345514462767, + "grad_norm": 6.415685902538826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500500 + }, + { + "epoch": 2.4273830496391127, + "grad_norm": 6.821898637099366e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500510 + }, + { + "epoch": 2.427431547831949, + "grad_norm": 6.329657367132313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500520 + }, + { + "epoch": 2.427480046024785, + "grad_norm": 5.913810809943243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500530 + }, + { + "epoch": 2.427528544217621, + "grad_norm": 5.826477149639686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500540 + }, + { + "epoch": 2.4275770424104572, + "grad_norm": 6.240038032956363e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500550 + }, + { + "epoch": 2.427625540603293, + "grad_norm": 5.866808123755618e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500560 + }, + { + "epoch": 2.4276740387961295, + "grad_norm": 6.148273996586795e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500570 + }, + { + "epoch": 2.4277225369889655, + "grad_norm": 5.902155066905834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500580 + }, + { + "epoch": 2.4277710351818014, + "grad_norm": 6.144570647848013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500590 + }, + { + "epoch": 2.4278195333746377, + "grad_norm": 5.922579475736711e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500600 + }, + { + "epoch": 2.4278680315674737, + "grad_norm": 5.708256480829732e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500610 + }, + { + "epoch": 2.42791652976031, + "grad_norm": 5.831706175740692e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500620 + }, + { + "epoch": 2.427965027953146, + "grad_norm": 6.162096042316989e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500630 + }, + { + "epoch": 2.428013526145982, + "grad_norm": 5.898768336010107e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500640 + }, + { + "epoch": 2.4280620243388182, + "grad_norm": 6.022836487318273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500650 + }, + { + "epoch": 2.428110522531654, + "grad_norm": 5.508238700713264e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500660 + }, + { + "epoch": 2.42815902072449, + "grad_norm": 5.530730504688108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500670 + }, + { + "epoch": 2.4282075189173264, + "grad_norm": 5.335392074812262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500680 + }, + { + "epoch": 2.4282560171101624, + "grad_norm": 5.732228487431712e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500690 + }, + { + "epoch": 2.4283045153029983, + "grad_norm": 6.572836923623981e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500700 + }, + { + "epoch": 2.4283530134958347, + "grad_norm": 5.046688329457538e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500710 + }, + { + "epoch": 2.4284015116886706, + "grad_norm": 5.474866497934272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500720 + }, + { + "epoch": 2.428450009881507, + "grad_norm": 5.384939072428097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500730 + }, + { + "epoch": 2.428498508074343, + "grad_norm": 5.770393727289047e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500740 + }, + { + "epoch": 2.428547006267179, + "grad_norm": 5.096647441860114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500750 + }, + { + "epoch": 2.428595504460015, + "grad_norm": 4.838570362153405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500760 + }, + { + "epoch": 2.428644002652851, + "grad_norm": 5.052648361925094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500770 + }, + { + "epoch": 2.4286925008456874, + "grad_norm": 5.181295819056686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500780 + }, + { + "epoch": 2.4287409990385234, + "grad_norm": 5.895680601497588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500790 + }, + { + "epoch": 2.4287894972313593, + "grad_norm": 5.046962314736447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500800 + }, + { + "epoch": 2.4288379954241957, + "grad_norm": 5.152006110620277e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500810 + }, + { + "epoch": 2.4288864936170316, + "grad_norm": 5.099441295897122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500820 + }, + { + "epoch": 2.4289349918098675, + "grad_norm": 4.704700415913976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500830 + }, + { + "epoch": 2.428983490002704, + "grad_norm": 5.068305881650303e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500840 + }, + { + "epoch": 2.42903198819554, + "grad_norm": 8.691273478689254e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500850 + }, + { + "epoch": 2.4290804863883757, + "grad_norm": 5.480864615492465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500860 + }, + { + "epoch": 2.429128984581212, + "grad_norm": 4.896004952570365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500870 + }, + { + "epoch": 2.429177482774048, + "grad_norm": 4.4915614694218675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500880 + }, + { + "epoch": 2.4292259809668844, + "grad_norm": 5.065233494860877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500890 + }, + { + "epoch": 2.4292744791597203, + "grad_norm": 5.204782382861595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500900 + }, + { + "epoch": 2.429322977352556, + "grad_norm": 4.7388417101501545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500910 + }, + { + "epoch": 2.4293714755453926, + "grad_norm": 4.597674205797375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500920 + }, + { + "epoch": 2.4294199737382285, + "grad_norm": 4.20895844399638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500930 + }, + { + "epoch": 2.429468471931065, + "grad_norm": 4.7370841116389784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500940 + }, + { + "epoch": 2.4295169701239008, + "grad_norm": 4.3858813114638906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500950 + }, + { + "epoch": 2.4295654683167367, + "grad_norm": 4.963578703609528e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500960 + }, + { + "epoch": 2.429613966509573, + "grad_norm": 4.3167733565496746e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500970 + }, + { + "epoch": 2.429662464702409, + "grad_norm": 4.4277416577642725e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500980 + }, + { + "epoch": 2.4297109628952454, + "grad_norm": 4.4714136038237484e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 500990 + }, + { + "epoch": 2.4297594610880813, + "grad_norm": 4.480410780161037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501000 + }, + { + "epoch": 2.429807959280917, + "grad_norm": 4.4489203787634324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501010 + }, + { + "epoch": 2.4298564574737536, + "grad_norm": 4.4957195655115356e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501020 + }, + { + "epoch": 2.4299049556665895, + "grad_norm": 4.329431817495788e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501030 + }, + { + "epoch": 2.4299534538594254, + "grad_norm": 4.2550865941848315e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501040 + }, + { + "epoch": 2.4300019520522618, + "grad_norm": 4.1439153619649005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501050 + }, + { + "epoch": 2.4300504502450977, + "grad_norm": 4.154191231009463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501060 + }, + { + "epoch": 2.4300989484379336, + "grad_norm": 4.138960605359898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501070 + }, + { + "epoch": 2.43014744663077, + "grad_norm": 4.235669450736168e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501080 + }, + { + "epoch": 2.430195944823606, + "grad_norm": 4.455275757209165e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501090 + }, + { + "epoch": 2.4302444430164423, + "grad_norm": 4.044364914079779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501100 + }, + { + "epoch": 2.430292941209278, + "grad_norm": 4.3619613165901683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501110 + }, + { + "epoch": 2.430341439402114, + "grad_norm": 4.01133036120882e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501120 + }, + { + "epoch": 2.4303899375949505, + "grad_norm": 4.018012020878814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501130 + }, + { + "epoch": 2.4304384357877864, + "grad_norm": 4.4801365106650337e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501140 + }, + { + "epoch": 2.4304869339806228, + "grad_norm": 4.627900977993704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501150 + }, + { + "epoch": 2.4305354321734587, + "grad_norm": 3.7076716807860066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501160 + }, + { + "epoch": 2.4305839303662946, + "grad_norm": 4.09638602150153e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501170 + }, + { + "epoch": 2.430632428559131, + "grad_norm": 3.817763456481771e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501180 + }, + { + "epoch": 2.430680926751967, + "grad_norm": 4.126166857076896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501190 + }, + { + "epoch": 2.430729424944803, + "grad_norm": 3.712920886300708e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501200 + }, + { + "epoch": 2.430777923137639, + "grad_norm": 3.875346408221958e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501210 + }, + { + "epoch": 2.430826421330475, + "grad_norm": 3.950645748318493e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501220 + }, + { + "epoch": 2.430874919523311, + "grad_norm": 4.4292636403042707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501230 + }, + { + "epoch": 2.4309234177161474, + "grad_norm": 3.8500135701724503e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501240 + }, + { + "epoch": 2.4309719159089833, + "grad_norm": 4.174843866167066e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501250 + }, + { + "epoch": 2.4310204141018197, + "grad_norm": 3.5167917644685076e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501260 + }, + { + "epoch": 2.4310689122946556, + "grad_norm": 3.625040108090616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501270 + }, + { + "epoch": 2.4311174104874915, + "grad_norm": 3.934141830086446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501280 + }, + { + "epoch": 2.431165908680328, + "grad_norm": 3.700696424857597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501290 + }, + { + "epoch": 2.431214406873164, + "grad_norm": 3.449625012308388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501300 + }, + { + "epoch": 2.431262905066, + "grad_norm": 3.824879115654767e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501310 + }, + { + "epoch": 2.431311403258836, + "grad_norm": 3.729355455561745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501320 + }, + { + "epoch": 2.431359901451672, + "grad_norm": 3.352742794504593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501330 + }, + { + "epoch": 2.4314083996445084, + "grad_norm": 3.698637272009364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501340 + }, + { + "epoch": 2.4314568978373443, + "grad_norm": 3.920568474313768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501350 + }, + { + "epoch": 2.4315053960301802, + "grad_norm": 3.468727243216563e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501360 + }, + { + "epoch": 2.4315538942230166, + "grad_norm": 3.1442218073607364e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501370 + }, + { + "epoch": 2.4316023924158525, + "grad_norm": 3.4398613024677616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501380 + }, + { + "epoch": 2.431650890608689, + "grad_norm": 3.547155245087197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501390 + }, + { + "epoch": 2.431699388801525, + "grad_norm": 3.5449582469482266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501400 + }, + { + "epoch": 2.4317478869943607, + "grad_norm": 3.531336858486611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501410 + }, + { + "epoch": 2.431796385187197, + "grad_norm": 3.484615263005253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501420 + }, + { + "epoch": 2.431844883380033, + "grad_norm": 3.3413041933272325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501430 + }, + { + "epoch": 2.431893381572869, + "grad_norm": 3.4054650654979923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501440 + }, + { + "epoch": 2.4319418797657053, + "grad_norm": 3.5022659972128167e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501450 + }, + { + "epoch": 2.4319903779585412, + "grad_norm": 3.061670952320128e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501460 + }, + { + "epoch": 2.4320388761513776, + "grad_norm": 3.3853362424451916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501470 + }, + { + "epoch": 2.4320873743442135, + "grad_norm": 3.2476333444719785e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501480 + }, + { + "epoch": 2.4321358725370494, + "grad_norm": 3.473423646482843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501490 + }, + { + "epoch": 2.432184370729886, + "grad_norm": 3.0104078518888855e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501500 + }, + { + "epoch": 2.4322328689227217, + "grad_norm": 3.286040168859472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501510 + }, + { + "epoch": 2.432281367115558, + "grad_norm": 3.4433620044183044e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501520 + }, + { + "epoch": 2.432329865308394, + "grad_norm": 3.0774862125326763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501530 + }, + { + "epoch": 2.43237836350123, + "grad_norm": 3.1589271998200275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501540 + }, + { + "epoch": 2.4324268616940663, + "grad_norm": 3.015444178799953e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501550 + }, + { + "epoch": 2.432475359886902, + "grad_norm": 2.9063670581308543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501560 + }, + { + "epoch": 2.432523858079738, + "grad_norm": 3.130120092009747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501570 + }, + { + "epoch": 2.4325723562725745, + "grad_norm": 3.148101995975594e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501580 + }, + { + "epoch": 2.4326208544654104, + "grad_norm": 3.174534413119545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501590 + }, + { + "epoch": 2.4326693526582464, + "grad_norm": 3.2448886599922844e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501600 + }, + { + "epoch": 2.4327178508510827, + "grad_norm": 2.7224663767810853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501610 + }, + { + "epoch": 2.4327663490439186, + "grad_norm": 2.827150069606432e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501620 + }, + { + "epoch": 2.432814847236755, + "grad_norm": 2.7436894356469566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501630 + }, + { + "epoch": 2.432863345429591, + "grad_norm": 3.0555608532267797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501640 + }, + { + "epoch": 2.432911843622427, + "grad_norm": 2.7508451694302494e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501650 + }, + { + "epoch": 2.432960341815263, + "grad_norm": 3.0638739190180786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501660 + }, + { + "epoch": 2.433008840008099, + "grad_norm": 2.695815055631101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501670 + }, + { + "epoch": 2.4330573382009355, + "grad_norm": 2.612219134334737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501680 + }, + { + "epoch": 2.4331058363937714, + "grad_norm": 2.5609824660932645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501690 + }, + { + "epoch": 2.4331543345866073, + "grad_norm": 2.828737137861026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501700 + }, + { + "epoch": 2.4332028327794437, + "grad_norm": 2.7243808631283173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501710 + }, + { + "epoch": 2.4332513309722796, + "grad_norm": 2.6567923328002507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501720 + }, + { + "epoch": 2.4332998291651156, + "grad_norm": 2.532884479933273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501730 + }, + { + "epoch": 2.433348327357952, + "grad_norm": 2.78411363296982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501740 + }, + { + "epoch": 2.433396825550788, + "grad_norm": 2.548685529291106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501750 + }, + { + "epoch": 2.4334453237436238, + "grad_norm": 2.71398448603577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501760 + }, + { + "epoch": 2.43349382193646, + "grad_norm": 2.6379746032034745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501770 + }, + { + "epoch": 2.433542320129296, + "grad_norm": 2.63023963498199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501780 + }, + { + "epoch": 2.4335908183221324, + "grad_norm": 2.60653109762643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501790 + }, + { + "epoch": 2.4336393165149683, + "grad_norm": 2.3433800322436582e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501800 + }, + { + "epoch": 2.4336878147078043, + "grad_norm": 2.561041014814691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501810 + }, + { + "epoch": 2.4337363129006406, + "grad_norm": 2.573527808635845e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501820 + }, + { + "epoch": 2.4337848110934766, + "grad_norm": 2.3818174099687894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501830 + }, + { + "epoch": 2.433833309286313, + "grad_norm": 2.800614709030924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501840 + }, + { + "epoch": 2.433881807479149, + "grad_norm": 2.2116770992397505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501850 + }, + { + "epoch": 2.4339303056719848, + "grad_norm": 2.612986804706452e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501860 + }, + { + "epoch": 2.433978803864821, + "grad_norm": 2.2308329050702014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501870 + }, + { + "epoch": 2.434027302057657, + "grad_norm": 2.1790084758777084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501880 + }, + { + "epoch": 2.4340758002504934, + "grad_norm": 2.424565366254683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501890 + }, + { + "epoch": 2.4341242984433293, + "grad_norm": 2.4422917022093316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501900 + }, + { + "epoch": 2.4341727966361653, + "grad_norm": 2.4686417532393534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501910 + }, + { + "epoch": 2.4342212948290016, + "grad_norm": 2.298274779377607e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501920 + }, + { + "epoch": 2.4342697930218375, + "grad_norm": 2.1842780029146525e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501930 + }, + { + "epoch": 2.4343182912146735, + "grad_norm": 2.203633471253852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501940 + }, + { + "epoch": 2.43436678940751, + "grad_norm": 4.4140375621282146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501950 + }, + { + "epoch": 2.4344152876003458, + "grad_norm": 2.1731466404162347e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501960 + }, + { + "epoch": 2.4344637857931817, + "grad_norm": 2.2563271784292738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501970 + }, + { + "epoch": 2.434512283986018, + "grad_norm": 2.1756687829110888e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501980 + }, + { + "epoch": 2.434560782178854, + "grad_norm": 2.2505159336105862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 501990 + }, + { + "epoch": 2.4346092803716903, + "grad_norm": 1.9485185021039797e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502000 + }, + { + "epoch": 2.4346577785645263, + "grad_norm": 2.1162591679058096e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502010 + }, + { + "epoch": 2.434706276757362, + "grad_norm": 2.1563749896813533e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502020 + }, + { + "epoch": 2.4347547749501985, + "grad_norm": 2.0822145074816945e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502030 + }, + { + "epoch": 2.4348032731430345, + "grad_norm": 1.9753436220071308e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502040 + }, + { + "epoch": 2.434851771335871, + "grad_norm": 1.9321409183703508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502050 + }, + { + "epoch": 2.4349002695287068, + "grad_norm": 2.481737908510695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502060 + }, + { + "epoch": 2.4349487677215427, + "grad_norm": 1.8962715842008038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502070 + }, + { + "epoch": 2.434997265914379, + "grad_norm": 2.264899450210578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502080 + }, + { + "epoch": 2.435045764107215, + "grad_norm": 1.8133496837435814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502090 + }, + { + "epoch": 2.435094262300051, + "grad_norm": 2.1968924102111487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502100 + }, + { + "epoch": 2.4351427604928872, + "grad_norm": 1.813440917430853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502110 + }, + { + "epoch": 2.435191258685723, + "grad_norm": 1.9333195666604297e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502120 + }, + { + "epoch": 2.435239756878559, + "grad_norm": 2.074380347494298e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502130 + }, + { + "epoch": 2.4352882550713955, + "grad_norm": 1.887464975425246e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502140 + }, + { + "epoch": 2.4353367532642314, + "grad_norm": 1.8820813352249388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502150 + }, + { + "epoch": 2.4353852514570677, + "grad_norm": 2.6369417582827737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502160 + }, + { + "epoch": 2.4354337496499037, + "grad_norm": 1.8760306375043e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502170 + }, + { + "epoch": 2.4354822478427396, + "grad_norm": 1.9352516744675086e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502180 + }, + { + "epoch": 2.435530746035576, + "grad_norm": 2.041826832055449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502190 + }, + { + "epoch": 2.435579244228412, + "grad_norm": 1.7727043655213492e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502200 + }, + { + "epoch": 2.4356277424212482, + "grad_norm": 1.868761643208927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502210 + }, + { + "epoch": 2.435676240614084, + "grad_norm": 1.7288043352436944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502220 + }, + { + "epoch": 2.43572473880692, + "grad_norm": 1.8040833538179868e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502230 + }, + { + "epoch": 2.4357732369997565, + "grad_norm": 6.171719633130124e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502240 + }, + { + "epoch": 2.4358217351925924, + "grad_norm": 1.813803578443185e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502250 + }, + { + "epoch": 2.4358702333854283, + "grad_norm": 1.7295560894581286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502260 + }, + { + "epoch": 2.4359187315782647, + "grad_norm": 1.6204087671667367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502270 + }, + { + "epoch": 2.4359672297711006, + "grad_norm": 1.639472486658633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502280 + }, + { + "epoch": 2.4360157279639365, + "grad_norm": 1.6097835953132744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502290 + }, + { + "epoch": 2.436064226156773, + "grad_norm": 1.7010326303079637e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502300 + }, + { + "epoch": 2.436112724349609, + "grad_norm": 1.661623514337407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502310 + }, + { + "epoch": 2.436161222542445, + "grad_norm": 1.7313281830411142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502320 + }, + { + "epoch": 2.436209720735281, + "grad_norm": 1.9238968889112584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502330 + }, + { + "epoch": 2.436258218928117, + "grad_norm": 1.7374698302319302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502340 + }, + { + "epoch": 2.4363067171209534, + "grad_norm": 1.7191943868510862e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502350 + }, + { + "epoch": 2.4363552153137893, + "grad_norm": 1.7904331173212995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502360 + }, + { + "epoch": 2.4364037135066257, + "grad_norm": 1.6946228242886718e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502370 + }, + { + "epoch": 2.4364522116994616, + "grad_norm": 1.4609261711484578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502380 + }, + { + "epoch": 2.4365007098922975, + "grad_norm": 1.685331056933137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502390 + }, + { + "epoch": 2.436549208085134, + "grad_norm": 1.8205075491550815e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502400 + }, + { + "epoch": 2.43659770627797, + "grad_norm": 1.5719757584520266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502410 + }, + { + "epoch": 2.436646204470806, + "grad_norm": 1.5680578258070454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502420 + }, + { + "epoch": 2.436694702663642, + "grad_norm": 1.4586207441880106e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502430 + }, + { + "epoch": 2.436743200856478, + "grad_norm": 1.5152227206272073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502440 + }, + { + "epoch": 2.4367916990493144, + "grad_norm": 1.5842820744182973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502450 + }, + { + "epoch": 2.4368401972421503, + "grad_norm": 1.5570337552617275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502460 + }, + { + "epoch": 2.436888695434986, + "grad_norm": 1.383548067224183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502470 + }, + { + "epoch": 2.4369371936278226, + "grad_norm": 1.6464724694742472e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502480 + }, + { + "epoch": 2.4369856918206585, + "grad_norm": 1.5566057243177056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502490 + }, + { + "epoch": 2.4370341900134944, + "grad_norm": 1.363116268748854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502500 + }, + { + "epoch": 2.437082688206331, + "grad_norm": 1.5192190971902164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502510 + }, + { + "epoch": 2.4371311863991667, + "grad_norm": 1.6690769655269833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502520 + }, + { + "epoch": 2.437179684592003, + "grad_norm": 1.641113982486786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502530 + }, + { + "epoch": 2.437228182784839, + "grad_norm": 1.6111354739223316e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502540 + }, + { + "epoch": 2.437276680977675, + "grad_norm": 1.4331570241665759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502550 + }, + { + "epoch": 2.4373251791705113, + "grad_norm": 4.0091816799758817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502560 + }, + { + "epoch": 2.437373677363347, + "grad_norm": 1.4969093342642736e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502570 + }, + { + "epoch": 2.4374221755561836, + "grad_norm": 1.5809236231234536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502580 + }, + { + "epoch": 2.4374706737490195, + "grad_norm": 1.4305409479220543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502590 + }, + { + "epoch": 2.4375191719418554, + "grad_norm": 1.6025214222281647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502600 + }, + { + "epoch": 2.4375676701346918, + "grad_norm": 1.407412639764516e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502610 + }, + { + "epoch": 2.4376161683275277, + "grad_norm": 1.363875270499193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502620 + }, + { + "epoch": 2.4376646665203636, + "grad_norm": 1.5954076104662818e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502630 + }, + { + "epoch": 2.4377131647132, + "grad_norm": 1.312694877242393e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502640 + }, + { + "epoch": 2.437761662906036, + "grad_norm": 1.2658334469506372e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502650 + }, + { + "epoch": 2.437810161098872, + "grad_norm": 1.460260676822145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502660 + }, + { + "epoch": 2.437858659291708, + "grad_norm": 1.3718943137064343e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502670 + }, + { + "epoch": 2.437907157484544, + "grad_norm": 1.566759948445906e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502680 + }, + { + "epoch": 2.4379556556773805, + "grad_norm": 1.1843614089457333e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502690 + }, + { + "epoch": 2.4380041538702164, + "grad_norm": 1.297063221272765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502700 + }, + { + "epoch": 2.4380526520630523, + "grad_norm": 1.619025482568759e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502710 + }, + { + "epoch": 2.4381011502558887, + "grad_norm": 1.297564153901476e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502720 + }, + { + "epoch": 2.4381496484487246, + "grad_norm": 3.5846676382789155e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502730 + }, + { + "epoch": 2.438198146641561, + "grad_norm": 1.9275930185358447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502740 + }, + { + "epoch": 2.438246644834397, + "grad_norm": 1.2947678840191656e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502750 + }, + { + "epoch": 2.438295143027233, + "grad_norm": 1.3410701171778783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502760 + }, + { + "epoch": 2.438343641220069, + "grad_norm": 2.6516696038925147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502770 + }, + { + "epoch": 2.438392139412905, + "grad_norm": 1.312765078864686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502780 + }, + { + "epoch": 2.438440637605741, + "grad_norm": 1.1926161391784262e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502790 + }, + { + "epoch": 2.4384891357985774, + "grad_norm": 1.2803400295524625e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502800 + }, + { + "epoch": 2.4385376339914133, + "grad_norm": 1.2899279511202622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502810 + }, + { + "epoch": 2.4385861321842492, + "grad_norm": 1.4261607361731876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502820 + }, + { + "epoch": 2.4386346303770856, + "grad_norm": 1.2862365394994413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502830 + }, + { + "epoch": 2.4386831285699215, + "grad_norm": 1.2478135147375724e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502840 + }, + { + "epoch": 2.438731626762758, + "grad_norm": 1.3199185389112245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502850 + }, + { + "epoch": 2.438780124955594, + "grad_norm": 1.2193592624498706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502860 + }, + { + "epoch": 2.4388286231484297, + "grad_norm": 1.7451269229695754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502870 + }, + { + "epoch": 2.438877121341266, + "grad_norm": 1.5553651167010685e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502880 + }, + { + "epoch": 2.438925619534102, + "grad_norm": 1.1981025238583243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502890 + }, + { + "epoch": 2.4389741177269384, + "grad_norm": 1.2956661521457136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502900 + }, + { + "epoch": 2.4390226159197743, + "grad_norm": 1.3021656286582584e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502910 + }, + { + "epoch": 2.4390711141126102, + "grad_norm": 1.2613988076282112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502920 + }, + { + "epoch": 2.4391196123054466, + "grad_norm": 1.191613350215448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502930 + }, + { + "epoch": 2.4391681104982825, + "grad_norm": 1.1678501010692344e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502940 + }, + { + "epoch": 2.439216608691119, + "grad_norm": 1.168929912864769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502950 + }, + { + "epoch": 2.439265106883955, + "grad_norm": 1.292607976211002e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502960 + }, + { + "epoch": 2.4393136050767907, + "grad_norm": 1.169904919606779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502970 + }, + { + "epoch": 2.439362103269627, + "grad_norm": 1.1301651881012731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502980 + }, + { + "epoch": 2.439410601462463, + "grad_norm": 1.0669042183053534e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 502990 + }, + { + "epoch": 2.439459099655299, + "grad_norm": 1.5398801167521015e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503000 + }, + { + "epoch": 2.4395075978481353, + "grad_norm": 1.2858055242759292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503010 + }, + { + "epoch": 2.4395560960409712, + "grad_norm": 1.202822943469073e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503020 + }, + { + "epoch": 2.439604594233807, + "grad_norm": 1.2763338474997e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503030 + }, + { + "epoch": 2.4396530924266435, + "grad_norm": 1.1592823767614391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503040 + }, + { + "epoch": 2.4397015906194794, + "grad_norm": 1.089687273747586e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503050 + }, + { + "epoch": 2.439750088812316, + "grad_norm": 1.1737616745222113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503060 + }, + { + "epoch": 2.4397985870051517, + "grad_norm": 1.379502236886765e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503070 + }, + { + "epoch": 2.4398470851979877, + "grad_norm": 1.1029388247152383e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503080 + }, + { + "epoch": 2.439895583390824, + "grad_norm": 9.891009966622732e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503090 + }, + { + "epoch": 2.43994408158366, + "grad_norm": 1.2007471639208234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503100 + }, + { + "epoch": 2.4399925797764963, + "grad_norm": 1.1140213018734357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503110 + }, + { + "epoch": 2.4400410779693322, + "grad_norm": 1.1712899805615962e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503120 + }, + { + "epoch": 2.440089576162168, + "grad_norm": 1.1258913445999497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503130 + }, + { + "epoch": 2.4401380743550045, + "grad_norm": 1.0256948712594749e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503140 + }, + { + "epoch": 2.4401865725478404, + "grad_norm": 9.863713756885772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503150 + }, + { + "epoch": 2.4402350707406764, + "grad_norm": 1.146144086305867e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503160 + }, + { + "epoch": 2.4402835689335127, + "grad_norm": 1.083204423935058e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503170 + }, + { + "epoch": 2.4403320671263486, + "grad_norm": 1.099384405733872e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503180 + }, + { + "epoch": 2.4403805653191846, + "grad_norm": 1.1882595885026603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503190 + }, + { + "epoch": 2.440429063512021, + "grad_norm": 1.1304237546028162e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503200 + }, + { + "epoch": 2.440477561704857, + "grad_norm": 1.1572305425033846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503210 + }, + { + "epoch": 2.4405260598976932, + "grad_norm": 1.0198925082249843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503220 + }, + { + "epoch": 2.440574558090529, + "grad_norm": 1.0042960951750501e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503230 + }, + { + "epoch": 2.440623056283365, + "grad_norm": 1.0042132458920605e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503240 + }, + { + "epoch": 2.4406715544762014, + "grad_norm": 1.0260500715730814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503250 + }, + { + "epoch": 2.4407200526690374, + "grad_norm": 1.1434382685138189e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503260 + }, + { + "epoch": 2.4407685508618737, + "grad_norm": 1.1105233710395623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503270 + }, + { + "epoch": 2.4408170490547096, + "grad_norm": 8.858905431452513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503280 + }, + { + "epoch": 2.4408655472475456, + "grad_norm": 1.3213488614383095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503290 + }, + { + "epoch": 2.440914045440382, + "grad_norm": 1.0459920929406508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503300 + }, + { + "epoch": 2.440962543633218, + "grad_norm": 1.0774587622108811e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503310 + }, + { + "epoch": 2.4410110418260538, + "grad_norm": 1.1058710214228995e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503320 + }, + { + "epoch": 2.44105954001889, + "grad_norm": 1.0421038609820243e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503330 + }, + { + "epoch": 2.441108038211726, + "grad_norm": 1.0613748457899419e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503340 + }, + { + "epoch": 2.441156536404562, + "grad_norm": 1.0237965142323446e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503350 + }, + { + "epoch": 2.4412050345973983, + "grad_norm": 1.0230228042473755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503360 + }, + { + "epoch": 2.4412535327902343, + "grad_norm": 1.0376921011356899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503370 + }, + { + "epoch": 2.4413020309830706, + "grad_norm": 1.0416061257956244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503380 + }, + { + "epoch": 2.4413505291759066, + "grad_norm": 9.891535057704459e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503390 + }, + { + "epoch": 2.4413990273687425, + "grad_norm": 9.791151001081744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503400 + }, + { + "epoch": 2.441447525561579, + "grad_norm": 1.022741002998373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503410 + }, + { + "epoch": 2.4414960237544148, + "grad_norm": 1.1335284000324464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503420 + }, + { + "epoch": 2.441544521947251, + "grad_norm": 9.665193800856287e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503430 + }, + { + "epoch": 2.441593020140087, + "grad_norm": 1.0477437939471201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503440 + }, + { + "epoch": 2.441641518332923, + "grad_norm": 1.0473447531467173e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503450 + }, + { + "epoch": 2.4416900165257593, + "grad_norm": 9.742002760049218e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503460 + }, + { + "epoch": 2.4417385147185953, + "grad_norm": 9.581553683801758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503470 + }, + { + "epoch": 2.4417870129114316, + "grad_norm": 9.901114594867977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503480 + }, + { + "epoch": 2.4418355111042676, + "grad_norm": 1.3580132929291722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503490 + }, + { + "epoch": 2.4418840092971035, + "grad_norm": 8.872640222534756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503500 + }, + { + "epoch": 2.44193250748994, + "grad_norm": 1.0195787325528727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503510 + }, + { + "epoch": 2.4419810056827758, + "grad_norm": 1.1245697351114359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503520 + }, + { + "epoch": 2.4420295038756117, + "grad_norm": 9.027705516473361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503530 + }, + { + "epoch": 2.442078002068448, + "grad_norm": 1.0550333939818302e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503540 + }, + { + "epoch": 2.442126500261284, + "grad_norm": 9.89139579132825e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503550 + }, + { + "epoch": 2.44217499845412, + "grad_norm": 9.427704128484038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503560 + }, + { + "epoch": 2.4422234966469563, + "grad_norm": 9.153672664297119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503570 + }, + { + "epoch": 2.442271994839792, + "grad_norm": 9.899899566789827e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503580 + }, + { + "epoch": 2.4423204930326285, + "grad_norm": 1.0004045947198392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503590 + }, + { + "epoch": 2.4423689912254645, + "grad_norm": 9.055192151663505e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503600 + }, + { + "epoch": 2.4424174894183004, + "grad_norm": 1.0291081764535193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503610 + }, + { + "epoch": 2.4424659876111368, + "grad_norm": 9.96291262822524e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503620 + }, + { + "epoch": 2.4425144858039727, + "grad_norm": 9.2315993072134e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503630 + }, + { + "epoch": 2.442562983996809, + "grad_norm": 9.685848567642097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503640 + }, + { + "epoch": 2.442611482189645, + "grad_norm": 9.193982464239525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503650 + }, + { + "epoch": 2.442659980382481, + "grad_norm": 9.966993985699446e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503660 + }, + { + "epoch": 2.4427084785753173, + "grad_norm": 9.045008653174591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503670 + }, + { + "epoch": 2.442756976768153, + "grad_norm": 9.949389578878254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503680 + }, + { + "epoch": 2.442805474960989, + "grad_norm": 8.735344181332039e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503690 + }, + { + "epoch": 2.4428539731538255, + "grad_norm": 1.021274300683217e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503700 + }, + { + "epoch": 2.4429024713466614, + "grad_norm": 9.321187377508977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503710 + }, + { + "epoch": 2.4429509695394973, + "grad_norm": 8.528422767994925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503720 + }, + { + "epoch": 2.4429994677323337, + "grad_norm": 9.428203640027277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503730 + }, + { + "epoch": 2.4430479659251696, + "grad_norm": 8.770917503397868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503740 + }, + { + "epoch": 2.443096464118006, + "grad_norm": 9.129340128311014e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503750 + }, + { + "epoch": 2.443144962310842, + "grad_norm": 8.633924153400585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503760 + }, + { + "epoch": 2.443193460503678, + "grad_norm": 8.859114331016826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503770 + }, + { + "epoch": 2.443241958696514, + "grad_norm": 8.982680554936451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503780 + }, + { + "epoch": 2.44329045688935, + "grad_norm": 8.560393638390451e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503790 + }, + { + "epoch": 2.4433389550821865, + "grad_norm": 8.56777262470132e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503800 + }, + { + "epoch": 2.4433874532750224, + "grad_norm": 8.59634070593529e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503810 + }, + { + "epoch": 2.4434359514678583, + "grad_norm": 8.320239430759102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503820 + }, + { + "epoch": 2.4434844496606947, + "grad_norm": 9.137565371020173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503830 + }, + { + "epoch": 2.4435329478535306, + "grad_norm": 8.35713862556986e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503840 + }, + { + "epoch": 2.4435814460463665, + "grad_norm": 8.218972169515837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503850 + }, + { + "epoch": 2.443629944239203, + "grad_norm": 3.536659676228737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503860 + }, + { + "epoch": 2.443678442432039, + "grad_norm": 8.92697542553833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503870 + }, + { + "epoch": 2.4437269406248747, + "grad_norm": 8.491501546359359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503880 + }, + { + "epoch": 2.443775438817711, + "grad_norm": 9.395962763392163e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503890 + }, + { + "epoch": 2.443823937010547, + "grad_norm": 8.444545329666653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503900 + }, + { + "epoch": 2.4438724352033834, + "grad_norm": 8.242049887030589e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503910 + }, + { + "epoch": 2.4439209333962193, + "grad_norm": 8.91141525016792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503920 + }, + { + "epoch": 2.443969431589055, + "grad_norm": 9.995642358262558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503930 + }, + { + "epoch": 2.4440179297818916, + "grad_norm": 9.79322294369922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503940 + }, + { + "epoch": 2.4440664279747275, + "grad_norm": 7.662504231120693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503950 + }, + { + "epoch": 2.444114926167564, + "grad_norm": 8.62113154198596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503960 + }, + { + "epoch": 2.4441634243604, + "grad_norm": 2.236297405033838e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503970 + }, + { + "epoch": 2.4442119225532357, + "grad_norm": 8.441764265398888e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503980 + }, + { + "epoch": 2.444260420746072, + "grad_norm": 8.366172465912314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 503990 + }, + { + "epoch": 2.444308918938908, + "grad_norm": 8.124776229578856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504000 + }, + { + "epoch": 2.4443574171317444, + "grad_norm": 7.932672474453284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504010 + }, + { + "epoch": 2.4444059153245803, + "grad_norm": 8.669223916513147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504020 + }, + { + "epoch": 2.444454413517416, + "grad_norm": 8.56252384551226e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504030 + }, + { + "epoch": 2.4445029117102526, + "grad_norm": 7.935992130114755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504040 + }, + { + "epoch": 2.4445514099030885, + "grad_norm": 9.101029974090125e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504050 + }, + { + "epoch": 2.4445999080959244, + "grad_norm": 8.549007901592631e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504060 + }, + { + "epoch": 2.444648406288761, + "grad_norm": 8.563625186752688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504070 + }, + { + "epoch": 2.4446969044815967, + "grad_norm": 8.489934799627008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504080 + }, + { + "epoch": 2.4447454026744326, + "grad_norm": 9.01204657566268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504090 + }, + { + "epoch": 2.444793900867269, + "grad_norm": 8.392554917691086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504100 + }, + { + "epoch": 2.444842399060105, + "grad_norm": 8.375773319357904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504110 + }, + { + "epoch": 2.4448908972529413, + "grad_norm": 7.964874271237932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504120 + }, + { + "epoch": 2.444939395445777, + "grad_norm": 8.375032933827242e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504130 + }, + { + "epoch": 2.444987893638613, + "grad_norm": 7.413468239292342e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504140 + }, + { + "epoch": 2.4450363918314495, + "grad_norm": 8.030646370116301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504150 + }, + { + "epoch": 2.4450848900242854, + "grad_norm": 8.208469637338567e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504160 + }, + { + "epoch": 2.445133388217122, + "grad_norm": 8.014218622065528e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504170 + }, + { + "epoch": 2.4451818864099577, + "grad_norm": 7.559723513850258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504180 + }, + { + "epoch": 2.4452303846027936, + "grad_norm": 8.965923115056285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504190 + }, + { + "epoch": 2.44527888279563, + "grad_norm": 7.788425193666626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504200 + }, + { + "epoch": 2.445327380988466, + "grad_norm": 7.399162882393284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504210 + }, + { + "epoch": 2.445375879181302, + "grad_norm": 7.566185189489261e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504220 + }, + { + "epoch": 2.445424377374138, + "grad_norm": 8.016140640165759e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504230 + }, + { + "epoch": 2.445472875566974, + "grad_norm": 8.082485436489151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504240 + }, + { + "epoch": 2.44552137375981, + "grad_norm": 8.263197770475017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504250 + }, + { + "epoch": 2.4455698719526464, + "grad_norm": 7.370110211013525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504260 + }, + { + "epoch": 2.4456183701454823, + "grad_norm": 7.865623530278754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504270 + }, + { + "epoch": 2.4456668683383187, + "grad_norm": 7.35680814045736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504280 + }, + { + "epoch": 2.4457153665311546, + "grad_norm": 7.39648058356579e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504290 + }, + { + "epoch": 2.4457638647239905, + "grad_norm": 7.938568558074621e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504300 + }, + { + "epoch": 2.445812362916827, + "grad_norm": 7.71859873793801e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504310 + }, + { + "epoch": 2.445860861109663, + "grad_norm": 7.662183776346865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504320 + }, + { + "epoch": 2.445909359302499, + "grad_norm": 7.531819790074223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504330 + }, + { + "epoch": 2.445957857495335, + "grad_norm": 6.995612977789278e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504340 + }, + { + "epoch": 2.446006355688171, + "grad_norm": 7.390891454406301e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504350 + }, + { + "epoch": 2.4460548538810074, + "grad_norm": 8.005512341924259e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504360 + }, + { + "epoch": 2.4461033520738433, + "grad_norm": 7.321938255699934e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504370 + }, + { + "epoch": 2.4461518502666793, + "grad_norm": 1.0286198914855049e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504380 + }, + { + "epoch": 2.4462003484595156, + "grad_norm": 7.761446596532551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504390 + }, + { + "epoch": 2.4462488466523515, + "grad_norm": 1.0399079286571578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504400 + }, + { + "epoch": 2.4462973448451875, + "grad_norm": 7.963762982399203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504410 + }, + { + "epoch": 2.446345843038024, + "grad_norm": 7.183484740380663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504420 + }, + { + "epoch": 2.4463943412308597, + "grad_norm": 7.138980606669065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504430 + }, + { + "epoch": 2.446442839423696, + "grad_norm": 6.869596802516753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504440 + }, + { + "epoch": 2.446491337616532, + "grad_norm": 7.876063534695277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504450 + }, + { + "epoch": 2.446539835809368, + "grad_norm": 6.93207127255846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504460 + }, + { + "epoch": 2.4465883340022043, + "grad_norm": 7.514640287809016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504470 + }, + { + "epoch": 2.4466368321950402, + "grad_norm": 7.491431119888148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504480 + }, + { + "epoch": 2.4466853303878766, + "grad_norm": 7.164231163869772e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504490 + }, + { + "epoch": 2.4467338285807125, + "grad_norm": 7.338030627579428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504500 + }, + { + "epoch": 2.4467823267735485, + "grad_norm": 6.975671595910171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504510 + }, + { + "epoch": 2.446830824966385, + "grad_norm": 7.366755028215266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504520 + }, + { + "epoch": 2.4468793231592207, + "grad_norm": 7.03507225807698e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504530 + }, + { + "epoch": 2.446927821352057, + "grad_norm": 7.384070244143004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504540 + }, + { + "epoch": 2.446976319544893, + "grad_norm": 7.024376458275583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504550 + }, + { + "epoch": 2.447024817737729, + "grad_norm": 7.184362260659327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504560 + }, + { + "epoch": 2.4470733159305653, + "grad_norm": 7.304521432160982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504570 + }, + { + "epoch": 2.4471218141234012, + "grad_norm": 7.578731242574577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504580 + }, + { + "epoch": 2.447170312316237, + "grad_norm": 7.472808505326611e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504590 + }, + { + "epoch": 2.4472188105090735, + "grad_norm": 7.057730755377634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504600 + }, + { + "epoch": 2.4472673087019094, + "grad_norm": 7.123596645897123e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504610 + }, + { + "epoch": 2.4473158068947454, + "grad_norm": 6.90907029365917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504620 + }, + { + "epoch": 2.4473643050875817, + "grad_norm": 7.416814185035037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504630 + }, + { + "epoch": 2.4474128032804177, + "grad_norm": 6.672055263834409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504640 + }, + { + "epoch": 2.447461301473254, + "grad_norm": 6.316744105561156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504650 + }, + { + "epoch": 2.44750979966609, + "grad_norm": 7.395995282877266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504660 + }, + { + "epoch": 2.447558297858926, + "grad_norm": 7.082812913949965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504670 + }, + { + "epoch": 2.4476067960517622, + "grad_norm": 6.80968241795199e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504680 + }, + { + "epoch": 2.447655294244598, + "grad_norm": 6.598853019568196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504690 + }, + { + "epoch": 2.4477037924374345, + "grad_norm": 6.700611265841872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504700 + }, + { + "epoch": 2.4477522906302704, + "grad_norm": 6.740583557984792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504710 + }, + { + "epoch": 2.4478007888231064, + "grad_norm": 6.767147198161183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504720 + }, + { + "epoch": 2.4478492870159427, + "grad_norm": 7.102108412482266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504730 + }, + { + "epoch": 2.4478977852087787, + "grad_norm": 1.5844591416680487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504740 + }, + { + "epoch": 2.4479462834016146, + "grad_norm": 7.206794805370009e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504750 + }, + { + "epoch": 2.447994781594451, + "grad_norm": 2.0134937130933395e-06, + "learning_rate": 0.0002, + "loss": 0.0012, + "step": 504760 + }, + { + "epoch": 2.448043279787287, + "grad_norm": 0.00020528539607767016, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504770 + }, + { + "epoch": 2.448091777980123, + "grad_norm": 2.534197483328171e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504780 + }, + { + "epoch": 2.448140276172959, + "grad_norm": 1.3013886018597987e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504790 + }, + { + "epoch": 2.448188774365795, + "grad_norm": 8.252006409747992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504800 + }, + { + "epoch": 2.4482372725586314, + "grad_norm": 5.609079380519688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504810 + }, + { + "epoch": 2.4482857707514674, + "grad_norm": 4.824397819902515e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504820 + }, + { + "epoch": 2.4483342689443033, + "grad_norm": 4.6330602344824e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504830 + }, + { + "epoch": 2.4483827671371396, + "grad_norm": 3.905436187778832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504840 + }, + { + "epoch": 2.4484312653299756, + "grad_norm": 3.5889618175133364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504850 + }, + { + "epoch": 2.448479763522812, + "grad_norm": 3.3534238355059642e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504860 + }, + { + "epoch": 2.448528261715648, + "grad_norm": 3.065338205487933e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504870 + }, + { + "epoch": 2.448576759908484, + "grad_norm": 2.564090436862898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504880 + }, + { + "epoch": 2.44862525810132, + "grad_norm": 2.730091864577844e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504890 + }, + { + "epoch": 2.448673756294156, + "grad_norm": 2.587360086181434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504900 + }, + { + "epoch": 2.448722254486992, + "grad_norm": 2.4295209186675493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504910 + }, + { + "epoch": 2.4487707526798284, + "grad_norm": 2.2544138573721284e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504920 + }, + { + "epoch": 2.4488192508726643, + "grad_norm": 1.7451210396757233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504930 + }, + { + "epoch": 2.4488677490655, + "grad_norm": 2.055018967439537e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504940 + }, + { + "epoch": 2.4489162472583366, + "grad_norm": 1.7845096635937807e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504950 + }, + { + "epoch": 2.4489647454511725, + "grad_norm": 1.7153596445496078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504960 + }, + { + "epoch": 2.449013243644009, + "grad_norm": 1.620290390746959e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504970 + }, + { + "epoch": 2.4490617418368448, + "grad_norm": 1.311164055550762e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504980 + }, + { + "epoch": 2.4491102400296807, + "grad_norm": 1.4198602684700745e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 504990 + }, + { + "epoch": 2.449158738222517, + "grad_norm": 1.3588162346422905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505000 + }, + { + "epoch": 2.449207236415353, + "grad_norm": 1.3453501424010028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505010 + }, + { + "epoch": 2.4492557346081893, + "grad_norm": 1.2609864370460855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505020 + }, + { + "epoch": 2.4493042328010253, + "grad_norm": 1.0522315960770356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505030 + }, + { + "epoch": 2.449352730993861, + "grad_norm": 1.1570023161766585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505040 + }, + { + "epoch": 2.4494012291866976, + "grad_norm": 1.141886286859517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505050 + }, + { + "epoch": 2.4494497273795335, + "grad_norm": 1.086783413484227e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505060 + }, + { + "epoch": 2.44949822557237, + "grad_norm": 1.0373690884080133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505070 + }, + { + "epoch": 2.4495467237652058, + "grad_norm": 9.292859886045335e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505080 + }, + { + "epoch": 2.4495952219580417, + "grad_norm": 9.850760989138507e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505090 + }, + { + "epoch": 2.449643720150878, + "grad_norm": 9.555210453982e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505100 + }, + { + "epoch": 2.449692218343714, + "grad_norm": 9.302107173425611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505110 + }, + { + "epoch": 2.44974071653655, + "grad_norm": 9.291713354286912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505120 + }, + { + "epoch": 2.4497892147293863, + "grad_norm": 8.177028689715371e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505130 + }, + { + "epoch": 2.449837712922222, + "grad_norm": 8.783387102084816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505140 + }, + { + "epoch": 2.449886211115058, + "grad_norm": 8.72420287123532e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505150 + }, + { + "epoch": 2.4499347093078945, + "grad_norm": 8.5341872591016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505160 + }, + { + "epoch": 2.4499832075007304, + "grad_norm": 8.059806759774801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505170 + }, + { + "epoch": 2.4500317056935668, + "grad_norm": 7.183829779933149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505180 + }, + { + "epoch": 2.4500802038864027, + "grad_norm": 7.957151524351502e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505190 + }, + { + "epoch": 2.4501287020792386, + "grad_norm": 7.754393891445943e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505200 + }, + { + "epoch": 2.450177200272075, + "grad_norm": 7.507405825890601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505210 + }, + { + "epoch": 2.450225698464911, + "grad_norm": 7.422678436341812e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505220 + }, + { + "epoch": 2.4502741966577473, + "grad_norm": 6.51802452011907e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505230 + }, + { + "epoch": 2.450322694850583, + "grad_norm": 7.257924039549835e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505240 + }, + { + "epoch": 2.450371193043419, + "grad_norm": 6.933121881047555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505250 + }, + { + "epoch": 2.4504196912362555, + "grad_norm": 6.795202693865576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505260 + }, + { + "epoch": 2.4504681894290914, + "grad_norm": 6.898403626109939e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505270 + }, + { + "epoch": 2.4505166876219273, + "grad_norm": 6.077974603613256e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505280 + }, + { + "epoch": 2.4505651858147637, + "grad_norm": 6.65459538140567e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505290 + }, + { + "epoch": 2.4506136840075996, + "grad_norm": 6.405705335055245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505300 + }, + { + "epoch": 2.4506621822004355, + "grad_norm": 6.59698912386375e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505310 + }, + { + "epoch": 2.450710680393272, + "grad_norm": 7.195968692030874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505320 + }, + { + "epoch": 2.450759178586108, + "grad_norm": 5.706091883439512e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505330 + }, + { + "epoch": 2.450807676778944, + "grad_norm": 5.991084890411003e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505340 + }, + { + "epoch": 2.45085617497178, + "grad_norm": 6.010327524563763e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505350 + }, + { + "epoch": 2.450904673164616, + "grad_norm": 5.943640530858829e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505360 + }, + { + "epoch": 2.4509531713574524, + "grad_norm": 5.84352562782442e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505370 + }, + { + "epoch": 2.4510016695502883, + "grad_norm": 5.356102974474197e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505380 + }, + { + "epoch": 2.4510501677431247, + "grad_norm": 5.769060180682573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505390 + }, + { + "epoch": 2.4510986659359606, + "grad_norm": 5.730332759412704e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505400 + }, + { + "epoch": 2.4511471641287965, + "grad_norm": 5.631575277220691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505410 + }, + { + "epoch": 2.451195662321633, + "grad_norm": 5.539592962122697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505420 + }, + { + "epoch": 2.451244160514469, + "grad_norm": 4.992141953152895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505430 + }, + { + "epoch": 2.4512926587073047, + "grad_norm": 5.209132609707012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505440 + }, + { + "epoch": 2.451341156900141, + "grad_norm": 5.27451561538328e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505450 + }, + { + "epoch": 2.451389655092977, + "grad_norm": 5.154086011316394e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505460 + }, + { + "epoch": 2.4514381532858134, + "grad_norm": 5.209592472965596e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505470 + }, + { + "epoch": 2.4514866514786493, + "grad_norm": 4.503491766172374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505480 + }, + { + "epoch": 2.4515351496714852, + "grad_norm": 5.028145437790954e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505490 + }, + { + "epoch": 2.4515836478643216, + "grad_norm": 4.875648187407933e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505500 + }, + { + "epoch": 2.4516321460571575, + "grad_norm": 4.817217131858342e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505510 + }, + { + "epoch": 2.4516806442499934, + "grad_norm": 4.653084317851608e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505520 + }, + { + "epoch": 2.45172914244283, + "grad_norm": 3.95921688323142e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505530 + }, + { + "epoch": 2.4517776406356657, + "grad_norm": 4.715310240044346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505540 + }, + { + "epoch": 2.451826138828502, + "grad_norm": 4.717174419965886e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505550 + }, + { + "epoch": 2.451874637021338, + "grad_norm": 4.4600426463148324e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505560 + }, + { + "epoch": 2.451923135214174, + "grad_norm": 1.1571247569008847e-06, + "learning_rate": 0.0002, + "loss": 0.0099, + "step": 505570 + }, + { + "epoch": 2.4519716334070103, + "grad_norm": 3.273257107139216e-06, + "learning_rate": 0.0002, + "loss": 0.0014, + "step": 505580 + }, + { + "epoch": 2.452020131599846, + "grad_norm": 0.2067042738199234, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 505590 + }, + { + "epoch": 2.4520686297926826, + "grad_norm": 3.8909816794330254e-05, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 505600 + }, + { + "epoch": 2.4521171279855185, + "grad_norm": 0.0006512180552817881, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 505610 + }, + { + "epoch": 2.4521656261783544, + "grad_norm": 2.5876672225422226e-05, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 505620 + }, + { + "epoch": 2.452214124371191, + "grad_norm": 3.07480149785988e-05, + "learning_rate": 0.0002, + "loss": 0.0009, + "step": 505630 + }, + { + "epoch": 2.4522626225640267, + "grad_norm": 0.00023914971097838134, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505640 + }, + { + "epoch": 2.4523111207568626, + "grad_norm": 8.524022268829867e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505650 + }, + { + "epoch": 2.452359618949699, + "grad_norm": 5.057160524302162e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505660 + }, + { + "epoch": 2.452408117142535, + "grad_norm": 3.962645132560283e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505670 + }, + { + "epoch": 2.452456615335371, + "grad_norm": 3.140367698506452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505680 + }, + { + "epoch": 2.452505113528207, + "grad_norm": 2.7561665774555877e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505690 + }, + { + "epoch": 2.452553611721043, + "grad_norm": 3.0632079869974405e-05, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 505700 + }, + { + "epoch": 2.4526021099138795, + "grad_norm": 4.33542845712509e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505710 + }, + { + "epoch": 2.4526506081067154, + "grad_norm": 3.554453360266052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505720 + }, + { + "epoch": 2.4526991062995513, + "grad_norm": 3.271200694143772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505730 + }, + { + "epoch": 2.4527476044923877, + "grad_norm": 2.3690834495937452e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505740 + }, + { + "epoch": 2.4527961026852236, + "grad_norm": 2.1163856217754073e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505750 + }, + { + "epoch": 2.45284460087806, + "grad_norm": 2.0881270756945014e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505760 + }, + { + "epoch": 2.452893099070896, + "grad_norm": 1.8541044482844882e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505770 + }, + { + "epoch": 2.452941597263732, + "grad_norm": 1.3604989362647757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505780 + }, + { + "epoch": 2.452990095456568, + "grad_norm": 1.5635761883459054e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505790 + }, + { + "epoch": 2.453038593649404, + "grad_norm": 1.708056151983328e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505800 + }, + { + "epoch": 2.45308709184224, + "grad_norm": 1.2494419024733361e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505810 + }, + { + "epoch": 2.4531355900350764, + "grad_norm": 1.3112524356984068e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505820 + }, + { + "epoch": 2.4531840882279123, + "grad_norm": 9.520258572592866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505830 + }, + { + "epoch": 2.4532325864207483, + "grad_norm": 1.0982223102473654e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505840 + }, + { + "epoch": 2.4532810846135846, + "grad_norm": 1.0537534762988798e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505850 + }, + { + "epoch": 2.4533295828064206, + "grad_norm": 1.0769943401101045e-05, + "learning_rate": 0.0002, + "loss": 0.0007, + "step": 505860 + }, + { + "epoch": 2.453378080999257, + "grad_norm": 3.665210533654317e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505870 + }, + { + "epoch": 2.453426579192093, + "grad_norm": 0.0002317190374014899, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505880 + }, + { + "epoch": 2.4534750773849288, + "grad_norm": 2.8013477276545018e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505890 + }, + { + "epoch": 2.453523575577765, + "grad_norm": 2.480700277374126e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505900 + }, + { + "epoch": 2.453572073770601, + "grad_norm": 1.7158603441203013e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505910 + }, + { + "epoch": 2.4536205719634374, + "grad_norm": 1.6405632777605206e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505920 + }, + { + "epoch": 2.4536690701562733, + "grad_norm": 1.9070759663009085e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505930 + }, + { + "epoch": 2.4537175683491093, + "grad_norm": 1.2557033187476918e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505940 + }, + { + "epoch": 2.4537660665419456, + "grad_norm": 1.197475467051845e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505950 + }, + { + "epoch": 2.4538145647347815, + "grad_norm": 1.2384761248540599e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505960 + }, + { + "epoch": 2.4538630629276175, + "grad_norm": 1.1730900041584391e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505970 + }, + { + "epoch": 2.453911561120454, + "grad_norm": 1.2768678971042391e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505980 + }, + { + "epoch": 2.4539600593132898, + "grad_norm": 9.302391845267266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 505990 + }, + { + "epoch": 2.454008557506126, + "grad_norm": 9.393797881784849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506000 + }, + { + "epoch": 2.454057055698962, + "grad_norm": 8.390519724343903e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506010 + }, + { + "epoch": 2.454105553891798, + "grad_norm": 8.61007265484659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506020 + }, + { + "epoch": 2.4541540520846343, + "grad_norm": 8.708285349712241e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506030 + }, + { + "epoch": 2.4542025502774703, + "grad_norm": 7.1915033004188444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506040 + }, + { + "epoch": 2.454251048470306, + "grad_norm": 7.54647498979466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506050 + }, + { + "epoch": 2.4542995466631425, + "grad_norm": 6.63987748339423e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506060 + }, + { + "epoch": 2.4543480448559785, + "grad_norm": 8.87483747646911e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506070 + }, + { + "epoch": 2.454396543048815, + "grad_norm": 7.3740411608014256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506080 + }, + { + "epoch": 2.4544450412416507, + "grad_norm": 6.236049102881225e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 506090 + }, + { + "epoch": 2.4544935394344867, + "grad_norm": 9.992799277824815e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506100 + }, + { + "epoch": 2.454542037627323, + "grad_norm": 1.1082708624599036e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506110 + }, + { + "epoch": 2.454590535820159, + "grad_norm": 7.565673513454385e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506120 + }, + { + "epoch": 2.4546390340129953, + "grad_norm": 6.326924449240323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506130 + }, + { + "epoch": 2.4546875322058312, + "grad_norm": 6.5605972849880345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506140 + }, + { + "epoch": 2.454736030398667, + "grad_norm": 6.314403435681015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506150 + }, + { + "epoch": 2.4547845285915035, + "grad_norm": 6.384329026332125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506160 + }, + { + "epoch": 2.4548330267843395, + "grad_norm": 6.034993475623196e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506170 + }, + { + "epoch": 2.4548815249771754, + "grad_norm": 6.428484084608499e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506180 + }, + { + "epoch": 2.4549300231700117, + "grad_norm": 5.921914180362364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506190 + }, + { + "epoch": 2.4549785213628477, + "grad_norm": 5.4822053243697155e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506200 + }, + { + "epoch": 2.4550270195556836, + "grad_norm": 5.464312835101737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506210 + }, + { + "epoch": 2.45507551774852, + "grad_norm": 5.339303243090399e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506220 + }, + { + "epoch": 2.455124015941356, + "grad_norm": 5.358932412491413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506230 + }, + { + "epoch": 2.4551725141341922, + "grad_norm": 4.9934001253859606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506240 + }, + { + "epoch": 2.455221012327028, + "grad_norm": 5.261576916382182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506250 + }, + { + "epoch": 2.455269510519864, + "grad_norm": 7.212325272121234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506260 + }, + { + "epoch": 2.4553180087127005, + "grad_norm": 4.791916126123397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506270 + }, + { + "epoch": 2.4553665069055364, + "grad_norm": 5.57998873773613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506280 + }, + { + "epoch": 2.4554150050983727, + "grad_norm": 9.51254514802713e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506290 + }, + { + "epoch": 2.4554635032912087, + "grad_norm": 4.843910573981702e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506300 + }, + { + "epoch": 2.4555120014840446, + "grad_norm": 4.625362635124475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506310 + }, + { + "epoch": 2.455560499676881, + "grad_norm": 4.414162503962871e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506320 + }, + { + "epoch": 2.455608997869717, + "grad_norm": 3.73452962776355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506330 + }, + { + "epoch": 2.455657496062553, + "grad_norm": 4.1199568840966094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506340 + }, + { + "epoch": 2.455705994255389, + "grad_norm": 4.0359532249567565e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506350 + }, + { + "epoch": 2.455754492448225, + "grad_norm": 3.7337113099056296e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506360 + }, + { + "epoch": 2.455802990641061, + "grad_norm": 4.247561264492106e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506370 + }, + { + "epoch": 2.4558514888338974, + "grad_norm": 4.170757165411487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506380 + }, + { + "epoch": 2.4558999870267333, + "grad_norm": 3.836870291706873e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506390 + }, + { + "epoch": 2.4559484852195697, + "grad_norm": 3.7857967072341125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506400 + }, + { + "epoch": 2.4559969834124056, + "grad_norm": 3.662281642391463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506410 + }, + { + "epoch": 2.4560454816052415, + "grad_norm": 3.596515398385236e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506420 + }, + { + "epoch": 2.456093979798078, + "grad_norm": 3.1989145554689458e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506430 + }, + { + "epoch": 2.456142477990914, + "grad_norm": 4.249634002917446e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506440 + }, + { + "epoch": 2.45619097618375, + "grad_norm": 3.261943220422836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506450 + }, + { + "epoch": 2.456239474376586, + "grad_norm": 3.221129190933425e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506460 + }, + { + "epoch": 2.456287972569422, + "grad_norm": 3.511762770358473e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506470 + }, + { + "epoch": 2.4563364707622584, + "grad_norm": 2.8156916869193083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506480 + }, + { + "epoch": 2.4563849689550943, + "grad_norm": 3.854127953673014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506490 + }, + { + "epoch": 2.4564334671479306, + "grad_norm": 5.967721335764509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506500 + }, + { + "epoch": 2.4564819653407666, + "grad_norm": 1.0556194865785073e-05, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 506510 + }, + { + "epoch": 2.4565304635336025, + "grad_norm": 3.62085884262342e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506520 + }, + { + "epoch": 2.456578961726439, + "grad_norm": 1.3648073036165442e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506530 + }, + { + "epoch": 2.456627459919275, + "grad_norm": 2.172416316170711e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506540 + }, + { + "epoch": 2.4566759581121107, + "grad_norm": 3.297761577414349e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506550 + }, + { + "epoch": 2.456724456304947, + "grad_norm": 1.5117174370971043e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506560 + }, + { + "epoch": 2.456772954497783, + "grad_norm": 1.8248369087814353e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506570 + }, + { + "epoch": 2.456821452690619, + "grad_norm": 8.704808351467364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506580 + }, + { + "epoch": 2.4568699508834553, + "grad_norm": 1.1170693142048549e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506590 + }, + { + "epoch": 2.456918449076291, + "grad_norm": 0.0002406842540949583, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506600 + }, + { + "epoch": 2.4569669472691276, + "grad_norm": 1.2031859114358667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506610 + }, + { + "epoch": 2.4570154454619635, + "grad_norm": 1.2689307368418667e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506620 + }, + { + "epoch": 2.4570639436547994, + "grad_norm": 6.9236716626619454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506630 + }, + { + "epoch": 2.4571124418476358, + "grad_norm": 8.72414602781646e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506640 + }, + { + "epoch": 2.4571609400404717, + "grad_norm": 1.2592338862305041e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506650 + }, + { + "epoch": 2.457209438233308, + "grad_norm": 1.3508053598343395e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506660 + }, + { + "epoch": 2.457257936426144, + "grad_norm": 8.145751053234562e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506670 + }, + { + "epoch": 2.45730643461898, + "grad_norm": 8.470188731735107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506680 + }, + { + "epoch": 2.4573549328118163, + "grad_norm": 7.386395736830309e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506690 + }, + { + "epoch": 2.457403431004652, + "grad_norm": 7.2931097747641616e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506700 + }, + { + "epoch": 2.457451929197488, + "grad_norm": 7.894783266237937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506710 + }, + { + "epoch": 2.4575004273903245, + "grad_norm": 6.263429440878099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506720 + }, + { + "epoch": 2.4575489255831604, + "grad_norm": 4.66874371340964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506730 + }, + { + "epoch": 2.4575974237759963, + "grad_norm": 6.028834832250141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506740 + }, + { + "epoch": 2.4576459219688327, + "grad_norm": 5.215532837610226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506750 + }, + { + "epoch": 2.4576944201616686, + "grad_norm": 5.0119369916501455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506760 + }, + { + "epoch": 2.457742918354505, + "grad_norm": 6.983143066463526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506770 + }, + { + "epoch": 2.457791416547341, + "grad_norm": 3.6458120575844077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506780 + }, + { + "epoch": 2.457839914740177, + "grad_norm": 4.972975148120895e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506790 + }, + { + "epoch": 2.457888412933013, + "grad_norm": 4.125899977225345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506800 + }, + { + "epoch": 2.457936911125849, + "grad_norm": 4.348799393483205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506810 + }, + { + "epoch": 2.4579854093186855, + "grad_norm": 4.561792593449354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506820 + }, + { + "epoch": 2.4580339075115214, + "grad_norm": 3.523584155118442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506830 + }, + { + "epoch": 2.4580824057043573, + "grad_norm": 3.7813961171195842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506840 + }, + { + "epoch": 2.4581309038971937, + "grad_norm": 4.992356934963027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506850 + }, + { + "epoch": 2.4581794020900296, + "grad_norm": 3.837812073470559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506860 + }, + { + "epoch": 2.4582279002828655, + "grad_norm": 3.3675198665150674e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506870 + }, + { + "epoch": 2.458276398475702, + "grad_norm": 2.667529770405963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506880 + }, + { + "epoch": 2.458324896668538, + "grad_norm": 3.411958005017368e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506890 + }, + { + "epoch": 2.4583733948613737, + "grad_norm": 3.77819537789037e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506900 + }, + { + "epoch": 2.45842189305421, + "grad_norm": 3.1809111078473506e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506910 + }, + { + "epoch": 2.458470391247046, + "grad_norm": 3.8644971027679276e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506920 + }, + { + "epoch": 2.4585188894398824, + "grad_norm": 2.2897775124874897e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506930 + }, + { + "epoch": 2.4585673876327183, + "grad_norm": 3.0090764084889088e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506940 + }, + { + "epoch": 2.4586158858255542, + "grad_norm": 3.0720684662810527e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506950 + }, + { + "epoch": 2.4586643840183906, + "grad_norm": 3.1563040465698577e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506960 + }, + { + "epoch": 2.4587128822112265, + "grad_norm": 4.046352842124179e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506970 + }, + { + "epoch": 2.458761380404063, + "grad_norm": 2.3178085939434823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506980 + }, + { + "epoch": 2.458809878596899, + "grad_norm": 2.7153485007147538e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 506990 + }, + { + "epoch": 2.4588583767897347, + "grad_norm": 2.7139797111885855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507000 + }, + { + "epoch": 2.458906874982571, + "grad_norm": 3.323985765746329e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507010 + }, + { + "epoch": 2.458955373175407, + "grad_norm": 3.1163167477643583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507020 + }, + { + "epoch": 2.4590038713682434, + "grad_norm": 1.950674686668208e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507030 + }, + { + "epoch": 2.4590523695610793, + "grad_norm": 6.447266787290573e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507040 + }, + { + "epoch": 2.4591008677539152, + "grad_norm": 0.690491795539856, + "learning_rate": 0.0002, + "loss": 0.0974, + "step": 507050 + }, + { + "epoch": 2.4591493659467516, + "grad_norm": 0.00039185129571706057, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 507060 + }, + { + "epoch": 2.4591978641395875, + "grad_norm": 0.002441474236547947, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 507070 + }, + { + "epoch": 2.4592463623324234, + "grad_norm": 0.0002003928821068257, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507080 + }, + { + "epoch": 2.45929486052526, + "grad_norm": 0.0001698113774182275, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507090 + }, + { + "epoch": 2.4593433587180957, + "grad_norm": 0.00014659501903224736, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507100 + }, + { + "epoch": 2.4593918569109317, + "grad_norm": 0.0001314845576416701, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507110 + }, + { + "epoch": 2.459440355103768, + "grad_norm": 0.00011871405877172947, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507120 + }, + { + "epoch": 2.459488853296604, + "grad_norm": 9.916810085996985e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507130 + }, + { + "epoch": 2.4595373514894403, + "grad_norm": 9.72025009104982e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507140 + }, + { + "epoch": 2.4595858496822762, + "grad_norm": 9.109066013479605e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507150 + }, + { + "epoch": 2.459634347875112, + "grad_norm": 8.293550490634516e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507160 + }, + { + "epoch": 2.4596828460679485, + "grad_norm": 7.896091119619086e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507170 + }, + { + "epoch": 2.4597313442607844, + "grad_norm": 6.791391206206754e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507180 + }, + { + "epoch": 2.459779842453621, + "grad_norm": 6.882278103148565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507190 + }, + { + "epoch": 2.4598283406464567, + "grad_norm": 6.545658834511414e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507200 + }, + { + "epoch": 2.4598768388392926, + "grad_norm": 6.202999065862969e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507210 + }, + { + "epoch": 2.459925337032129, + "grad_norm": 5.9050144045613706e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507220 + }, + { + "epoch": 2.459973835224965, + "grad_norm": 5.0845632358687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507230 + }, + { + "epoch": 2.460022333417801, + "grad_norm": 5.4683925554854795e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507240 + }, + { + "epoch": 2.460070831610637, + "grad_norm": 5.108227196615189e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507250 + }, + { + "epoch": 2.460119329803473, + "grad_norm": 5.024858910473995e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507260 + }, + { + "epoch": 2.460167827996309, + "grad_norm": 4.7367644583573565e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507270 + }, + { + "epoch": 2.4602163261891454, + "grad_norm": 4.103035826119594e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507280 + }, + { + "epoch": 2.4602648243819814, + "grad_norm": 4.3524389184312895e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507290 + }, + { + "epoch": 2.4603133225748177, + "grad_norm": 4.3150856072315946e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507300 + }, + { + "epoch": 2.4603618207676536, + "grad_norm": 4.3012096284655854e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507310 + }, + { + "epoch": 2.4604103189604896, + "grad_norm": 3.9072856452548876e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507320 + }, + { + "epoch": 2.460458817153326, + "grad_norm": 3.4109998523490503e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507330 + }, + { + "epoch": 2.460507315346162, + "grad_norm": 3.6185490898787975e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507340 + }, + { + "epoch": 2.460555813538998, + "grad_norm": 3.5629040212370455e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507350 + }, + { + "epoch": 2.460604311731834, + "grad_norm": 3.4274402423761785e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507360 + }, + { + "epoch": 2.46065280992467, + "grad_norm": 3.3118481951532885e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507370 + }, + { + "epoch": 2.4607013081175064, + "grad_norm": 2.850277269317303e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507380 + }, + { + "epoch": 2.4607498063103423, + "grad_norm": 3.1072257115738466e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507390 + }, + { + "epoch": 2.4607983045031783, + "grad_norm": 3.045467565243598e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507400 + }, + { + "epoch": 2.4608468026960146, + "grad_norm": 2.996463081217371e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507410 + }, + { + "epoch": 2.4608953008888506, + "grad_norm": 2.8930046028108336e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507420 + }, + { + "epoch": 2.4609437990816865, + "grad_norm": 2.5069239200092852e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507430 + }, + { + "epoch": 2.460992297274523, + "grad_norm": 2.8056459996150807e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507440 + }, + { + "epoch": 2.4610407954673588, + "grad_norm": 2.6255551347276196e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507450 + }, + { + "epoch": 2.461089293660195, + "grad_norm": 2.5974381060223095e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507460 + }, + { + "epoch": 2.461137791853031, + "grad_norm": 2.546753421484027e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507470 + }, + { + "epoch": 2.461186290045867, + "grad_norm": 2.164727084164042e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507480 + }, + { + "epoch": 2.4612347882387033, + "grad_norm": 2.417053656245116e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507490 + }, + { + "epoch": 2.4612832864315393, + "grad_norm": 2.3791095372871496e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507500 + }, + { + "epoch": 2.4613317846243756, + "grad_norm": 2.3411965230479836e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507510 + }, + { + "epoch": 2.4613802828172116, + "grad_norm": 2.25375042646192e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507520 + }, + { + "epoch": 2.4614287810100475, + "grad_norm": 1.9387818610994145e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507530 + }, + { + "epoch": 2.461477279202884, + "grad_norm": 2.1567939256783575e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507540 + }, + { + "epoch": 2.4615257773957198, + "grad_norm": 2.1055479010101408e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507550 + }, + { + "epoch": 2.461574275588556, + "grad_norm": 2.1045094399596564e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507560 + }, + { + "epoch": 2.461622773781392, + "grad_norm": 2.078786928905174e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507570 + }, + { + "epoch": 2.461671271974228, + "grad_norm": 1.7646272681304254e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507580 + }, + { + "epoch": 2.4617197701670643, + "grad_norm": 1.9344002794241533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507590 + }, + { + "epoch": 2.4617682683599003, + "grad_norm": 1.9484446966089308e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507600 + }, + { + "epoch": 2.461816766552736, + "grad_norm": 1.87666155397892e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507610 + }, + { + "epoch": 2.4618652647455725, + "grad_norm": 1.8316874047741294e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507620 + }, + { + "epoch": 2.4619137629384085, + "grad_norm": 1.572936525917612e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507630 + }, + { + "epoch": 2.4619622611312444, + "grad_norm": 1.7476702851126902e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507640 + }, + { + "epoch": 2.4620107593240808, + "grad_norm": 1.775895361788571e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507650 + }, + { + "epoch": 2.4620592575169167, + "grad_norm": 1.6725891327951103e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507660 + }, + { + "epoch": 2.462107755709753, + "grad_norm": 1.6614261767244898e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507670 + }, + { + "epoch": 2.462156253902589, + "grad_norm": 1.8940609152195975e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507680 + }, + { + "epoch": 2.462204752095425, + "grad_norm": 1.6072583093773574e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507690 + }, + { + "epoch": 2.4622532502882613, + "grad_norm": 1.6045769370975904e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507700 + }, + { + "epoch": 2.462301748481097, + "grad_norm": 1.565065213071648e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507710 + }, + { + "epoch": 2.4623502466739335, + "grad_norm": 1.5322017134167254e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507720 + }, + { + "epoch": 2.4623987448667695, + "grad_norm": 1.3121165466145612e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507730 + }, + { + "epoch": 2.4624472430596054, + "grad_norm": 1.4961256965762004e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507740 + }, + { + "epoch": 2.4624957412524417, + "grad_norm": 1.4669622032670304e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507750 + }, + { + "epoch": 2.4625442394452777, + "grad_norm": 1.4310607184597757e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507760 + }, + { + "epoch": 2.4625927376381136, + "grad_norm": 1.418412102793809e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507770 + }, + { + "epoch": 2.46264123583095, + "grad_norm": 1.1806008842540905e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507780 + }, + { + "epoch": 2.462689734023786, + "grad_norm": 1.358382996841101e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507790 + }, + { + "epoch": 2.462738232216622, + "grad_norm": 1.3304643289302476e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507800 + }, + { + "epoch": 2.462786730409458, + "grad_norm": 1.3288985428516753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507810 + }, + { + "epoch": 2.462835228602294, + "grad_norm": 1.3099791431159247e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507820 + }, + { + "epoch": 2.4628837267951305, + "grad_norm": 1.1141983122797683e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507830 + }, + { + "epoch": 2.4629322249879664, + "grad_norm": 1.2614518709597178e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507840 + }, + { + "epoch": 2.4629807231808023, + "grad_norm": 1.2652581062866375e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507850 + }, + { + "epoch": 2.4630292213736387, + "grad_norm": 1.234614410350332e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507860 + }, + { + "epoch": 2.4630777195664746, + "grad_norm": 1.2129012247896753e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507870 + }, + { + "epoch": 2.463126217759311, + "grad_norm": 1.0247761565551627e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507880 + }, + { + "epoch": 2.463174715952147, + "grad_norm": 1.169279948953772e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507890 + }, + { + "epoch": 2.463223214144983, + "grad_norm": 1.162976877822075e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507900 + }, + { + "epoch": 2.463271712337819, + "grad_norm": 1.1628084394033067e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507910 + }, + { + "epoch": 2.463320210530655, + "grad_norm": 1.1140123206132557e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507920 + }, + { + "epoch": 2.463368708723491, + "grad_norm": 9.55456926021725e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507930 + }, + { + "epoch": 2.4634172069163274, + "grad_norm": 1.0826393918250687e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507940 + }, + { + "epoch": 2.4634657051091633, + "grad_norm": 1.0787021892610937e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507950 + }, + { + "epoch": 2.463514203301999, + "grad_norm": 1.0461040801601484e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507960 + }, + { + "epoch": 2.4635627014948356, + "grad_norm": 1.0775123882922344e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507970 + }, + { + "epoch": 2.4636111996876715, + "grad_norm": 8.908325980883092e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507980 + }, + { + "epoch": 2.463659697880508, + "grad_norm": 1.0185143764829263e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 507990 + }, + { + "epoch": 2.463708196073344, + "grad_norm": 1.0020386071118992e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508000 + }, + { + "epoch": 2.4637566942661797, + "grad_norm": 9.787019735085778e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508010 + }, + { + "epoch": 2.463805192459016, + "grad_norm": 9.704089279694017e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508020 + }, + { + "epoch": 2.463853690651852, + "grad_norm": 8.310328666993883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508030 + }, + { + "epoch": 2.4639021888446884, + "grad_norm": 9.663340279075783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508040 + }, + { + "epoch": 2.4639506870375243, + "grad_norm": 9.612011126591824e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508050 + }, + { + "epoch": 2.46399918523036, + "grad_norm": 9.419639354746323e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508060 + }, + { + "epoch": 2.4640476834231966, + "grad_norm": 9.209323252434842e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508070 + }, + { + "epoch": 2.4640961816160325, + "grad_norm": 7.71561280998867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508080 + }, + { + "epoch": 2.464144679808869, + "grad_norm": 8.981551218312234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508090 + }, + { + "epoch": 2.464193178001705, + "grad_norm": 8.824678843666334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508100 + }, + { + "epoch": 2.4642416761945407, + "grad_norm": 8.834206710162107e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508110 + }, + { + "epoch": 2.464290174387377, + "grad_norm": 8.72315104061272e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508120 + }, + { + "epoch": 2.464338672580213, + "grad_norm": 7.208327588159591e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508130 + }, + { + "epoch": 2.464387170773049, + "grad_norm": 8.407659151998814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508140 + }, + { + "epoch": 2.4644356689658853, + "grad_norm": 8.314150363730732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508150 + }, + { + "epoch": 2.464484167158721, + "grad_norm": 8.25056395115098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508160 + }, + { + "epoch": 2.464532665351557, + "grad_norm": 8.283038368972484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508170 + }, + { + "epoch": 2.4645811635443935, + "grad_norm": 6.78212973070913e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508180 + }, + { + "epoch": 2.4646296617372294, + "grad_norm": 7.856195225031115e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508190 + }, + { + "epoch": 2.464678159930066, + "grad_norm": 7.839271347620524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508200 + }, + { + "epoch": 2.4647266581229017, + "grad_norm": 7.717498192505445e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508210 + }, + { + "epoch": 2.4647751563157376, + "grad_norm": 7.699753041379154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508220 + }, + { + "epoch": 2.464823654508574, + "grad_norm": 6.33667968941154e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508230 + }, + { + "epoch": 2.46487215270141, + "grad_norm": 7.348472081503132e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508240 + }, + { + "epoch": 2.4649206508942463, + "grad_norm": 7.292271675396478e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508250 + }, + { + "epoch": 2.464969149087082, + "grad_norm": 7.227377864182927e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508260 + }, + { + "epoch": 2.465017647279918, + "grad_norm": 7.172672667365987e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508270 + }, + { + "epoch": 2.4650661454727545, + "grad_norm": 6.096603556215996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508280 + }, + { + "epoch": 2.4651146436655904, + "grad_norm": 7.155462753871689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508290 + }, + { + "epoch": 2.4651631418584263, + "grad_norm": 6.9147222347965e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508300 + }, + { + "epoch": 2.4652116400512627, + "grad_norm": 6.930362815182889e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508310 + }, + { + "epoch": 2.4652601382440986, + "grad_norm": 6.845361440355191e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508320 + }, + { + "epoch": 2.4653086364369345, + "grad_norm": 5.793343916593585e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508330 + }, + { + "epoch": 2.465357134629771, + "grad_norm": 6.657243829977233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508340 + }, + { + "epoch": 2.465405632822607, + "grad_norm": 6.642003882006975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508350 + }, + { + "epoch": 2.465454131015443, + "grad_norm": 6.5090766838693526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508360 + }, + { + "epoch": 2.465502629208279, + "grad_norm": 6.373822088789893e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508370 + }, + { + "epoch": 2.465551127401115, + "grad_norm": 5.432112175185466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508380 + }, + { + "epoch": 2.4655996255939514, + "grad_norm": 6.499343271570979e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508390 + }, + { + "epoch": 2.4656481237867873, + "grad_norm": 6.3289776335295755e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508400 + }, + { + "epoch": 2.4656966219796237, + "grad_norm": 6.21913341092295e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508410 + }, + { + "epoch": 2.4657451201724596, + "grad_norm": 6.11649920756463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508420 + }, + { + "epoch": 2.4657936183652955, + "grad_norm": 5.168497409613337e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508430 + }, + { + "epoch": 2.465842116558132, + "grad_norm": 5.8966438700736035e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508440 + }, + { + "epoch": 2.465890614750968, + "grad_norm": 6.073047188692726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508450 + }, + { + "epoch": 2.4659391129438037, + "grad_norm": 6.029877567925723e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508460 + }, + { + "epoch": 2.46598761113664, + "grad_norm": 5.772577424068004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508470 + }, + { + "epoch": 2.466036109329476, + "grad_norm": 4.96427219331963e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508480 + }, + { + "epoch": 2.466084607522312, + "grad_norm": 5.815034455736168e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508490 + }, + { + "epoch": 2.4661331057151483, + "grad_norm": 5.6142084758903366e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508500 + }, + { + "epoch": 2.4661816039079842, + "grad_norm": 5.655404038407141e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508510 + }, + { + "epoch": 2.4662301021008206, + "grad_norm": 5.514982149179559e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508520 + }, + { + "epoch": 2.4662786002936565, + "grad_norm": 4.630293460650137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508530 + }, + { + "epoch": 2.4663270984864925, + "grad_norm": 5.446589057100937e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508540 + }, + { + "epoch": 2.466375596679329, + "grad_norm": 5.460850388772087e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508550 + }, + { + "epoch": 2.4664240948721647, + "grad_norm": 5.497035999724176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508560 + }, + { + "epoch": 2.466472593065001, + "grad_norm": 9.704678632260766e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508570 + }, + { + "epoch": 2.466521091257837, + "grad_norm": 4.430802619026508e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508580 + }, + { + "epoch": 2.466569589450673, + "grad_norm": 5.225710992817767e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508590 + }, + { + "epoch": 2.4666180876435093, + "grad_norm": 5.11784082846134e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508600 + }, + { + "epoch": 2.4666665858363452, + "grad_norm": 4.996746156393783e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508610 + }, + { + "epoch": 2.4667150840291816, + "grad_norm": 5.1862407417502254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508620 + }, + { + "epoch": 2.4667635822220175, + "grad_norm": 4.300489308661781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508630 + }, + { + "epoch": 2.4668120804148534, + "grad_norm": 4.955345048074378e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508640 + }, + { + "epoch": 2.46686057860769, + "grad_norm": 4.9406580728827976e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508650 + }, + { + "epoch": 2.4669090768005257, + "grad_norm": 4.816535692953039e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508660 + }, + { + "epoch": 2.4669575749933617, + "grad_norm": 4.8949264055409e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508670 + }, + { + "epoch": 2.467006073186198, + "grad_norm": 4.013498710264685e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508680 + }, + { + "epoch": 2.467054571379034, + "grad_norm": 4.688858552981401e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508690 + }, + { + "epoch": 2.46710306957187, + "grad_norm": 4.687910404754803e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508700 + }, + { + "epoch": 2.4671515677647062, + "grad_norm": 4.708473170467187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508710 + }, + { + "epoch": 2.467200065957542, + "grad_norm": 4.632587206288008e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508720 + }, + { + "epoch": 2.4672485641503785, + "grad_norm": 3.860170181724243e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508730 + }, + { + "epoch": 2.4672970623432144, + "grad_norm": 4.487749720283318e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508740 + }, + { + "epoch": 2.4673455605360504, + "grad_norm": 4.532382718025474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508750 + }, + { + "epoch": 2.4673940587288867, + "grad_norm": 4.392928531160578e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508760 + }, + { + "epoch": 2.4674425569217227, + "grad_norm": 4.308648840378737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508770 + }, + { + "epoch": 2.467491055114559, + "grad_norm": 3.534237293933984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508780 + }, + { + "epoch": 2.467539553307395, + "grad_norm": 4.168074610788608e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508790 + }, + { + "epoch": 2.467588051500231, + "grad_norm": 4.350659764895681e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508800 + }, + { + "epoch": 2.4676365496930672, + "grad_norm": 4.241418537276331e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508810 + }, + { + "epoch": 2.467685047885903, + "grad_norm": 5.5958455050131306e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508820 + }, + { + "epoch": 2.467733546078739, + "grad_norm": 3.5598579870566027e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508830 + }, + { + "epoch": 2.4677820442715754, + "grad_norm": 4.087704837729689e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508840 + }, + { + "epoch": 2.4678305424644114, + "grad_norm": 4.131646619498497e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508850 + }, + { + "epoch": 2.4678790406572473, + "grad_norm": 4.155090664426098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508860 + }, + { + "epoch": 2.4679275388500836, + "grad_norm": 4.017550509161083e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508870 + }, + { + "epoch": 2.4679760370429196, + "grad_norm": 3.2561144962528488e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508880 + }, + { + "epoch": 2.468024535235756, + "grad_norm": 3.916011792171048e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508890 + }, + { + "epoch": 2.468073033428592, + "grad_norm": 3.8592661439906806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508900 + }, + { + "epoch": 2.4681215316214278, + "grad_norm": 3.7950669593556086e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508910 + }, + { + "epoch": 2.468170029814264, + "grad_norm": 3.7391007481346605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508920 + }, + { + "epoch": 2.4682185280071, + "grad_norm": 3.0980625069787493e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508930 + }, + { + "epoch": 2.4682670261999364, + "grad_norm": 3.7127642826817464e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508940 + }, + { + "epoch": 2.4683155243927724, + "grad_norm": 3.7372801671153866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508950 + }, + { + "epoch": 2.4683640225856083, + "grad_norm": 3.640217528300127e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508960 + }, + { + "epoch": 2.4684125207784446, + "grad_norm": 3.546975904100691e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508970 + }, + { + "epoch": 2.4684610189712806, + "grad_norm": 2.960817937491811e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508980 + }, + { + "epoch": 2.4685095171641165, + "grad_norm": 3.4964029964612564e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 508990 + }, + { + "epoch": 2.468558015356953, + "grad_norm": 3.5367918371775886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509000 + }, + { + "epoch": 2.4686065135497888, + "grad_norm": 3.3939179502340266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509010 + }, + { + "epoch": 2.4686550117426247, + "grad_norm": 3.378388328201254e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509020 + }, + { + "epoch": 2.468703509935461, + "grad_norm": 2.769280854408862e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509030 + }, + { + "epoch": 2.468752008128297, + "grad_norm": 3.341036517667817e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509040 + }, + { + "epoch": 2.4688005063211333, + "grad_norm": 3.3505850751680555e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509050 + }, + { + "epoch": 2.4688490045139693, + "grad_norm": 3.282856368969078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509060 + }, + { + "epoch": 2.468897502706805, + "grad_norm": 3.3151968636957463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509070 + }, + { + "epoch": 2.4689460008996416, + "grad_norm": 2.5723052203829866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509080 + }, + { + "epoch": 2.4689944990924775, + "grad_norm": 3.205747134416015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509090 + }, + { + "epoch": 2.469042997285314, + "grad_norm": 3.157157834721147e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509100 + }, + { + "epoch": 2.4690914954781498, + "grad_norm": 3.1881293125479715e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509110 + }, + { + "epoch": 2.4691399936709857, + "grad_norm": 3.078583176829852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509120 + }, + { + "epoch": 2.469188491863822, + "grad_norm": 2.489299504304654e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509130 + }, + { + "epoch": 2.469236990056658, + "grad_norm": 3.0300893740786705e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509140 + }, + { + "epoch": 2.4692854882494943, + "grad_norm": 2.9913344405940734e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509150 + }, + { + "epoch": 2.4693339864423303, + "grad_norm": 3.015475613210583e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509160 + }, + { + "epoch": 2.469382484635166, + "grad_norm": 2.859129608623334e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509170 + }, + { + "epoch": 2.4694309828280026, + "grad_norm": 2.30656269195606e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509180 + }, + { + "epoch": 2.4694794810208385, + "grad_norm": 2.9014827305218205e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509190 + }, + { + "epoch": 2.4695279792136744, + "grad_norm": 2.870863227144582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509200 + }, + { + "epoch": 2.4695764774065108, + "grad_norm": 2.816942696881597e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509210 + }, + { + "epoch": 2.4696249755993467, + "grad_norm": 2.788807250908576e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509220 + }, + { + "epoch": 2.4696734737921826, + "grad_norm": 2.196891045969096e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509230 + }, + { + "epoch": 2.469721971985019, + "grad_norm": 2.7167072857992025e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509240 + }, + { + "epoch": 2.469770470177855, + "grad_norm": 2.684079845494125e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509250 + }, + { + "epoch": 2.4698189683706913, + "grad_norm": 2.7055614282289753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509260 + }, + { + "epoch": 2.469867466563527, + "grad_norm": 2.7518394745129626e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509270 + }, + { + "epoch": 2.469915964756363, + "grad_norm": 2.1051039311714703e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509280 + }, + { + "epoch": 2.4699644629491995, + "grad_norm": 2.6294128474546596e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509290 + }, + { + "epoch": 2.4700129611420354, + "grad_norm": 2.6010386591224233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509300 + }, + { + "epoch": 2.4700614593348718, + "grad_norm": 2.5450422072026413e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509310 + }, + { + "epoch": 2.4701099575277077, + "grad_norm": 2.5426297725061886e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509320 + }, + { + "epoch": 2.4701584557205436, + "grad_norm": 1.9752287698793225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509330 + }, + { + "epoch": 2.47020695391338, + "grad_norm": 2.5370175080752233e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509340 + }, + { + "epoch": 2.470255452106216, + "grad_norm": 2.458962853779667e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509350 + }, + { + "epoch": 2.470303950299052, + "grad_norm": 2.4213161395891802e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509360 + }, + { + "epoch": 2.470352448491888, + "grad_norm": 2.38156690102187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509370 + }, + { + "epoch": 2.470400946684724, + "grad_norm": 1.8925629774457775e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509380 + }, + { + "epoch": 2.47044944487756, + "grad_norm": 2.296964566994575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509390 + }, + { + "epoch": 2.4704979430703964, + "grad_norm": 2.408405180176487e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509400 + }, + { + "epoch": 2.4705464412632323, + "grad_norm": 2.329679546164698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509410 + }, + { + "epoch": 2.4705949394560687, + "grad_norm": 2.2605370304518146e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509420 + }, + { + "epoch": 2.4706434376489046, + "grad_norm": 1.7952041844182531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509430 + }, + { + "epoch": 2.4706919358417405, + "grad_norm": 2.2721819732396398e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509440 + }, + { + "epoch": 2.470740434034577, + "grad_norm": 2.1838284283148823e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509450 + }, + { + "epoch": 2.470788932227413, + "grad_norm": 2.2249630546866683e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509460 + }, + { + "epoch": 2.470837430420249, + "grad_norm": 2.1798584839416435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509470 + }, + { + "epoch": 2.470885928613085, + "grad_norm": 1.7876892570711789e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509480 + }, + { + "epoch": 2.470934426805921, + "grad_norm": 2.114740709657781e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509490 + }, + { + "epoch": 2.4709829249987574, + "grad_norm": 2.155549736926332e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509500 + }, + { + "epoch": 2.4710314231915933, + "grad_norm": 2.174821702283225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509510 + }, + { + "epoch": 2.4710799213844292, + "grad_norm": 2.1013054265495157e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509520 + }, + { + "epoch": 2.4711284195772656, + "grad_norm": 1.7095694602176081e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509530 + }, + { + "epoch": 2.4711769177701015, + "grad_norm": 2.0318991573731182e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509540 + }, + { + "epoch": 2.4712254159629374, + "grad_norm": 2.0496306660788832e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509550 + }, + { + "epoch": 2.471273914155774, + "grad_norm": 2.0030652194691356e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509560 + }, + { + "epoch": 2.4713224123486097, + "grad_norm": 2.0228681023581885e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509570 + }, + { + "epoch": 2.471370910541446, + "grad_norm": 1.6178071291506058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509580 + }, + { + "epoch": 2.471419408734282, + "grad_norm": 2.0039192349941004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509590 + }, + { + "epoch": 2.471467906927118, + "grad_norm": 2.0282689092709916e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509600 + }, + { + "epoch": 2.4715164051199543, + "grad_norm": 1.9103042632195866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509610 + }, + { + "epoch": 2.47156490331279, + "grad_norm": 1.899833591778588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509620 + }, + { + "epoch": 2.4716134015056266, + "grad_norm": 1.5933421764202649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509630 + }, + { + "epoch": 2.4716618996984625, + "grad_norm": 1.9053926507694996e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509640 + }, + { + "epoch": 2.4717103978912984, + "grad_norm": 1.902438270917628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509650 + }, + { + "epoch": 2.471758896084135, + "grad_norm": 1.8711103848545463e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509660 + }, + { + "epoch": 2.4718073942769707, + "grad_norm": 1.8283421923115384e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509670 + }, + { + "epoch": 2.471855892469807, + "grad_norm": 1.4916639656803454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509680 + }, + { + "epoch": 2.471904390662643, + "grad_norm": 1.8185500039180624e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509690 + }, + { + "epoch": 2.471952888855479, + "grad_norm": 1.7870155488708406e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509700 + }, + { + "epoch": 2.4720013870483153, + "grad_norm": 1.7582163991392008e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509710 + }, + { + "epoch": 2.472049885241151, + "grad_norm": 1.8125889482689672e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509720 + }, + { + "epoch": 2.472098383433987, + "grad_norm": 1.4606038121200982e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509730 + }, + { + "epoch": 2.4721468816268235, + "grad_norm": 1.7789668618206633e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509740 + }, + { + "epoch": 2.4721953798196594, + "grad_norm": 1.7302754713455215e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509750 + }, + { + "epoch": 2.4722438780124953, + "grad_norm": 1.711667778181436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509760 + }, + { + "epoch": 2.4722923762053317, + "grad_norm": 1.6929058119785623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509770 + }, + { + "epoch": 2.4723408743981676, + "grad_norm": 1.4255027735998738e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509780 + }, + { + "epoch": 2.472389372591004, + "grad_norm": 1.6877253301572637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509790 + }, + { + "epoch": 2.47243787078384, + "grad_norm": 1.6956031458903453e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509800 + }, + { + "epoch": 2.472486368976676, + "grad_norm": 1.6787018921604613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509810 + }, + { + "epoch": 2.472534867169512, + "grad_norm": 1.6397398212575354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509820 + }, + { + "epoch": 2.472583365362348, + "grad_norm": 1.3406066727839061e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509830 + }, + { + "epoch": 2.4726318635551845, + "grad_norm": 1.591776822351676e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509840 + }, + { + "epoch": 2.4726803617480204, + "grad_norm": 1.5952393823681632e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509850 + }, + { + "epoch": 2.4727288599408563, + "grad_norm": 1.6082061620181776e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509860 + }, + { + "epoch": 2.4727773581336927, + "grad_norm": 1.5930061181279598e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509870 + }, + { + "epoch": 2.4728258563265286, + "grad_norm": 1.2941693512402708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509880 + }, + { + "epoch": 2.4728743545193645, + "grad_norm": 1.538362539577065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509890 + }, + { + "epoch": 2.472922852712201, + "grad_norm": 1.562175270919397e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509900 + }, + { + "epoch": 2.472971350905037, + "grad_norm": 1.5377344197986531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509910 + }, + { + "epoch": 2.4730198490978728, + "grad_norm": 1.534428179184033e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509920 + }, + { + "epoch": 2.473068347290709, + "grad_norm": 1.2329135188338114e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509930 + }, + { + "epoch": 2.473116845483545, + "grad_norm": 1.4974157238611951e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509940 + }, + { + "epoch": 2.4731653436763814, + "grad_norm": 1.5085307722983998e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509950 + }, + { + "epoch": 2.4732138418692173, + "grad_norm": 1.4578334912584978e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509960 + }, + { + "epoch": 2.4732623400620533, + "grad_norm": 1.5048044588183984e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509970 + }, + { + "epoch": 2.4733108382548896, + "grad_norm": 1.1757213087548735e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509980 + }, + { + "epoch": 2.4733593364477255, + "grad_norm": 1.4292664900494856e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 509990 + }, + { + "epoch": 2.473407834640562, + "grad_norm": 1.4706947695231065e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510000 + }, + { + "epoch": 2.473456332833398, + "grad_norm": 1.4380433412952698e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510010 + }, + { + "epoch": 2.4735048310262338, + "grad_norm": 1.4148648688205867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510020 + }, + { + "epoch": 2.47355332921907, + "grad_norm": 1.1134618489450077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510030 + }, + { + "epoch": 2.473601827411906, + "grad_norm": 1.3990331808599876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510040 + }, + { + "epoch": 2.473650325604742, + "grad_norm": 1.3999474504089449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510050 + }, + { + "epoch": 2.4736988237975783, + "grad_norm": 1.3646713341586292e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510060 + }, + { + "epoch": 2.4737473219904142, + "grad_norm": 1.3644685168401338e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510070 + }, + { + "epoch": 2.4737958201832506, + "grad_norm": 1.0921504554062267e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510080 + }, + { + "epoch": 2.4738443183760865, + "grad_norm": 1.3756068710790714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510090 + }, + { + "epoch": 2.4738928165689225, + "grad_norm": 1.3526955626730341e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510100 + }, + { + "epoch": 2.473941314761759, + "grad_norm": 1.3206268931753584e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510110 + }, + { + "epoch": 2.4739898129545947, + "grad_norm": 1.3613351939056884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510120 + }, + { + "epoch": 2.4740383111474307, + "grad_norm": 1.0353605830459855e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510130 + }, + { + "epoch": 2.474086809340267, + "grad_norm": 1.3304231742949924e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510140 + }, + { + "epoch": 2.474135307533103, + "grad_norm": 1.2846041954617249e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510150 + }, + { + "epoch": 2.4741838057259393, + "grad_norm": 1.3159354921299382e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510160 + }, + { + "epoch": 2.4742323039187752, + "grad_norm": 1.2833876326112659e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510170 + }, + { + "epoch": 2.474280802111611, + "grad_norm": 1.0186033705394948e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510180 + }, + { + "epoch": 2.4743293003044475, + "grad_norm": 1.2957759736309526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510190 + }, + { + "epoch": 2.4743777984972835, + "grad_norm": 1.2626715033547953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510200 + }, + { + "epoch": 2.47442629669012, + "grad_norm": 1.2719550568363047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510210 + }, + { + "epoch": 2.4744747948829557, + "grad_norm": 1.2653728163058986e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510220 + }, + { + "epoch": 2.4745232930757917, + "grad_norm": 9.766073389982921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510230 + }, + { + "epoch": 2.474571791268628, + "grad_norm": 1.2519700476332218e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510240 + }, + { + "epoch": 2.474620289461464, + "grad_norm": 1.1970134892180795e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510250 + }, + { + "epoch": 2.4746687876543, + "grad_norm": 1.1800593711086549e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510260 + }, + { + "epoch": 2.4747172858471362, + "grad_norm": 1.2080437272743438e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510270 + }, + { + "epoch": 2.474765784039972, + "grad_norm": 9.61928549259028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510280 + }, + { + "epoch": 2.474814282232808, + "grad_norm": 1.1708145848388085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510290 + }, + { + "epoch": 2.4748627804256444, + "grad_norm": 1.1627197409325163e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510300 + }, + { + "epoch": 2.4749112786184804, + "grad_norm": 1.198479367303662e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510310 + }, + { + "epoch": 2.4749597768113167, + "grad_norm": 1.1588090274017304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510320 + }, + { + "epoch": 2.4750082750041527, + "grad_norm": 9.247561933989346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510330 + }, + { + "epoch": 2.4750567731969886, + "grad_norm": 1.1212978279218078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510340 + }, + { + "epoch": 2.475105271389825, + "grad_norm": 1.134037574956892e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510350 + }, + { + "epoch": 2.475153769582661, + "grad_norm": 1.1051338333345484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510360 + }, + { + "epoch": 2.4752022677754972, + "grad_norm": 1.0927088851531153e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510370 + }, + { + "epoch": 2.475250765968333, + "grad_norm": 9.03327418200206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510380 + }, + { + "epoch": 2.475299264161169, + "grad_norm": 1.068345000021509e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510390 + }, + { + "epoch": 2.4753477623540054, + "grad_norm": 1.1344395716150757e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510400 + }, + { + "epoch": 2.4753962605468414, + "grad_norm": 1.0684611879696604e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510410 + }, + { + "epoch": 2.4754447587396773, + "grad_norm": 1.0816204394359374e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510420 + }, + { + "epoch": 2.4754932569325137, + "grad_norm": 8.600084129284369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510430 + }, + { + "epoch": 2.4755417551253496, + "grad_norm": 1.0748032082119607e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510440 + }, + { + "epoch": 2.4755902533181855, + "grad_norm": 1.0661962051017326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510450 + }, + { + "epoch": 2.475638751511022, + "grad_norm": 1.04263449429709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510460 + }, + { + "epoch": 2.475687249703858, + "grad_norm": 1.031335386869614e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510470 + }, + { + "epoch": 2.475735747896694, + "grad_norm": 8.442303851552424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510480 + }, + { + "epoch": 2.47578424608953, + "grad_norm": 1.014560325529601e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510490 + }, + { + "epoch": 2.475832744282366, + "grad_norm": 1.025531105369737e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510500 + }, + { + "epoch": 2.4758812424752024, + "grad_norm": 1.0035391824203543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510510 + }, + { + "epoch": 2.4759297406680383, + "grad_norm": 9.957623205991695e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510520 + }, + { + "epoch": 2.4759782388608746, + "grad_norm": 8.165618510247441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510530 + }, + { + "epoch": 2.4760267370537106, + "grad_norm": 9.813373935685377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510540 + }, + { + "epoch": 2.4760752352465465, + "grad_norm": 9.685795703262556e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510550 + }, + { + "epoch": 2.476123733439383, + "grad_norm": 1.0048819376606843e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510560 + }, + { + "epoch": 2.476172231632219, + "grad_norm": 9.509849405731075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510570 + }, + { + "epoch": 2.4762207298250547, + "grad_norm": 8.16977603790292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510580 + }, + { + "epoch": 2.476269228017891, + "grad_norm": 9.494648907093506e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510590 + }, + { + "epoch": 2.476317726210727, + "grad_norm": 9.641988754083286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510600 + }, + { + "epoch": 2.4763662244035634, + "grad_norm": 9.33243825329555e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510610 + }, + { + "epoch": 2.4764147225963993, + "grad_norm": 9.509548704045301e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510620 + }, + { + "epoch": 2.476463220789235, + "grad_norm": 7.623100941600569e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510630 + }, + { + "epoch": 2.4765117189820716, + "grad_norm": 9.336108064417203e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510640 + }, + { + "epoch": 2.4765602171749075, + "grad_norm": 9.248749393009348e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510650 + }, + { + "epoch": 2.4766087153677434, + "grad_norm": 9.103415550271166e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510660 + }, + { + "epoch": 2.4766572135605798, + "grad_norm": 9.194204153573082e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510670 + }, + { + "epoch": 2.4767057117534157, + "grad_norm": 7.417895062644675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510680 + }, + { + "epoch": 2.476754209946252, + "grad_norm": 8.785604563854577e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510690 + }, + { + "epoch": 2.476802708139088, + "grad_norm": 8.974263892014278e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510700 + }, + { + "epoch": 2.476851206331924, + "grad_norm": 9.03845261746028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510710 + }, + { + "epoch": 2.4768997045247603, + "grad_norm": 9.013949124891951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510720 + }, + { + "epoch": 2.476948202717596, + "grad_norm": 7.177726502050064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510730 + }, + { + "epoch": 2.4769967009104326, + "grad_norm": 8.572168326281826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510740 + }, + { + "epoch": 2.4770451991032685, + "grad_norm": 8.45432396090473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510750 + }, + { + "epoch": 2.4770936972961044, + "grad_norm": 8.599264447184396e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510760 + }, + { + "epoch": 2.4771421954889408, + "grad_norm": 8.5029461160957e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510770 + }, + { + "epoch": 2.4771906936817767, + "grad_norm": 6.995817329880083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510780 + }, + { + "epoch": 2.4772391918746126, + "grad_norm": 8.373413606932445e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510790 + }, + { + "epoch": 2.477287690067449, + "grad_norm": 8.543785270376247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510800 + }, + { + "epoch": 2.477336188260285, + "grad_norm": 8.413671821472235e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510810 + }, + { + "epoch": 2.477384686453121, + "grad_norm": 8.282774501822132e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510820 + }, + { + "epoch": 2.477433184645957, + "grad_norm": 6.880531486785912e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510830 + }, + { + "epoch": 2.477481682838793, + "grad_norm": 7.985885872585641e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510840 + }, + { + "epoch": 2.4775301810316295, + "grad_norm": 8.058450475800782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510850 + }, + { + "epoch": 2.4775786792244654, + "grad_norm": 2.237396302007255e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510860 + }, + { + "epoch": 2.4776271774173013, + "grad_norm": 8.190945663955063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510870 + }, + { + "epoch": 2.4776756756101377, + "grad_norm": 6.415943403226265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510880 + }, + { + "epoch": 2.4777241738029736, + "grad_norm": 7.810853617229441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510890 + }, + { + "epoch": 2.47777267199581, + "grad_norm": 7.969900934767793e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510900 + }, + { + "epoch": 2.477821170188646, + "grad_norm": 8.018243420337967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510910 + }, + { + "epoch": 2.477869668381482, + "grad_norm": 7.655539775441866e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510920 + }, + { + "epoch": 2.477918166574318, + "grad_norm": 6.316257668004255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510930 + }, + { + "epoch": 2.477966664767154, + "grad_norm": 7.95008759268967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510940 + }, + { + "epoch": 2.47801516295999, + "grad_norm": 7.534838459832827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510950 + }, + { + "epoch": 2.4780636611528264, + "grad_norm": 7.756765967315005e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510960 + }, + { + "epoch": 2.4781121593456623, + "grad_norm": 7.373606649707654e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510970 + }, + { + "epoch": 2.4781606575384982, + "grad_norm": 6.005205932524404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510980 + }, + { + "epoch": 2.4782091557313346, + "grad_norm": 7.642479431524407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 510990 + }, + { + "epoch": 2.4782576539241705, + "grad_norm": 7.451195074281713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511000 + }, + { + "epoch": 2.478306152117007, + "grad_norm": 7.565126338704431e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511010 + }, + { + "epoch": 2.478354650309843, + "grad_norm": 7.247980420288513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511020 + }, + { + "epoch": 2.4784031485026787, + "grad_norm": 5.875327246940287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511030 + }, + { + "epoch": 2.478451646695515, + "grad_norm": 7.286777190529392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511040 + }, + { + "epoch": 2.478500144888351, + "grad_norm": 7.45825730064098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511050 + }, + { + "epoch": 2.4785486430811874, + "grad_norm": 7.296752073671087e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511060 + }, + { + "epoch": 2.4785971412740233, + "grad_norm": 7.235680641315412e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511070 + }, + { + "epoch": 2.4786456394668592, + "grad_norm": 5.569276027017622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511080 + }, + { + "epoch": 2.4786941376596956, + "grad_norm": 7.090646363394626e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511090 + }, + { + "epoch": 2.4787426358525315, + "grad_norm": 7.003439463915129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511100 + }, + { + "epoch": 2.478791134045368, + "grad_norm": 7.130473704819451e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511110 + }, + { + "epoch": 2.478839632238204, + "grad_norm": 6.974250368330104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511120 + }, + { + "epoch": 2.4788881304310397, + "grad_norm": 5.308581307872373e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511130 + }, + { + "epoch": 2.478936628623876, + "grad_norm": 6.640510150646151e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511140 + }, + { + "epoch": 2.478985126816712, + "grad_norm": 6.992709131736774e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511150 + }, + { + "epoch": 2.479033625009548, + "grad_norm": 6.819535087743134e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511160 + }, + { + "epoch": 2.4790821232023843, + "grad_norm": 6.570202231159783e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511170 + }, + { + "epoch": 2.4791306213952202, + "grad_norm": 5.144521537658875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511180 + }, + { + "epoch": 2.479179119588056, + "grad_norm": 6.815736242060666e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511190 + }, + { + "epoch": 2.4792276177808925, + "grad_norm": 6.660993108198454e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511200 + }, + { + "epoch": 2.4792761159737284, + "grad_norm": 6.625066930610046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511210 + }, + { + "epoch": 2.479324614166565, + "grad_norm": 6.759984785276174e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511220 + }, + { + "epoch": 2.4793731123594007, + "grad_norm": 4.88123987452127e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511230 + }, + { + "epoch": 2.4794216105522366, + "grad_norm": 7.134412953746505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511240 + }, + { + "epoch": 2.479470108745073, + "grad_norm": 6.488122608061531e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511250 + }, + { + "epoch": 2.479518606937909, + "grad_norm": 6.31274360785028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511260 + }, + { + "epoch": 2.4795671051307453, + "grad_norm": 1.309387585024524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511270 + }, + { + "epoch": 2.479615603323581, + "grad_norm": 4.454715565316292e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511280 + }, + { + "epoch": 2.479664101516417, + "grad_norm": 6.129644702923542e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511290 + }, + { + "epoch": 2.4797125997092535, + "grad_norm": 6.362240583257517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511300 + }, + { + "epoch": 2.4797610979020894, + "grad_norm": 5.814811743221071e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511310 + }, + { + "epoch": 2.4798095960949254, + "grad_norm": 6.21700678493653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511320 + }, + { + "epoch": 2.4798580942877617, + "grad_norm": 4.645157218874374e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511330 + }, + { + "epoch": 2.4799065924805976, + "grad_norm": 6.021112426424224e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511340 + }, + { + "epoch": 2.4799550906734336, + "grad_norm": 5.814976020701579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511350 + }, + { + "epoch": 2.48000358886627, + "grad_norm": 5.698839800061251e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511360 + }, + { + "epoch": 2.480052087059106, + "grad_norm": 5.920254579905304e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511370 + }, + { + "epoch": 2.480100585251942, + "grad_norm": 4.173598426859826e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511380 + }, + { + "epoch": 2.480149083444778, + "grad_norm": 5.828922553519078e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511390 + }, + { + "epoch": 2.480197581637614, + "grad_norm": 5.84260646974144e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511400 + }, + { + "epoch": 2.4802460798304504, + "grad_norm": 5.757553935836768e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511410 + }, + { + "epoch": 2.4802945780232863, + "grad_norm": 5.691811111319112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511420 + }, + { + "epoch": 2.4803430762161227, + "grad_norm": 4.4159219214634504e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511430 + }, + { + "epoch": 2.4803915744089586, + "grad_norm": 5.737110200243478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511440 + }, + { + "epoch": 2.4804400726017946, + "grad_norm": 5.421358082458028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511450 + }, + { + "epoch": 2.480488570794631, + "grad_norm": 5.552855100177112e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511460 + }, + { + "epoch": 2.480537068987467, + "grad_norm": 5.447343482956057e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511470 + }, + { + "epoch": 2.4805855671803028, + "grad_norm": 3.9151404962467495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511480 + }, + { + "epoch": 2.480634065373139, + "grad_norm": 5.477927516039927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511490 + }, + { + "epoch": 2.480682563565975, + "grad_norm": 5.328233214640932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511500 + }, + { + "epoch": 2.480731061758811, + "grad_norm": 5.230094757280312e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511510 + }, + { + "epoch": 2.4807795599516473, + "grad_norm": 5.268336167318921e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511520 + }, + { + "epoch": 2.4808280581444833, + "grad_norm": 3.811959459198988e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511530 + }, + { + "epoch": 2.4808765563373196, + "grad_norm": 5.128130737830361e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511540 + }, + { + "epoch": 2.4809250545301555, + "grad_norm": 4.884406052951817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511550 + }, + { + "epoch": 2.4809735527229915, + "grad_norm": 5.321163030203024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511560 + }, + { + "epoch": 2.481022050915828, + "grad_norm": 4.972771989741887e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511570 + }, + { + "epoch": 2.4810705491086638, + "grad_norm": 3.527162846239662e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511580 + }, + { + "epoch": 2.4811190473015, + "grad_norm": 4.850126629207807e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511590 + }, + { + "epoch": 2.481167545494336, + "grad_norm": 4.846110641665291e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511600 + }, + { + "epoch": 2.481216043687172, + "grad_norm": 4.5670091708416294e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511610 + }, + { + "epoch": 2.4812645418800083, + "grad_norm": 4.760401566272776e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511620 + }, + { + "epoch": 2.4813130400728443, + "grad_norm": 3.4526073022789205e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511630 + }, + { + "epoch": 2.4813615382656806, + "grad_norm": 4.918729814562539e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511640 + }, + { + "epoch": 2.4814100364585165, + "grad_norm": 4.822005621463177e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511650 + }, + { + "epoch": 2.4814585346513525, + "grad_norm": 4.5624119593412615e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511660 + }, + { + "epoch": 2.481507032844189, + "grad_norm": 4.461650178200216e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511670 + }, + { + "epoch": 2.4815555310370248, + "grad_norm": 3.625969213771896e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511680 + }, + { + "epoch": 2.4816040292298607, + "grad_norm": 4.4676718857772357e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511690 + }, + { + "epoch": 2.481652527422697, + "grad_norm": 4.515588329923048e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511700 + }, + { + "epoch": 2.481701025615533, + "grad_norm": 4.524178223164199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511710 + }, + { + "epoch": 2.481749523808369, + "grad_norm": 4.2912458297905687e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511720 + }, + { + "epoch": 2.4817980220012053, + "grad_norm": 3.1826235158405325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511730 + }, + { + "epoch": 2.481846520194041, + "grad_norm": 4.2692641955e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511740 + }, + { + "epoch": 2.4818950183868775, + "grad_norm": 4.282915426756517e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511750 + }, + { + "epoch": 2.4819435165797135, + "grad_norm": 4.18740000895923e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511760 + }, + { + "epoch": 2.4819920147725494, + "grad_norm": 4.1464068090135697e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511770 + }, + { + "epoch": 2.4820405129653857, + "grad_norm": 3.0355454327946063e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511780 + }, + { + "epoch": 2.4820890111582217, + "grad_norm": 4.115979095331568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511790 + }, + { + "epoch": 2.482137509351058, + "grad_norm": 3.9483660430050804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511800 + }, + { + "epoch": 2.482186007543894, + "grad_norm": 3.9117873029681505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511810 + }, + { + "epoch": 2.48223450573673, + "grad_norm": 3.807886628237611e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511820 + }, + { + "epoch": 2.4822830039295662, + "grad_norm": 3.032089352927869e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511830 + }, + { + "epoch": 2.482331502122402, + "grad_norm": 3.9341207980214676e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511840 + }, + { + "epoch": 2.482380000315238, + "grad_norm": 4.101632100628194e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511850 + }, + { + "epoch": 2.4824284985080745, + "grad_norm": 4.0396597000835754e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511860 + }, + { + "epoch": 2.4824769967009104, + "grad_norm": 3.857512353988568e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511870 + }, + { + "epoch": 2.4825254948937463, + "grad_norm": 3.108411021912616e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511880 + }, + { + "epoch": 2.4825739930865827, + "grad_norm": 3.894270435012004e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511890 + }, + { + "epoch": 2.4826224912794186, + "grad_norm": 3.7273343878041487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511900 + }, + { + "epoch": 2.482670989472255, + "grad_norm": 3.9504899973508145e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511910 + }, + { + "epoch": 2.482719487665091, + "grad_norm": 3.894419080552325e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511920 + }, + { + "epoch": 2.482767985857927, + "grad_norm": 2.786210018257407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511930 + }, + { + "epoch": 2.482816484050763, + "grad_norm": 3.6876713238598313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511940 + }, + { + "epoch": 2.482864982243599, + "grad_norm": 3.788009053096175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511950 + }, + { + "epoch": 2.4829134804364354, + "grad_norm": 3.6504150102700805e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511960 + }, + { + "epoch": 2.4829619786292714, + "grad_norm": 3.5788056607088947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511970 + }, + { + "epoch": 2.4830104768221073, + "grad_norm": 2.79857857776733e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511980 + }, + { + "epoch": 2.4830589750149437, + "grad_norm": 3.545664242210478e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 511990 + }, + { + "epoch": 2.4831074732077796, + "grad_norm": 3.535318171543622e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512000 + }, + { + "epoch": 2.4831559714006155, + "grad_norm": 3.354636817221035e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512010 + }, + { + "epoch": 2.483204469593452, + "grad_norm": 3.4687997185756103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512020 + }, + { + "epoch": 2.483252967786288, + "grad_norm": 2.603331381578755e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512030 + }, + { + "epoch": 2.4833014659791237, + "grad_norm": 3.4003619475697633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512040 + }, + { + "epoch": 2.48334996417196, + "grad_norm": 3.411528837204969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512050 + }, + { + "epoch": 2.483398462364796, + "grad_norm": 3.326411217585701e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512060 + }, + { + "epoch": 2.4834469605576324, + "grad_norm": 3.455499211213464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512070 + }, + { + "epoch": 2.4834954587504683, + "grad_norm": 2.774981453512737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512080 + }, + { + "epoch": 2.483543956943304, + "grad_norm": 3.4472714105504565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512090 + }, + { + "epoch": 2.4835924551361406, + "grad_norm": 3.3461370207987784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512100 + }, + { + "epoch": 2.4836409533289765, + "grad_norm": 3.208419627753756e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512110 + }, + { + "epoch": 2.483689451521813, + "grad_norm": 3.206792484888865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512120 + }, + { + "epoch": 2.483737949714649, + "grad_norm": 2.5615105414544814e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512130 + }, + { + "epoch": 2.4837864479074847, + "grad_norm": 3.203224991921161e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512140 + }, + { + "epoch": 2.483834946100321, + "grad_norm": 3.0702213393851707e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512150 + }, + { + "epoch": 2.483883444293157, + "grad_norm": 3.2290711260429816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512160 + }, + { + "epoch": 2.4839319424859934, + "grad_norm": 3.019433734152699e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512170 + }, + { + "epoch": 2.4839804406788293, + "grad_norm": 2.4690342570465873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512180 + }, + { + "epoch": 2.484028938871665, + "grad_norm": 3.1131844480114523e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512190 + }, + { + "epoch": 2.4840774370645016, + "grad_norm": 2.9948984092698083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512200 + }, + { + "epoch": 2.4841259352573375, + "grad_norm": 4.4022104361829406e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512210 + }, + { + "epoch": 2.4841744334501734, + "grad_norm": 2.882668468373595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512220 + }, + { + "epoch": 2.48422293164301, + "grad_norm": 2.4545067844883306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512230 + }, + { + "epoch": 2.4842714298358457, + "grad_norm": 3.0783854754190543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512240 + }, + { + "epoch": 2.4843199280286816, + "grad_norm": 2.9005545343352424e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512250 + }, + { + "epoch": 2.484368426221518, + "grad_norm": 2.925000330833427e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512260 + }, + { + "epoch": 2.484416924414354, + "grad_norm": 2.9064364071018645e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512270 + }, + { + "epoch": 2.4844654226071903, + "grad_norm": 2.3049365438509994e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512280 + }, + { + "epoch": 2.484513920800026, + "grad_norm": 2.838890793555038e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512290 + }, + { + "epoch": 2.484562418992862, + "grad_norm": 2.901682307765441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512300 + }, + { + "epoch": 2.4846109171856985, + "grad_norm": 2.893709449836024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512310 + }, + { + "epoch": 2.4846594153785344, + "grad_norm": 2.8550599040499947e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512320 + }, + { + "epoch": 2.4847079135713708, + "grad_norm": 2.2395339271952253e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512330 + }, + { + "epoch": 2.4847564117642067, + "grad_norm": 2.850808016319206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512340 + }, + { + "epoch": 2.4848049099570426, + "grad_norm": 2.694319221063779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512350 + }, + { + "epoch": 2.484853408149879, + "grad_norm": 2.6670076636037265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512360 + }, + { + "epoch": 2.484901906342715, + "grad_norm": 2.8023754339301377e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512370 + }, + { + "epoch": 2.484950404535551, + "grad_norm": 2.1587506182640936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512380 + }, + { + "epoch": 2.484998902728387, + "grad_norm": 2.689534994715359e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512390 + }, + { + "epoch": 2.485047400921223, + "grad_norm": 2.5193332930939505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512400 + }, + { + "epoch": 2.485095899114059, + "grad_norm": 2.6543278863755404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512410 + }, + { + "epoch": 2.4851443973068954, + "grad_norm": 2.5365633860019443e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512420 + }, + { + "epoch": 2.4851928954997313, + "grad_norm": 2.1931934668373287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512430 + }, + { + "epoch": 2.4852413936925677, + "grad_norm": 2.5913595891324803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512440 + }, + { + "epoch": 2.4852898918854036, + "grad_norm": 2.453774072819215e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512450 + }, + { + "epoch": 2.4853383900782395, + "grad_norm": 2.4401435894105816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512460 + }, + { + "epoch": 2.485386888271076, + "grad_norm": 2.5543675974404323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512470 + }, + { + "epoch": 2.485435386463912, + "grad_norm": 1.9721946387107891e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512480 + }, + { + "epoch": 2.485483884656748, + "grad_norm": 2.5365110900565924e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512490 + }, + { + "epoch": 2.485532382849584, + "grad_norm": 2.437046759951045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512500 + }, + { + "epoch": 2.48558088104242, + "grad_norm": 2.490904762453283e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512510 + }, + { + "epoch": 2.4856293792352564, + "grad_norm": 2.6497551175452827e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512520 + }, + { + "epoch": 2.4856778774280923, + "grad_norm": 1.850673072567588e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512530 + }, + { + "epoch": 2.4857263756209282, + "grad_norm": 2.3509333857418824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512540 + }, + { + "epoch": 2.4857748738137646, + "grad_norm": 2.5017803295668273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512550 + }, + { + "epoch": 2.4858233720066005, + "grad_norm": 2.3561231898838741e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512560 + }, + { + "epoch": 2.4858718701994365, + "grad_norm": 2.2381310316177405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512570 + }, + { + "epoch": 2.485920368392273, + "grad_norm": 1.8954156644213072e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512580 + }, + { + "epoch": 2.4859688665851087, + "grad_norm": 2.3199940812901332e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512590 + }, + { + "epoch": 2.486017364777945, + "grad_norm": 2.3469745258353214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512600 + }, + { + "epoch": 2.486065862970781, + "grad_norm": 2.3725235109850473e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512610 + }, + { + "epoch": 2.486114361163617, + "grad_norm": 2.2297237478596799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512620 + }, + { + "epoch": 2.4861628593564533, + "grad_norm": 1.574174746110657e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512630 + }, + { + "epoch": 2.4862113575492892, + "grad_norm": 2.2742482030935207e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512640 + }, + { + "epoch": 2.4862598557421256, + "grad_norm": 2.24357194156255e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512650 + }, + { + "epoch": 2.4863083539349615, + "grad_norm": 2.227231590268275e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512660 + }, + { + "epoch": 2.4863568521277974, + "grad_norm": 2.1686975060219993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512670 + }, + { + "epoch": 2.486405350320634, + "grad_norm": 1.4710634843595471e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512680 + }, + { + "epoch": 2.4864538485134697, + "grad_norm": 2.15698889860505e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512690 + }, + { + "epoch": 2.486502346706306, + "grad_norm": 2.1441387332288286e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512700 + }, + { + "epoch": 2.486550844899142, + "grad_norm": 2.1164183294786199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512710 + }, + { + "epoch": 2.486599343091978, + "grad_norm": 2.1898996749314392e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512720 + }, + { + "epoch": 2.4866478412848143, + "grad_norm": 1.4117749458364415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512730 + }, + { + "epoch": 2.4866963394776502, + "grad_norm": 2.1401143612820306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512740 + }, + { + "epoch": 2.486744837670486, + "grad_norm": 2.113624333333064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512750 + }, + { + "epoch": 2.4867933358633225, + "grad_norm": 2.1397465843620012e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512760 + }, + { + "epoch": 2.4868418340561584, + "grad_norm": 2.0929887512011192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512770 + }, + { + "epoch": 2.4868903322489944, + "grad_norm": 1.2445846664377314e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512780 + }, + { + "epoch": 2.4869388304418307, + "grad_norm": 2.0453191496017098e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512790 + }, + { + "epoch": 2.4869873286346666, + "grad_norm": 2.0517403243047738e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512800 + }, + { + "epoch": 2.487035826827503, + "grad_norm": 2.1231021207768208e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512810 + }, + { + "epoch": 2.487084325020339, + "grad_norm": 2.551335001044208e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 512820 + }, + { + "epoch": 2.487132823213175, + "grad_norm": 5.19521017849911e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512830 + }, + { + "epoch": 2.4871813214060112, + "grad_norm": 1.8002323486143723e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512840 + }, + { + "epoch": 2.487229819598847, + "grad_norm": 7.1752815529180225e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512850 + }, + { + "epoch": 2.4872783177916835, + "grad_norm": 0.0001260154094779864, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 512860 + }, + { + "epoch": 2.4873268159845194, + "grad_norm": 0.00038767181104049087, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 512870 + }, + { + "epoch": 2.4873753141773554, + "grad_norm": 2.1622838630719343e-06, + "learning_rate": 0.0002, + "loss": 0.0059, + "step": 512880 + }, + { + "epoch": 2.4874238123701917, + "grad_norm": 0.010001133196055889, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 512890 + }, + { + "epoch": 2.4874723105630276, + "grad_norm": 0.0001143891568062827, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 512900 + }, + { + "epoch": 2.4875208087558636, + "grad_norm": 0.1497558206319809, + "learning_rate": 0.0002, + "loss": 0.0015, + "step": 512910 + }, + { + "epoch": 2.4875693069487, + "grad_norm": 0.00022379134315997362, + "learning_rate": 0.0002, + "loss": 0.0005, + "step": 512920 + }, + { + "epoch": 2.487617805141536, + "grad_norm": 0.4202660620212555, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 512930 + }, + { + "epoch": 2.4876663033343718, + "grad_norm": 0.00023654439428355545, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512940 + }, + { + "epoch": 2.487714801527208, + "grad_norm": 5.1788676501018927e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512950 + }, + { + "epoch": 2.487763299720044, + "grad_norm": 2.817581480485387e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512960 + }, + { + "epoch": 2.4878117979128804, + "grad_norm": 4.332192111178301e-05, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 512970 + }, + { + "epoch": 2.4878602961057164, + "grad_norm": 5.1037062803516164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512980 + }, + { + "epoch": 2.4879087942985523, + "grad_norm": 0.037554919719696045, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 512990 + }, + { + "epoch": 2.4879572924913886, + "grad_norm": 1.8573116904008202e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513000 + }, + { + "epoch": 2.4880057906842246, + "grad_norm": 0.00341786234639585, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 513010 + }, + { + "epoch": 2.488054288877061, + "grad_norm": 3.278471194789745e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513020 + }, + { + "epoch": 2.488102787069897, + "grad_norm": 1.8293312677997164e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513030 + }, + { + "epoch": 2.4881512852627328, + "grad_norm": 1.4629015822720248e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513040 + }, + { + "epoch": 2.488199783455569, + "grad_norm": 1.54807094077114e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513050 + }, + { + "epoch": 2.488248281648405, + "grad_norm": 1.2644626622204669e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513060 + }, + { + "epoch": 2.488296779841241, + "grad_norm": 1.0850493708858266e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513070 + }, + { + "epoch": 2.4883452780340773, + "grad_norm": 1.1812342563644052e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513080 + }, + { + "epoch": 2.4883937762269133, + "grad_norm": 9.00838404049864e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513090 + }, + { + "epoch": 2.488442274419749, + "grad_norm": 9.783210771274753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513100 + }, + { + "epoch": 2.4884907726125856, + "grad_norm": 1.7432163076591678e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513110 + }, + { + "epoch": 2.4885392708054215, + "grad_norm": 7.810209353920072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513120 + }, + { + "epoch": 2.488587768998258, + "grad_norm": 9.456551197217777e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513130 + }, + { + "epoch": 2.4886362671910938, + "grad_norm": 8.647385584481526e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513140 + }, + { + "epoch": 2.4886847653839297, + "grad_norm": 1.2766112376993988e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513150 + }, + { + "epoch": 2.488733263576766, + "grad_norm": 6.8811268647550605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513160 + }, + { + "epoch": 2.488781761769602, + "grad_norm": 7.109517810022226e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513170 + }, + { + "epoch": 2.4888302599624383, + "grad_norm": 7.673601430724375e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513180 + }, + { + "epoch": 2.4888787581552743, + "grad_norm": 6.478810973931104e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513190 + }, + { + "epoch": 2.48892725634811, + "grad_norm": 5.409762707131449e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513200 + }, + { + "epoch": 2.4889757545409466, + "grad_norm": 5.755986421718262e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513210 + }, + { + "epoch": 2.4890242527337825, + "grad_norm": 5.935318313277094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513220 + }, + { + "epoch": 2.489072750926619, + "grad_norm": 7.560779067716794e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 513230 + }, + { + "epoch": 2.4891212491194548, + "grad_norm": 9.616706847737078e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513240 + }, + { + "epoch": 2.4891697473122907, + "grad_norm": 0.00014230728265829384, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513250 + }, + { + "epoch": 2.489218245505127, + "grad_norm": 7.2312036536459345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513260 + }, + { + "epoch": 2.489266743697963, + "grad_norm": 3.658080458990298e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513270 + }, + { + "epoch": 2.489315241890799, + "grad_norm": 7.74995260144351e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513280 + }, + { + "epoch": 2.4893637400836353, + "grad_norm": 1.1781660759879742e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513290 + }, + { + "epoch": 2.489412238276471, + "grad_norm": 6.031916655047098e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513300 + }, + { + "epoch": 2.489460736469307, + "grad_norm": 7.845054824429099e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513310 + }, + { + "epoch": 2.4895092346621435, + "grad_norm": 4.974063358531566e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513320 + }, + { + "epoch": 2.4895577328549794, + "grad_norm": 1.2874520507466514e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513330 + }, + { + "epoch": 2.4896062310478158, + "grad_norm": 9.747319381858688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513340 + }, + { + "epoch": 2.4896547292406517, + "grad_norm": 4.732932666229317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513350 + }, + { + "epoch": 2.4897032274334876, + "grad_norm": 6.990951987972949e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513360 + }, + { + "epoch": 2.489751725626324, + "grad_norm": 4.657301360566635e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513370 + }, + { + "epoch": 2.48980022381916, + "grad_norm": 4.239009285811335e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513380 + }, + { + "epoch": 2.4898487220119963, + "grad_norm": 4.249341145623475e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513390 + }, + { + "epoch": 2.489897220204832, + "grad_norm": 5.800501185149187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513400 + }, + { + "epoch": 2.489945718397668, + "grad_norm": 8.032836376514751e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513410 + }, + { + "epoch": 2.4899942165905045, + "grad_norm": 4.330763204052346e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513420 + }, + { + "epoch": 2.4900427147833404, + "grad_norm": 6.1937303144077305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513430 + }, + { + "epoch": 2.4900912129761763, + "grad_norm": 3.4783149658323964e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513440 + }, + { + "epoch": 2.4901397111690127, + "grad_norm": 1.6668647731421515e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513450 + }, + { + "epoch": 2.4901882093618486, + "grad_norm": 3.4215188406960806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513460 + }, + { + "epoch": 2.4902367075546845, + "grad_norm": 3.7599138522637077e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513470 + }, + { + "epoch": 2.490285205747521, + "grad_norm": 3.4427632726874435e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513480 + }, + { + "epoch": 2.490333703940357, + "grad_norm": 3.860740889649605e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513490 + }, + { + "epoch": 2.490382202133193, + "grad_norm": 3.6941678445145953e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513500 + }, + { + "epoch": 2.490430700326029, + "grad_norm": 2.849025122486637e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513510 + }, + { + "epoch": 2.490479198518865, + "grad_norm": 2.8688584734481992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513520 + }, + { + "epoch": 2.4905276967117014, + "grad_norm": 3.2637994991091546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513530 + }, + { + "epoch": 2.4905761949045373, + "grad_norm": 3.023198587470688e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513540 + }, + { + "epoch": 2.4906246930973737, + "grad_norm": 2.723818170125014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513550 + }, + { + "epoch": 2.4906731912902096, + "grad_norm": 2.956526941488846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513560 + }, + { + "epoch": 2.4907216894830455, + "grad_norm": 2.3383981897495687e-06, + "learning_rate": 0.0002, + "loss": 0.0002, + "step": 513570 + }, + { + "epoch": 2.490770187675882, + "grad_norm": 6.03026046519517e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513580 + }, + { + "epoch": 2.490818685868718, + "grad_norm": 1.4781991012569051e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513590 + }, + { + "epoch": 2.4908671840615537, + "grad_norm": 1.8524255210650153e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513600 + }, + { + "epoch": 2.49091568225439, + "grad_norm": 7.998543878784403e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513610 + }, + { + "epoch": 2.490964180447226, + "grad_norm": 9.22648177947849e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513620 + }, + { + "epoch": 2.491012678640062, + "grad_norm": 5.660122042172588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513630 + }, + { + "epoch": 2.4910611768328983, + "grad_norm": 6.733552709192736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513640 + }, + { + "epoch": 2.491109675025734, + "grad_norm": 4.734890353574883e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513650 + }, + { + "epoch": 2.4911581732185706, + "grad_norm": 5.820877959195059e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513660 + }, + { + "epoch": 2.4912066714114065, + "grad_norm": 6.121988917584531e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513670 + }, + { + "epoch": 2.4912551696042424, + "grad_norm": 3.9515607568318956e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513680 + }, + { + "epoch": 2.491303667797079, + "grad_norm": 3.2557331905991305e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513690 + }, + { + "epoch": 2.4913521659899147, + "grad_norm": 3.420291477596038e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513700 + }, + { + "epoch": 2.491400664182751, + "grad_norm": 3.835788902506465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513710 + }, + { + "epoch": 2.491449162375587, + "grad_norm": 3.2022369396145223e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513720 + }, + { + "epoch": 2.491497660568423, + "grad_norm": 2.9431555503833806e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513730 + }, + { + "epoch": 2.4915461587612593, + "grad_norm": 3.40601604875701e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513740 + }, + { + "epoch": 2.491594656954095, + "grad_norm": 2.71178896582569e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513750 + }, + { + "epoch": 2.4916431551469316, + "grad_norm": 3.096012960668304e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513760 + }, + { + "epoch": 2.4916916533397675, + "grad_norm": 2.411556351944455e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513770 + }, + { + "epoch": 2.4917401515326034, + "grad_norm": 2.37275639847212e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513780 + }, + { + "epoch": 2.49178864972544, + "grad_norm": 2.3347081423708005e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513790 + }, + { + "epoch": 2.4918371479182757, + "grad_norm": 2.0566053535731044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513800 + }, + { + "epoch": 2.4918856461111116, + "grad_norm": 2.5612339413783047e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513810 + }, + { + "epoch": 2.491934144303948, + "grad_norm": 2.1251921680232044e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513820 + }, + { + "epoch": 2.491982642496784, + "grad_norm": 2.066066144834622e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513830 + }, + { + "epoch": 2.49203114068962, + "grad_norm": 1.8832824935088865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513840 + }, + { + "epoch": 2.492079638882456, + "grad_norm": 1.8004199091592454e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513850 + }, + { + "epoch": 2.492128137075292, + "grad_norm": 2.3382760900858557e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513860 + }, + { + "epoch": 2.4921766352681285, + "grad_norm": 2.3530731141363503e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513870 + }, + { + "epoch": 2.4922251334609644, + "grad_norm": 2.1915423076279694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513880 + }, + { + "epoch": 2.4922736316538003, + "grad_norm": 2.0580778254952747e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513890 + }, + { + "epoch": 2.4923221298466367, + "grad_norm": 1.8519441482567345e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513900 + }, + { + "epoch": 2.4923706280394726, + "grad_norm": 2.0525483250821708e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513910 + }, + { + "epoch": 2.492419126232309, + "grad_norm": 1.7442331454731175e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513920 + }, + { + "epoch": 2.492467624425145, + "grad_norm": 1.948964381881524e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513930 + }, + { + "epoch": 2.492516122617981, + "grad_norm": 1.768761535458907e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513940 + }, + { + "epoch": 2.492564620810817, + "grad_norm": 1.6443552794953575e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513950 + }, + { + "epoch": 2.492613119003653, + "grad_norm": 1.7066919326680363e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513960 + }, + { + "epoch": 2.492661617196489, + "grad_norm": 1.5069111896082177e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513970 + }, + { + "epoch": 2.4927101153893254, + "grad_norm": 1.7735425217324519e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513980 + }, + { + "epoch": 2.4927586135821613, + "grad_norm": 1.5906163071122137e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 513990 + }, + { + "epoch": 2.4928071117749973, + "grad_norm": 1.467661149945343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514000 + }, + { + "epoch": 2.4928556099678336, + "grad_norm": 1.540997800475452e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514010 + }, + { + "epoch": 2.4929041081606695, + "grad_norm": 1.609423406989663e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514020 + }, + { + "epoch": 2.492952606353506, + "grad_norm": 1.5341364587584394e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514030 + }, + { + "epoch": 2.493001104546342, + "grad_norm": 1.4334149227579474e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514040 + }, + { + "epoch": 2.4930496027391778, + "grad_norm": 1.4069087228563149e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514050 + }, + { + "epoch": 2.493098100932014, + "grad_norm": 1.453154141017876e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514060 + }, + { + "epoch": 2.49314659912485, + "grad_norm": 1.3531708873415482e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514070 + }, + { + "epoch": 2.4931950973176864, + "grad_norm": 1.4594492085961974e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514080 + }, + { + "epoch": 2.4932435955105223, + "grad_norm": 1.3692923630514997e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514090 + }, + { + "epoch": 2.4932920937033582, + "grad_norm": 1.320523324466194e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514100 + }, + { + "epoch": 2.4933405918961946, + "grad_norm": 1.3893647974327905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514110 + }, + { + "epoch": 2.4933890900890305, + "grad_norm": 1.2667866258198046e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514120 + }, + { + "epoch": 2.4934375882818665, + "grad_norm": 1.301962015531899e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514130 + }, + { + "epoch": 2.493486086474703, + "grad_norm": 1.3504726439350634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514140 + }, + { + "epoch": 2.4935345846675387, + "grad_norm": 2.5240311515517533e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514150 + }, + { + "epoch": 2.4935830828603747, + "grad_norm": 1.2826735655835364e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514160 + }, + { + "epoch": 2.493631581053211, + "grad_norm": 1.3757040733253234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514170 + }, + { + "epoch": 2.493680079246047, + "grad_norm": 1.29041006857733e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514180 + }, + { + "epoch": 2.4937285774388833, + "grad_norm": 1.1958004506595898e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514190 + }, + { + "epoch": 2.4937770756317192, + "grad_norm": 1.3108631264913129e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514200 + }, + { + "epoch": 2.493825573824555, + "grad_norm": 1.3372141438594554e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514210 + }, + { + "epoch": 2.4938740720173915, + "grad_norm": 1.1290143220321625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514220 + }, + { + "epoch": 2.4939225702102275, + "grad_norm": 1.3051485439063981e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514230 + }, + { + "epoch": 2.493971068403064, + "grad_norm": 1.24244161270326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514240 + }, + { + "epoch": 2.4940195665958997, + "grad_norm": 1.114053475248511e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514250 + }, + { + "epoch": 2.4940680647887357, + "grad_norm": 1.147102921095211e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514260 + }, + { + "epoch": 2.494116562981572, + "grad_norm": 1.0765503475340665e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514270 + }, + { + "epoch": 2.494165061174408, + "grad_norm": 1.181136667582905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514280 + }, + { + "epoch": 2.4942135593672443, + "grad_norm": 1.1163174349348992e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514290 + }, + { + "epoch": 2.4942620575600802, + "grad_norm": 1.108332412513846e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514300 + }, + { + "epoch": 2.494310555752916, + "grad_norm": 1.125707854043867e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514310 + }, + { + "epoch": 2.4943590539457525, + "grad_norm": 1.0900644156208728e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514320 + }, + { + "epoch": 2.4944075521385884, + "grad_norm": 1.0885253232117975e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514330 + }, + { + "epoch": 2.4944560503314244, + "grad_norm": 1.1531890322658e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514340 + }, + { + "epoch": 2.4945045485242607, + "grad_norm": 1.0858033192562289e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514350 + }, + { + "epoch": 2.4945530467170967, + "grad_norm": 1.1498126468723058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514360 + }, + { + "epoch": 2.4946015449099326, + "grad_norm": 1.1472590131234028e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514370 + }, + { + "epoch": 2.494650043102769, + "grad_norm": 1.1127552852485678e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514380 + }, + { + "epoch": 2.494698541295605, + "grad_norm": 1.0001033388107317e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514390 + }, + { + "epoch": 2.4947470394884412, + "grad_norm": 1.2486101468311972e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514400 + }, + { + "epoch": 2.494795537681277, + "grad_norm": 1.0732416058090166e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514410 + }, + { + "epoch": 2.494844035874113, + "grad_norm": 9.72743237070972e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514420 + }, + { + "epoch": 2.4948925340669494, + "grad_norm": 1.0613896392897004e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514430 + }, + { + "epoch": 2.4949410322597854, + "grad_norm": 9.252538006876421e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514440 + }, + { + "epoch": 2.4949895304526217, + "grad_norm": 1.3548798278861796e-06, + "learning_rate": 0.0002, + "loss": 0.0003, + "step": 514450 + }, + { + "epoch": 2.4950380286454577, + "grad_norm": 3.6360270314617082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514460 + }, + { + "epoch": 2.4950865268382936, + "grad_norm": 3.8774146560172085e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514470 + }, + { + "epoch": 2.49513502503113, + "grad_norm": 8.694375537743326e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514480 + }, + { + "epoch": 2.495183523223966, + "grad_norm": 3.896556336258072e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514490 + }, + { + "epoch": 2.495232021416802, + "grad_norm": 2.4952720195869915e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514500 + }, + { + "epoch": 2.495280519609638, + "grad_norm": 3.378739347681403e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514510 + }, + { + "epoch": 2.495329017802474, + "grad_norm": 2.3929674171085935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514520 + }, + { + "epoch": 2.49537751599531, + "grad_norm": 3.6275509955885354e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514530 + }, + { + "epoch": 2.4954260141881464, + "grad_norm": 0.00489435251802206, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 514540 + }, + { + "epoch": 2.4954745123809823, + "grad_norm": 1.221010165863845e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514550 + }, + { + "epoch": 2.4955230105738186, + "grad_norm": 3.592714165279176e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514560 + }, + { + "epoch": 2.4955715087666546, + "grad_norm": 1.2125361763537512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514570 + }, + { + "epoch": 2.4956200069594905, + "grad_norm": 1.001871055450465e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514580 + }, + { + "epoch": 2.495668505152327, + "grad_norm": 1.5153558479141793e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514590 + }, + { + "epoch": 2.4957170033451628, + "grad_norm": 1.2608060160346213e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514600 + }, + { + "epoch": 2.495765501537999, + "grad_norm": 1.2082422244930058e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514610 + }, + { + "epoch": 2.495813999730835, + "grad_norm": 1.2446993196135736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514620 + }, + { + "epoch": 2.495862497923671, + "grad_norm": 9.675383125795634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514630 + }, + { + "epoch": 2.4959109961165074, + "grad_norm": 1.0939002095256e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514640 + }, + { + "epoch": 2.4959594943093433, + "grad_norm": 1.2629426464627613e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514650 + }, + { + "epoch": 2.496007992502179, + "grad_norm": 1.1000300901287119e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514660 + }, + { + "epoch": 2.4960564906950156, + "grad_norm": 9.344201998828794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514670 + }, + { + "epoch": 2.4961049888878515, + "grad_norm": 9.677959269538405e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514680 + }, + { + "epoch": 2.496153487080688, + "grad_norm": 9.524753750156378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514690 + }, + { + "epoch": 2.4962019852735238, + "grad_norm": 1.0275001613990753e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514700 + }, + { + "epoch": 2.4962504834663597, + "grad_norm": 1.0930783673757105e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514710 + }, + { + "epoch": 2.496298981659196, + "grad_norm": 9.692639650893398e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514720 + }, + { + "epoch": 2.496347479852032, + "grad_norm": 8.709035341780691e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514730 + }, + { + "epoch": 2.496395978044868, + "grad_norm": 1.2951753660672694e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514740 + }, + { + "epoch": 2.4964444762377043, + "grad_norm": 1.1271664561718353e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514750 + }, + { + "epoch": 2.49649297443054, + "grad_norm": 1.046249053615611e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514760 + }, + { + "epoch": 2.4965414726233766, + "grad_norm": 9.5761276952544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514770 + }, + { + "epoch": 2.4965899708162125, + "grad_norm": 8.152904911185033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514780 + }, + { + "epoch": 2.4966384690090484, + "grad_norm": 9.123146469391941e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514790 + }, + { + "epoch": 2.4966869672018848, + "grad_norm": 1.443041014681512e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514800 + }, + { + "epoch": 2.4967354653947207, + "grad_norm": 9.310884934166097e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514810 + }, + { + "epoch": 2.496783963587557, + "grad_norm": 9.899064252749668e-07, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 514820 + }, + { + "epoch": 2.496832461780393, + "grad_norm": 1.2050655868733884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514830 + }, + { + "epoch": 2.496880959973229, + "grad_norm": 1.3051627547611133e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514840 + }, + { + "epoch": 2.4969294581660653, + "grad_norm": 1.361386807730014e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514850 + }, + { + "epoch": 2.496977956358901, + "grad_norm": 1.8819673641701229e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514860 + }, + { + "epoch": 2.497026454551737, + "grad_norm": 1.3249749599708593e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514870 + }, + { + "epoch": 2.4970749527445735, + "grad_norm": 9.754078291734913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514880 + }, + { + "epoch": 2.4971234509374094, + "grad_norm": 1.8081676671499736e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514890 + }, + { + "epoch": 2.4971719491302453, + "grad_norm": 2.823825070663588e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514900 + }, + { + "epoch": 2.4972204473230817, + "grad_norm": 4.380500286060851e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514910 + }, + { + "epoch": 2.4972689455159176, + "grad_norm": 1.2517497225417173e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514920 + }, + { + "epoch": 2.497317443708754, + "grad_norm": 9.85229917205288e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514930 + }, + { + "epoch": 2.49736594190159, + "grad_norm": 2.775709162960993e-06, + "learning_rate": 0.0002, + "loss": 0.0001, + "step": 514940 + }, + { + "epoch": 2.497414440094426, + "grad_norm": 3.846354502456961e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514950 + }, + { + "epoch": 2.497462938287262, + "grad_norm": 8.831286322674714e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514960 + }, + { + "epoch": 2.497511436480098, + "grad_norm": 3.4208994748041732e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514970 + }, + { + "epoch": 2.4975599346729345, + "grad_norm": 5.857830274180742e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514980 + }, + { + "epoch": 2.4976084328657704, + "grad_norm": 2.0192201191093773e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 514990 + }, + { + "epoch": 2.4976569310586063, + "grad_norm": 1.8202974843006814e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515000 + }, + { + "epoch": 2.4977054292514427, + "grad_norm": 2.6907991923508234e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515010 + }, + { + "epoch": 2.4977539274442786, + "grad_norm": 7.1488184403278865e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515020 + }, + { + "epoch": 2.4978024256371145, + "grad_norm": 1.4472337852566852e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515030 + }, + { + "epoch": 2.497850923829951, + "grad_norm": 8.231930223701056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515040 + }, + { + "epoch": 2.497899422022787, + "grad_norm": 1.2194016107969219e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515050 + }, + { + "epoch": 2.4979479202156227, + "grad_norm": 1.2986760111743934e-06, + "learning_rate": 0.0002, + "loss": 0.0004, + "step": 515060 + }, + { + "epoch": 2.497996418408459, + "grad_norm": 1.0284387826686725e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515070 + }, + { + "epoch": 2.498044916601295, + "grad_norm": 1.6497293472639285e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515080 + }, + { + "epoch": 2.4980934147941314, + "grad_norm": 1.5692407032474875e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515090 + }, + { + "epoch": 2.4981419129869673, + "grad_norm": 8.887064723239746e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515100 + }, + { + "epoch": 2.4981904111798032, + "grad_norm": 7.649104190932121e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515110 + }, + { + "epoch": 2.4982389093726396, + "grad_norm": 5.127531039761379e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515120 + }, + { + "epoch": 2.4982874075654755, + "grad_norm": 3.405885081519955e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515130 + }, + { + "epoch": 2.498335905758312, + "grad_norm": 3.902923253917834e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515140 + }, + { + "epoch": 2.498384403951148, + "grad_norm": 3.026778813364217e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515150 + }, + { + "epoch": 2.4984329021439837, + "grad_norm": 2.2455576527136145e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515160 + }, + { + "epoch": 2.49848140033682, + "grad_norm": 2.5803067273955094e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515170 + }, + { + "epoch": 2.498529898529656, + "grad_norm": 2.0570264496200252e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515180 + }, + { + "epoch": 2.498578396722492, + "grad_norm": 2.546975565564935e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515190 + }, + { + "epoch": 2.4986268949153283, + "grad_norm": 1.736360559334571e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515200 + }, + { + "epoch": 2.4986753931081642, + "grad_norm": 2.9227276172605343e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515210 + }, + { + "epoch": 2.4987238913010006, + "grad_norm": 1.5401094515254954e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515220 + }, + { + "epoch": 2.4987723894938365, + "grad_norm": 1.8084776911564404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515230 + }, + { + "epoch": 2.4988208876866724, + "grad_norm": 1.825742970140709e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515240 + }, + { + "epoch": 2.498869385879509, + "grad_norm": 1.3760418369201943e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515250 + }, + { + "epoch": 2.4989178840723447, + "grad_norm": 1.6876991821845877e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515260 + }, + { + "epoch": 2.4989663822651806, + "grad_norm": 1.1606815633058432e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515270 + }, + { + "epoch": 2.499014880458017, + "grad_norm": 1.1795858654295444e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515280 + }, + { + "epoch": 2.499063378650853, + "grad_norm": 1.1976735549978912e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515290 + }, + { + "epoch": 2.4991118768436893, + "grad_norm": 1.1811149533969e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515300 + }, + { + "epoch": 2.499160375036525, + "grad_norm": 1.1739714409486623e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515310 + }, + { + "epoch": 2.499208873229361, + "grad_norm": 1.0285027656209422e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515320 + }, + { + "epoch": 2.4992573714221975, + "grad_norm": 1.1015432619387866e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515330 + }, + { + "epoch": 2.4993058696150334, + "grad_norm": 1.1366314538463484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515340 + }, + { + "epoch": 2.49935436780787, + "grad_norm": 9.982203437175485e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515350 + }, + { + "epoch": 2.4994028660007057, + "grad_norm": 9.270410146200447e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515360 + }, + { + "epoch": 2.4994513641935416, + "grad_norm": 1.115662030315434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515370 + }, + { + "epoch": 2.499499862386378, + "grad_norm": 9.01290832189261e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515380 + }, + { + "epoch": 2.499548360579214, + "grad_norm": 8.699965974301449e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515390 + }, + { + "epoch": 2.49959685877205, + "grad_norm": 9.867934522844735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515400 + }, + { + "epoch": 2.499645356964886, + "grad_norm": 8.301290677081852e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515410 + }, + { + "epoch": 2.499693855157722, + "grad_norm": 9.102723197429441e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515420 + }, + { + "epoch": 2.499742353350558, + "grad_norm": 9.018909281621745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515430 + }, + { + "epoch": 2.4997908515433944, + "grad_norm": 1.1558612413864466e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515440 + }, + { + "epoch": 2.4998393497362303, + "grad_norm": 8.637766768515576e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515450 + }, + { + "epoch": 2.4998878479290667, + "grad_norm": 8.689533501637925e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515460 + }, + { + "epoch": 2.4999363461219026, + "grad_norm": 9.171316719402967e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515470 + }, + { + "epoch": 2.4999848443147386, + "grad_norm": 8.957734394243744e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515480 + }, + { + "epoch": 2.500033342507575, + "grad_norm": 7.975698963491595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515490 + }, + { + "epoch": 2.500081840700411, + "grad_norm": 7.990800554580346e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515500 + }, + { + "epoch": 2.500130338893247, + "grad_norm": 7.751367547825794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515510 + }, + { + "epoch": 2.500178837086083, + "grad_norm": 7.071704999361828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515520 + }, + { + "epoch": 2.500227335278919, + "grad_norm": 6.924941544639296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515530 + }, + { + "epoch": 2.5002758334717554, + "grad_norm": 7.044675953693513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515540 + }, + { + "epoch": 2.5003243316645913, + "grad_norm": 7.867884619372489e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515550 + }, + { + "epoch": 2.5003728298574277, + "grad_norm": 6.637487786065321e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515560 + }, + { + "epoch": 2.5004213280502636, + "grad_norm": 7.187211963355367e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515570 + }, + { + "epoch": 2.5004698262430995, + "grad_norm": 6.678206432297884e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515580 + }, + { + "epoch": 2.5005183244359355, + "grad_norm": 6.797453693252464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515590 + }, + { + "epoch": 2.500566822628772, + "grad_norm": 6.329840971375233e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515600 + }, + { + "epoch": 2.5006153208216078, + "grad_norm": 5.668931066793448e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515610 + }, + { + "epoch": 2.500663819014444, + "grad_norm": 5.804873808301636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515620 + }, + { + "epoch": 2.50071231720728, + "grad_norm": 6.517437327602238e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515630 + }, + { + "epoch": 2.500760815400116, + "grad_norm": 5.576990815825411e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515640 + }, + { + "epoch": 2.5008093135929523, + "grad_norm": 6.779222303521237e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515650 + }, + { + "epoch": 2.5008578117857883, + "grad_norm": 6.147523095023644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515660 + }, + { + "epoch": 2.5009063099786246, + "grad_norm": 5.480858931150578e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515670 + }, + { + "epoch": 2.5009548081714605, + "grad_norm": 5.54675068542565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515680 + }, + { + "epoch": 2.5010033063642965, + "grad_norm": 6.286175562308927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515690 + }, + { + "epoch": 2.501051804557133, + "grad_norm": 5.374984652917192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515700 + }, + { + "epoch": 2.5011003027499688, + "grad_norm": 6.491048338830296e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515710 + }, + { + "epoch": 2.501148800942805, + "grad_norm": 5.757931944572192e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515720 + }, + { + "epoch": 2.501197299135641, + "grad_norm": 5.679648893419653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515730 + }, + { + "epoch": 2.501245797328477, + "grad_norm": 6.60007231090276e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515740 + }, + { + "epoch": 2.501294295521313, + "grad_norm": 5.458440455186064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515750 + }, + { + "epoch": 2.5013427937141492, + "grad_norm": 5.032346734878956e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515760 + }, + { + "epoch": 2.501391291906985, + "grad_norm": 7.554519925179193e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515770 + }, + { + "epoch": 2.5014397900998215, + "grad_norm": 5.214592420088593e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515780 + }, + { + "epoch": 2.5014882882926575, + "grad_norm": 5.771893825112784e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515790 + }, + { + "epoch": 2.5015367864854934, + "grad_norm": 5.257265911495779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515800 + }, + { + "epoch": 2.5015852846783297, + "grad_norm": 6.092442959015898e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515810 + }, + { + "epoch": 2.5016337828711657, + "grad_norm": 4.7148688508968917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515820 + }, + { + "epoch": 2.501682281064002, + "grad_norm": 4.99435770962009e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515830 + }, + { + "epoch": 2.501730779256838, + "grad_norm": 5.205820912124182e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515840 + }, + { + "epoch": 2.501779277449674, + "grad_norm": 4.854958888245164e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515850 + }, + { + "epoch": 2.5018277756425102, + "grad_norm": 4.774495891979313e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515860 + }, + { + "epoch": 2.501876273835346, + "grad_norm": 4.804685431736289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515870 + }, + { + "epoch": 2.5019247720281825, + "grad_norm": 5.031708383285149e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515880 + }, + { + "epoch": 2.5019732702210185, + "grad_norm": 6.725542789354222e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515890 + }, + { + "epoch": 2.5020217684138544, + "grad_norm": 4.706264462583931e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515900 + }, + { + "epoch": 2.5020702666066903, + "grad_norm": 4.6343907911250426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515910 + }, + { + "epoch": 2.5021187647995267, + "grad_norm": 4.7163661065496854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515920 + }, + { + "epoch": 2.5021672629923626, + "grad_norm": 4.905576247438148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515930 + }, + { + "epoch": 2.502215761185199, + "grad_norm": 4.7632775590500387e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515940 + }, + { + "epoch": 2.502264259378035, + "grad_norm": 4.982120458407735e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515950 + }, + { + "epoch": 2.502312757570871, + "grad_norm": 4.436425626863638e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515960 + }, + { + "epoch": 2.502361255763707, + "grad_norm": 4.594142239966459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515970 + }, + { + "epoch": 2.502409753956543, + "grad_norm": 4.4943323018742376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515980 + }, + { + "epoch": 2.5024582521493794, + "grad_norm": 4.227502756748436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 515990 + }, + { + "epoch": 2.5025067503422154, + "grad_norm": 5.015352257942141e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516000 + }, + { + "epoch": 2.5025552485350513, + "grad_norm": 4.535860114174284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516010 + }, + { + "epoch": 2.5026037467278877, + "grad_norm": 4.0834868286765413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516020 + }, + { + "epoch": 2.5026522449207236, + "grad_norm": 4.5763835032630595e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516030 + }, + { + "epoch": 2.50270074311356, + "grad_norm": 4.292653841275751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516040 + }, + { + "epoch": 2.502749241306396, + "grad_norm": 4.043513683882338e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516050 + }, + { + "epoch": 2.502797739499232, + "grad_norm": 4.2726702531581395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516060 + }, + { + "epoch": 2.502846237692068, + "grad_norm": 4.176448840098601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516070 + }, + { + "epoch": 2.502894735884904, + "grad_norm": 4.091647269888199e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516080 + }, + { + "epoch": 2.5029432340777404, + "grad_norm": 4.781696816280601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516090 + }, + { + "epoch": 2.5029917322705764, + "grad_norm": 4.209765336327109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516100 + }, + { + "epoch": 2.5030402304634123, + "grad_norm": 4.014080161596212e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516110 + }, + { + "epoch": 2.503088728656248, + "grad_norm": 4.0505887000108487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516120 + }, + { + "epoch": 2.5031372268490846, + "grad_norm": 3.8181622130650794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516130 + }, + { + "epoch": 2.5031857250419205, + "grad_norm": 4.07852752459803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516140 + }, + { + "epoch": 2.503234223234757, + "grad_norm": 3.957694048040139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516150 + }, + { + "epoch": 2.503282721427593, + "grad_norm": 3.86587316825171e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516160 + }, + { + "epoch": 2.5033312196204287, + "grad_norm": 3.976715277076437e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516170 + }, + { + "epoch": 2.503379717813265, + "grad_norm": 4.380397911063483e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516180 + }, + { + "epoch": 2.503428216006101, + "grad_norm": 3.9705096810394025e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516190 + }, + { + "epoch": 2.5034767141989374, + "grad_norm": 3.8955826653364056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516200 + }, + { + "epoch": 2.5035252123917733, + "grad_norm": 4.1417987972636183e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516210 + }, + { + "epoch": 2.503573710584609, + "grad_norm": 4.0724677319303737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516220 + }, + { + "epoch": 2.5036222087774456, + "grad_norm": 4.278632843579544e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516230 + }, + { + "epoch": 2.5036707069702815, + "grad_norm": 4.317842297041352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516240 + }, + { + "epoch": 2.503719205163118, + "grad_norm": 4.1855290078274265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516250 + }, + { + "epoch": 2.5037677033559538, + "grad_norm": 4.18163978110897e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516260 + }, + { + "epoch": 2.5038162015487897, + "grad_norm": 3.327747322146024e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516270 + }, + { + "epoch": 2.5038646997416256, + "grad_norm": 3.686267007196875e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516280 + }, + { + "epoch": 2.503913197934462, + "grad_norm": 3.9106075178096944e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516290 + }, + { + "epoch": 2.503961696127298, + "grad_norm": 3.3942259847208334e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516300 + }, + { + "epoch": 2.5040101943201343, + "grad_norm": 3.920601159279613e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516310 + }, + { + "epoch": 2.50405869251297, + "grad_norm": 3.5667980569087376e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516320 + }, + { + "epoch": 2.504107190705806, + "grad_norm": 3.694106283091969e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516330 + }, + { + "epoch": 2.5041556888986425, + "grad_norm": 3.4826564387913095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516340 + }, + { + "epoch": 2.5042041870914784, + "grad_norm": 3.2165118568627804e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516350 + }, + { + "epoch": 2.5042526852843148, + "grad_norm": 3.325932595998893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516360 + }, + { + "epoch": 2.5043011834771507, + "grad_norm": 3.926146519006579e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516370 + }, + { + "epoch": 2.5043496816699866, + "grad_norm": 3.5991493518849893e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516380 + }, + { + "epoch": 2.504398179862823, + "grad_norm": 3.1981738857211894e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516390 + }, + { + "epoch": 2.504446678055659, + "grad_norm": 3.8225493881327566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516400 + }, + { + "epoch": 2.5044951762484953, + "grad_norm": 3.545632409895916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516410 + }, + { + "epoch": 2.504543674441331, + "grad_norm": 3.5300715239827696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516420 + }, + { + "epoch": 2.504592172634167, + "grad_norm": 3.658524860838952e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516430 + }, + { + "epoch": 2.504640670827003, + "grad_norm": 3.4277033478247176e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516440 + }, + { + "epoch": 2.5046891690198394, + "grad_norm": 3.1313086878981267e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516450 + }, + { + "epoch": 2.5047376672126753, + "grad_norm": 3.7845495626243064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516460 + }, + { + "epoch": 2.5047861654055117, + "grad_norm": 3.102514085639996e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516470 + }, + { + "epoch": 2.5048346635983476, + "grad_norm": 3.3646995234448696e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516480 + }, + { + "epoch": 2.5048831617911835, + "grad_norm": 3.079330213040521e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516490 + }, + { + "epoch": 2.50493165998402, + "grad_norm": 3.0549270491064817e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516500 + }, + { + "epoch": 2.504980158176856, + "grad_norm": 3.160950541314378e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516510 + }, + { + "epoch": 2.505028656369692, + "grad_norm": 2.892728900860675e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516520 + }, + { + "epoch": 2.505077154562528, + "grad_norm": 3.083441697526723e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516530 + }, + { + "epoch": 2.505125652755364, + "grad_norm": 3.307889357984095e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516540 + }, + { + "epoch": 2.5051741509482004, + "grad_norm": 3.877929088957899e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516550 + }, + { + "epoch": 2.5052226491410363, + "grad_norm": 3.012426361692633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516560 + }, + { + "epoch": 2.5052711473338727, + "grad_norm": 2.900171978126309e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516570 + }, + { + "epoch": 2.5053196455267086, + "grad_norm": 3.0191387168088113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516580 + }, + { + "epoch": 2.5053681437195445, + "grad_norm": 4.221746507937496e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516590 + }, + { + "epoch": 2.505416641912381, + "grad_norm": 3.7062355318084883e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516600 + }, + { + "epoch": 2.505465140105217, + "grad_norm": 3.072453012009646e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516610 + }, + { + "epoch": 2.505513638298053, + "grad_norm": 2.959075686703727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516620 + }, + { + "epoch": 2.505562136490889, + "grad_norm": 2.8474562441260787e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516630 + }, + { + "epoch": 2.505610634683725, + "grad_norm": 2.859200378679816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516640 + }, + { + "epoch": 2.505659132876561, + "grad_norm": 2.6449612278156565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516650 + }, + { + "epoch": 2.5057076310693973, + "grad_norm": 3.6831985994467686e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516660 + }, + { + "epoch": 2.5057561292622332, + "grad_norm": 2.981338980134751e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516670 + }, + { + "epoch": 2.5058046274550696, + "grad_norm": 2.8063220725016436e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516680 + }, + { + "epoch": 2.5058531256479055, + "grad_norm": 2.5562326300132554e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516690 + }, + { + "epoch": 2.5059016238407414, + "grad_norm": 2.598250716800976e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516700 + }, + { + "epoch": 2.505950122033578, + "grad_norm": 2.6096239480466465e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516710 + }, + { + "epoch": 2.5059986202264137, + "grad_norm": 2.6997676627615874e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516720 + }, + { + "epoch": 2.50604711841925, + "grad_norm": 2.606898306112271e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516730 + }, + { + "epoch": 2.506095616612086, + "grad_norm": 3.2872659971872054e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516740 + }, + { + "epoch": 2.506144114804922, + "grad_norm": 2.91046490019653e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516750 + }, + { + "epoch": 2.5061926129977583, + "grad_norm": 2.56306094570391e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516760 + }, + { + "epoch": 2.5062411111905942, + "grad_norm": 2.704164785427565e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516770 + }, + { + "epoch": 2.5062896093834306, + "grad_norm": 2.6888238835454104e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516780 + }, + { + "epoch": 2.5063381075762665, + "grad_norm": 2.5786519586290524e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516790 + }, + { + "epoch": 2.5063866057691024, + "grad_norm": 3.082661237385764e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516800 + }, + { + "epoch": 2.5064351039619384, + "grad_norm": 2.6201666969427606e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516810 + }, + { + "epoch": 2.5064836021547747, + "grad_norm": 2.185531116083439e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516820 + }, + { + "epoch": 2.5065321003476106, + "grad_norm": 2.435355668239936e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516830 + }, + { + "epoch": 2.506580598540447, + "grad_norm": 4.027852355648065e-06, + "learning_rate": 0.0002, + "loss": 0.0008, + "step": 516840 + }, + { + "epoch": 2.506629096733283, + "grad_norm": 0.000246424344368279, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516850 + }, + { + "epoch": 2.506677594926119, + "grad_norm": 2.1723493773606606e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516860 + }, + { + "epoch": 2.5067260931189552, + "grad_norm": 1.235697345691733e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516870 + }, + { + "epoch": 2.506774591311791, + "grad_norm": 1.1386025107640307e-05, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516880 + }, + { + "epoch": 2.5068230895046275, + "grad_norm": 8.340195563505404e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516890 + }, + { + "epoch": 2.5068715876974634, + "grad_norm": 7.010988611000357e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516900 + }, + { + "epoch": 2.5069200858902994, + "grad_norm": 6.480053343693726e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516910 + }, + { + "epoch": 2.5069685840831357, + "grad_norm": 6.111430593591649e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516920 + }, + { + "epoch": 2.5070170822759716, + "grad_norm": 6.285783456405625e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516930 + }, + { + "epoch": 2.507065580468808, + "grad_norm": 4.9745231081033126e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516940 + }, + { + "epoch": 2.507114078661644, + "grad_norm": 4.869435088039609e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516950 + }, + { + "epoch": 2.50716257685448, + "grad_norm": 4.47168895334471e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516960 + }, + { + "epoch": 2.5072110750473158, + "grad_norm": 4.317011189414188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516970 + }, + { + "epoch": 2.507259573240152, + "grad_norm": 4.606332822731929e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516980 + }, + { + "epoch": 2.5073080714329885, + "grad_norm": 3.873554760502884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 516990 + }, + { + "epoch": 2.5073565696258244, + "grad_norm": 3.7355609947553603e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517000 + }, + { + "epoch": 2.5074050678186603, + "grad_norm": 3.4847264487325447e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517010 + }, + { + "epoch": 2.5074535660114963, + "grad_norm": 3.4770375805237563e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517020 + }, + { + "epoch": 2.5075020642043326, + "grad_norm": 3.7587753922707634e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517030 + }, + { + "epoch": 2.5075505623971686, + "grad_norm": 3.039868715859484e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517040 + }, + { + "epoch": 2.507599060590005, + "grad_norm": 2.9469294986483874e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517050 + }, + { + "epoch": 2.507647558782841, + "grad_norm": 2.9234347493911628e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517060 + }, + { + "epoch": 2.5076960569756768, + "grad_norm": 2.670804178706021e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517070 + }, + { + "epoch": 2.507744555168513, + "grad_norm": 2.822864189511165e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517080 + }, + { + "epoch": 2.507793053361349, + "grad_norm": 2.4332305201824056e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517090 + }, + { + "epoch": 2.5078415515541854, + "grad_norm": 2.2537219592777546e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517100 + }, + { + "epoch": 2.5078900497470213, + "grad_norm": 2.1303405901562655e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517110 + }, + { + "epoch": 2.5079385479398573, + "grad_norm": 2.0743821096402826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517120 + }, + { + "epoch": 2.5079870461326936, + "grad_norm": 2.249721774205682e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517130 + }, + { + "epoch": 2.5080355443255296, + "grad_norm": 2.019159410338034e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517140 + }, + { + "epoch": 2.508084042518366, + "grad_norm": 1.8521433275964228e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517150 + }, + { + "epoch": 2.508132540711202, + "grad_norm": 1.7988677427638322e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517160 + }, + { + "epoch": 2.5081810389040378, + "grad_norm": 1.717801296763355e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517170 + }, + { + "epoch": 2.5082295370968737, + "grad_norm": 1.8027672012976836e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517180 + }, + { + "epoch": 2.50827803528971, + "grad_norm": 1.6702521179468022e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517190 + }, + { + "epoch": 2.508326533482546, + "grad_norm": 1.7254335489269579e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517200 + }, + { + "epoch": 2.5083750316753823, + "grad_norm": 1.584425945111434e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517210 + }, + { + "epoch": 2.5084235298682183, + "grad_norm": 1.468997766096436e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517220 + }, + { + "epoch": 2.508472028061054, + "grad_norm": 1.6497917840752052e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517230 + }, + { + "epoch": 2.5085205262538905, + "grad_norm": 1.4100436374064884e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517240 + }, + { + "epoch": 2.5085690244467265, + "grad_norm": 1.408645516676188e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517250 + }, + { + "epoch": 2.508617522639563, + "grad_norm": 1.3127425972925266e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517260 + }, + { + "epoch": 2.5086660208323988, + "grad_norm": 1.2702040521617164e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517270 + }, + { + "epoch": 2.5087145190252347, + "grad_norm": 1.4463798834185582e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517280 + }, + { + "epoch": 2.508763017218071, + "grad_norm": 1.2615789728442905e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517290 + }, + { + "epoch": 2.508811515410907, + "grad_norm": 1.2306691132835113e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517300 + }, + { + "epoch": 2.5088600136037433, + "grad_norm": 1.1809762554548797e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517310 + }, + { + "epoch": 2.5089085117965793, + "grad_norm": 1.208070216307533e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517320 + }, + { + "epoch": 2.508957009989415, + "grad_norm": 1.2368426496323082e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517330 + }, + { + "epoch": 2.509005508182251, + "grad_norm": 1.1563024600036442e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517340 + }, + { + "epoch": 2.5090540063750875, + "grad_norm": 1.0848624469872448e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517350 + }, + { + "epoch": 2.5091025045679234, + "grad_norm": 1.0991783483405015e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517360 + }, + { + "epoch": 2.5091510027607598, + "grad_norm": 1.0743694929260528e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517370 + }, + { + "epoch": 2.5091995009535957, + "grad_norm": 1.1612571597652277e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517380 + }, + { + "epoch": 2.5092479991464316, + "grad_norm": 1.0313647180737462e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517390 + }, + { + "epoch": 2.509296497339268, + "grad_norm": 1.2399387969708187e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517400 + }, + { + "epoch": 2.509344995532104, + "grad_norm": 1.0230129419142031e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517410 + }, + { + "epoch": 2.5093934937249402, + "grad_norm": 1.0055016446131049e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517420 + }, + { + "epoch": 2.509441991917776, + "grad_norm": 1.0233726470687543e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517430 + }, + { + "epoch": 2.509490490110612, + "grad_norm": 1.010768073683721e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517440 + }, + { + "epoch": 2.5095389883034485, + "grad_norm": 9.205961646330252e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517450 + }, + { + "epoch": 2.5095874864962844, + "grad_norm": 9.165741516881099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517460 + }, + { + "epoch": 2.5096359846891207, + "grad_norm": 9.012444479594706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517470 + }, + { + "epoch": 2.5096844828819567, + "grad_norm": 1.0242464441034826e-06, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517480 + }, + { + "epoch": 2.5097329810747926, + "grad_norm": 8.832211051412742e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517490 + }, + { + "epoch": 2.509781479267629, + "grad_norm": 8.56424833273195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517500 + }, + { + "epoch": 2.509829977460465, + "grad_norm": 8.658143997308798e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517510 + }, + { + "epoch": 2.5098784756533012, + "grad_norm": 8.263843938038917e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517520 + }, + { + "epoch": 2.509926973846137, + "grad_norm": 9.148234312306158e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517530 + }, + { + "epoch": 2.509975472038973, + "grad_norm": 8.07261415047833e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517540 + }, + { + "epoch": 2.510023970231809, + "grad_norm": 7.869763862800028e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517550 + }, + { + "epoch": 2.5100724684246454, + "grad_norm": 7.888461368565913e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517560 + }, + { + "epoch": 2.5101209666174813, + "grad_norm": 8.031360607674287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517570 + }, + { + "epoch": 2.5101694648103177, + "grad_norm": 8.274398055618803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517580 + }, + { + "epoch": 2.5102179630031536, + "grad_norm": 7.65481559028558e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517590 + }, + { + "epoch": 2.5102664611959895, + "grad_norm": 7.511734452236851e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517600 + }, + { + "epoch": 2.510314959388826, + "grad_norm": 7.390380005745101e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517610 + }, + { + "epoch": 2.510363457581662, + "grad_norm": 7.162311135289201e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517620 + }, + { + "epoch": 2.510411955774498, + "grad_norm": 8.019761139621551e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517630 + }, + { + "epoch": 2.510460453967334, + "grad_norm": 7.080296882122639e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517640 + }, + { + "epoch": 2.51050895216017, + "grad_norm": 6.932972382855951e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517650 + }, + { + "epoch": 2.5105574503530064, + "grad_norm": 6.878473186588963e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517660 + }, + { + "epoch": 2.5106059485458423, + "grad_norm": 6.572198572030175e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517670 + }, + { + "epoch": 2.5106544467386787, + "grad_norm": 7.377718134193856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517680 + }, + { + "epoch": 2.5107029449315146, + "grad_norm": 6.769444098608801e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517690 + }, + { + "epoch": 2.5107514431243505, + "grad_norm": 6.722085572619108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517700 + }, + { + "epoch": 2.5107999413171864, + "grad_norm": 6.40913356164674e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517710 + }, + { + "epoch": 2.510848439510023, + "grad_norm": 7.478144539163623e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517720 + }, + { + "epoch": 2.5108969377028587, + "grad_norm": 6.730312520630832e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517730 + }, + { + "epoch": 2.510945435895695, + "grad_norm": 6.163245984680543e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517740 + }, + { + "epoch": 2.510993934088531, + "grad_norm": 5.887435463591828e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517750 + }, + { + "epoch": 2.511042432281367, + "grad_norm": 5.794012167825713e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517760 + }, + { + "epoch": 2.5110909304742033, + "grad_norm": 6.097639015933964e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517770 + }, + { + "epoch": 2.511139428667039, + "grad_norm": 6.020287059982365e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517780 + }, + { + "epoch": 2.5111879268598756, + "grad_norm": 5.59717761916545e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517790 + }, + { + "epoch": 2.5112364250527115, + "grad_norm": 5.616374778583122e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517800 + }, + { + "epoch": 2.5112849232455474, + "grad_norm": 5.487831913342234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517810 + }, + { + "epoch": 2.511333421438384, + "grad_norm": 5.319176352713839e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517820 + }, + { + "epoch": 2.5113819196312197, + "grad_norm": 5.470046176014876e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517830 + }, + { + "epoch": 2.511430417824056, + "grad_norm": 5.071383384347428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517840 + }, + { + "epoch": 2.511478916016892, + "grad_norm": 5.073011379863601e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517850 + }, + { + "epoch": 2.511527414209728, + "grad_norm": 5.174860007173265e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517860 + }, + { + "epoch": 2.511575912402564, + "grad_norm": 5.124803692524438e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517870 + }, + { + "epoch": 2.5116244105954, + "grad_norm": 5.053638005847461e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517880 + }, + { + "epoch": 2.511672908788236, + "grad_norm": 4.95472420425358e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517890 + }, + { + "epoch": 2.5117214069810725, + "grad_norm": 4.816644150196225e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517900 + }, + { + "epoch": 2.5117699051739084, + "grad_norm": 4.863766207563458e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517910 + }, + { + "epoch": 2.5118184033667443, + "grad_norm": 4.6170529799383075e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517920 + }, + { + "epoch": 2.5118669015595807, + "grad_norm": 4.930986960971495e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517930 + }, + { + "epoch": 2.5119153997524166, + "grad_norm": 4.4960464151699853e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517940 + }, + { + "epoch": 2.511963897945253, + "grad_norm": 4.6119424723656266e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517950 + }, + { + "epoch": 2.512012396138089, + "grad_norm": 4.438009852947289e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517960 + }, + { + "epoch": 2.512060894330925, + "grad_norm": 4.415309149408131e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517970 + }, + { + "epoch": 2.512109392523761, + "grad_norm": 4.4363719098328147e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517980 + }, + { + "epoch": 2.512157890716597, + "grad_norm": 4.2984294168491033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 517990 + }, + { + "epoch": 2.5122063889094335, + "grad_norm": 4.109344899916323e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518000 + }, + { + "epoch": 2.5122548871022694, + "grad_norm": 3.9849308564043895e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518010 + }, + { + "epoch": 2.5123033852951053, + "grad_norm": 4.17062778979016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518020 + }, + { + "epoch": 2.5123518834879417, + "grad_norm": 4.1189909438799077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518030 + }, + { + "epoch": 2.5124003816807776, + "grad_norm": 4.0656050259713084e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518040 + }, + { + "epoch": 2.512448879873614, + "grad_norm": 4.095835208772769e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518050 + }, + { + "epoch": 2.51249737806645, + "grad_norm": 4.17237572492013e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518060 + }, + { + "epoch": 2.512545876259286, + "grad_norm": 3.997688509116415e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518070 + }, + { + "epoch": 2.5125943744521217, + "grad_norm": 4.1104775050371245e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518080 + }, + { + "epoch": 2.512642872644958, + "grad_norm": 3.6734476793753856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518090 + }, + { + "epoch": 2.512691370837794, + "grad_norm": 3.760646336559148e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518100 + }, + { + "epoch": 2.5127398690306304, + "grad_norm": 3.64309698852594e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518110 + }, + { + "epoch": 2.5127883672234663, + "grad_norm": 3.6614642340282444e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518120 + }, + { + "epoch": 2.5128368654163022, + "grad_norm": 3.991725634477916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518130 + }, + { + "epoch": 2.5128853636091386, + "grad_norm": 4.0060669448394037e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518140 + }, + { + "epoch": 2.5129338618019745, + "grad_norm": 3.589295545225468e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518150 + }, + { + "epoch": 2.512982359994811, + "grad_norm": 3.4498759760026587e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518160 + }, + { + "epoch": 2.513030858187647, + "grad_norm": 3.6406106573849684e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518170 + }, + { + "epoch": 2.5130793563804827, + "grad_norm": 3.583655541206099e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518180 + }, + { + "epoch": 2.513127854573319, + "grad_norm": 3.2637100844112865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518190 + }, + { + "epoch": 2.513176352766155, + "grad_norm": 3.380210387149418e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518200 + }, + { + "epoch": 2.5132248509589914, + "grad_norm": 3.389209268789273e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518210 + }, + { + "epoch": 2.5132733491518273, + "grad_norm": 3.421783389967459e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518220 + }, + { + "epoch": 2.5133218473446632, + "grad_norm": 3.4315107200200146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518230 + }, + { + "epoch": 2.513370345537499, + "grad_norm": 3.318859000955854e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518240 + }, + { + "epoch": 2.5134188437303355, + "grad_norm": 3.2912521419348195e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518250 + }, + { + "epoch": 2.5134673419231714, + "grad_norm": 3.244336710395146e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518260 + }, + { + "epoch": 2.513515840116008, + "grad_norm": 3.113574962299026e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518270 + }, + { + "epoch": 2.5135643383088437, + "grad_norm": 3.388313132290932e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518280 + }, + { + "epoch": 2.5136128365016797, + "grad_norm": 3.124327747627831e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518290 + }, + { + "epoch": 2.513661334694516, + "grad_norm": 3.187544734828407e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518300 + }, + { + "epoch": 2.513709832887352, + "grad_norm": 3.2048251341620926e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518310 + }, + { + "epoch": 2.5137583310801883, + "grad_norm": 3.2341458222617803e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518320 + }, + { + "epoch": 2.5138068292730242, + "grad_norm": 3.2427939800072636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518330 + }, + { + "epoch": 2.51385532746586, + "grad_norm": 3.037139038042369e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518340 + }, + { + "epoch": 2.5139038256586965, + "grad_norm": 3.0192407507456664e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518350 + }, + { + "epoch": 2.5139523238515324, + "grad_norm": 2.9796257194902864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518360 + }, + { + "epoch": 2.514000822044369, + "grad_norm": 2.9458811923177564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518370 + }, + { + "epoch": 2.5140493202372047, + "grad_norm": 3.1121112442633603e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518380 + }, + { + "epoch": 2.5140978184300407, + "grad_norm": 2.8934559281879046e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518390 + }, + { + "epoch": 2.5141463166228766, + "grad_norm": 2.909007719154033e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518400 + }, + { + "epoch": 2.514194814815713, + "grad_norm": 3.049437111712905e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518410 + }, + { + "epoch": 2.514243313008549, + "grad_norm": 3.038671252397762e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518420 + }, + { + "epoch": 2.5142918112013852, + "grad_norm": 2.9976084192639973e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518430 + }, + { + "epoch": 2.514340309394221, + "grad_norm": 2.852541172160272e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518440 + }, + { + "epoch": 2.514388807587057, + "grad_norm": 2.8519514216895914e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518450 + }, + { + "epoch": 2.5144373057798934, + "grad_norm": 2.8156699727333034e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518460 + }, + { + "epoch": 2.5144858039727294, + "grad_norm": 2.921802320088318e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518470 + }, + { + "epoch": 2.5145343021655657, + "grad_norm": 2.9014117330916633e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518480 + }, + { + "epoch": 2.5145828003584016, + "grad_norm": 2.9295486569935747e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518490 + }, + { + "epoch": 2.5146312985512376, + "grad_norm": 2.7984052053398045e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518500 + }, + { + "epoch": 2.514679796744074, + "grad_norm": 2.776344558697019e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518510 + }, + { + "epoch": 2.51472829493691, + "grad_norm": 2.712536684157385e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518520 + }, + { + "epoch": 2.5147767931297462, + "grad_norm": 2.820162023908779e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518530 + }, + { + "epoch": 2.514825291322582, + "grad_norm": 2.7554895609682717e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518540 + }, + { + "epoch": 2.514873789515418, + "grad_norm": 2.812231514326413e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518550 + }, + { + "epoch": 2.5149222877082544, + "grad_norm": 2.6243804995829123e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518560 + }, + { + "epoch": 2.5149707859010904, + "grad_norm": 2.706815678266139e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518570 + }, + { + "epoch": 2.5150192840939267, + "grad_norm": 2.6971858346769295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518580 + }, + { + "epoch": 2.5150677822867626, + "grad_norm": 2.5808947157202056e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518590 + }, + { + "epoch": 2.5151162804795986, + "grad_norm": 2.5451237206652877e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518600 + }, + { + "epoch": 2.5151647786724345, + "grad_norm": 2.653465571711422e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518610 + }, + { + "epoch": 2.515213276865271, + "grad_norm": 2.572463984051865e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518620 + }, + { + "epoch": 2.5152617750581068, + "grad_norm": 2.6993967594535206e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518630 + }, + { + "epoch": 2.515310273250943, + "grad_norm": 2.592455814465211e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518640 + }, + { + "epoch": 2.515358771443779, + "grad_norm": 2.5340264642181864e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518650 + }, + { + "epoch": 2.515407269636615, + "grad_norm": 2.55954262229352e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518660 + }, + { + "epoch": 2.5154557678294514, + "grad_norm": 2.4792930730654916e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518670 + }, + { + "epoch": 2.5155042660222873, + "grad_norm": 2.707411113078706e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518680 + }, + { + "epoch": 2.5155527642151236, + "grad_norm": 2.4124585706886137e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518690 + }, + { + "epoch": 2.5156012624079596, + "grad_norm": 2.4405377985203813e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518700 + }, + { + "epoch": 2.5156497606007955, + "grad_norm": 2.464636850163515e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518710 + }, + { + "epoch": 2.515698258793632, + "grad_norm": 2.44277430283546e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518720 + }, + { + "epoch": 2.5157467569864678, + "grad_norm": 2.838352486378426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518730 + }, + { + "epoch": 2.515795255179304, + "grad_norm": 2.3958057226991514e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518740 + }, + { + "epoch": 2.51584375337214, + "grad_norm": 2.4270414655802597e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518750 + }, + { + "epoch": 2.515892251564976, + "grad_norm": 2.3671543658565497e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518760 + }, + { + "epoch": 2.515940749757812, + "grad_norm": 2.384945787525794e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518770 + }, + { + "epoch": 2.5159892479506483, + "grad_norm": 2.59977241512388e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518780 + }, + { + "epoch": 2.516037746143484, + "grad_norm": 2.4141044718817284e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518790 + }, + { + "epoch": 2.5160862443363206, + "grad_norm": 2.3279250171981403e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518800 + }, + { + "epoch": 2.5161347425291565, + "grad_norm": 2.3204526655717927e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518810 + }, + { + "epoch": 2.5161832407219924, + "grad_norm": 3.0836233122499834e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518820 + }, + { + "epoch": 2.5162317389148288, + "grad_norm": 2.46497990019634e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518830 + }, + { + "epoch": 2.5162802371076647, + "grad_norm": 2.281443727270016e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518840 + }, + { + "epoch": 2.516328735300501, + "grad_norm": 2.288285401164103e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518850 + }, + { + "epoch": 2.516377233493337, + "grad_norm": 2.286428895104109e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518860 + }, + { + "epoch": 2.516425731686173, + "grad_norm": 2.2773664909436775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518870 + }, + { + "epoch": 2.5164742298790093, + "grad_norm": 2.359901571935552e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518880 + }, + { + "epoch": 2.516522728071845, + "grad_norm": 2.3013842564978404e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518890 + }, + { + "epoch": 2.5165712262646815, + "grad_norm": 2.1590466303678113e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518900 + }, + { + "epoch": 2.5166197244575175, + "grad_norm": 2.2308657321445935e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518910 + }, + { + "epoch": 2.5166682226503534, + "grad_norm": 2.2944068689412234e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518920 + }, + { + "epoch": 2.5167167208431893, + "grad_norm": 2.3809025151422247e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518930 + }, + { + "epoch": 2.5167652190360257, + "grad_norm": 2.2297561486084305e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518940 + }, + { + "epoch": 2.5168137172288616, + "grad_norm": 2.1720228460253566e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518950 + }, + { + "epoch": 2.516862215421698, + "grad_norm": 2.2253605891364714e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518960 + }, + { + "epoch": 2.516910713614534, + "grad_norm": 2.0397419575601816e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518970 + }, + { + "epoch": 2.51695921180737, + "grad_norm": 2.1780819281502772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518980 + }, + { + "epoch": 2.517007710000206, + "grad_norm": 2.0519381394024094e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 518990 + }, + { + "epoch": 2.517056208193042, + "grad_norm": 2.1844610387233843e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519000 + }, + { + "epoch": 2.5171047063858785, + "grad_norm": 2.0432651126611745e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519010 + }, + { + "epoch": 2.5171532045787144, + "grad_norm": 2.0270633172003727e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519020 + }, + { + "epoch": 2.5172017027715503, + "grad_norm": 2.117954380764786e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519030 + }, + { + "epoch": 2.5172502009643867, + "grad_norm": 1.9543516316389287e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519040 + }, + { + "epoch": 2.5172986991572226, + "grad_norm": 1.832267741974647e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519050 + }, + { + "epoch": 2.517347197350059, + "grad_norm": 1.8248374544782564e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519060 + }, + { + "epoch": 2.517395695542895, + "grad_norm": 1.9963538022693683e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519070 + }, + { + "epoch": 2.517444193735731, + "grad_norm": 2.0106189424495824e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519080 + }, + { + "epoch": 2.517492691928567, + "grad_norm": 1.9702886788763863e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519090 + }, + { + "epoch": 2.517541190121403, + "grad_norm": 1.961374067604993e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519100 + }, + { + "epoch": 2.5175896883142395, + "grad_norm": 1.901359922840129e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519110 + }, + { + "epoch": 2.5176381865070754, + "grad_norm": 1.7610791758215782e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519120 + }, + { + "epoch": 2.5176866846999113, + "grad_norm": 1.8625695474838722e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519130 + }, + { + "epoch": 2.5177351828927472, + "grad_norm": 1.901126154280064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519140 + }, + { + "epoch": 2.5177836810855836, + "grad_norm": 1.8361065201588644e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519150 + }, + { + "epoch": 2.5178321792784195, + "grad_norm": 1.803084330731508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519160 + }, + { + "epoch": 2.517880677471256, + "grad_norm": 1.7614206626603846e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519170 + }, + { + "epoch": 2.517929175664092, + "grad_norm": 1.817961532424306e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519180 + }, + { + "epoch": 2.5179776738569277, + "grad_norm": 1.7052794021310547e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519190 + }, + { + "epoch": 2.518026172049764, + "grad_norm": 1.651249164069668e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519200 + }, + { + "epoch": 2.5180746702426, + "grad_norm": 1.6363929944418487e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519210 + }, + { + "epoch": 2.5181231684354364, + "grad_norm": 1.6799667434952426e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519220 + }, + { + "epoch": 2.5181716666282723, + "grad_norm": 1.5989060386800702e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519230 + }, + { + "epoch": 2.518220164821108, + "grad_norm": 1.524734045688092e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519240 + }, + { + "epoch": 2.5182686630139446, + "grad_norm": 1.5799875541233632e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519250 + }, + { + "epoch": 2.5183171612067805, + "grad_norm": 1.5253571916673536e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519260 + }, + { + "epoch": 2.518365659399617, + "grad_norm": 1.5960699784045573e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519270 + }, + { + "epoch": 2.518414157592453, + "grad_norm": 1.6140177194756689e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519280 + }, + { + "epoch": 2.5184626557852887, + "grad_norm": 1.4359491729010188e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519290 + }, + { + "epoch": 2.5185111539781246, + "grad_norm": 1.4248662694171799e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519300 + }, + { + "epoch": 2.518559652170961, + "grad_norm": 1.4145869897674856e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519310 + }, + { + "epoch": 2.518608150363797, + "grad_norm": 1.5064112801610463e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519320 + }, + { + "epoch": 2.5186566485566333, + "grad_norm": 1.4377251034147775e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519330 + }, + { + "epoch": 2.518705146749469, + "grad_norm": 1.3775451179753873e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519340 + }, + { + "epoch": 2.518753644942305, + "grad_norm": 1.3642727481055772e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519350 + }, + { + "epoch": 2.5188021431351415, + "grad_norm": 1.47830903074464e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519360 + }, + { + "epoch": 2.5188506413279774, + "grad_norm": 1.3850900870693295e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519370 + }, + { + "epoch": 2.518899139520814, + "grad_norm": 1.4399974190837384e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519380 + }, + { + "epoch": 2.5189476377136497, + "grad_norm": 1.3541237819936214e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519390 + }, + { + "epoch": 2.5189961359064856, + "grad_norm": 1.3253878705654643e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519400 + }, + { + "epoch": 2.519044634099322, + "grad_norm": 1.1640599240081428e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519410 + }, + { + "epoch": 2.519093132292158, + "grad_norm": 1.2555213402265508e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519420 + }, + { + "epoch": 2.5191416304849943, + "grad_norm": 1.1605281002857737e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519430 + }, + { + "epoch": 2.51919012867783, + "grad_norm": 1.3139221266555978e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519440 + }, + { + "epoch": 2.519238626870666, + "grad_norm": 1.1966959334586136e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519450 + }, + { + "epoch": 2.519287125063502, + "grad_norm": 1.2563651807795395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519460 + }, + { + "epoch": 2.5193356232563384, + "grad_norm": 1.2479887345762108e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519470 + }, + { + "epoch": 2.5193841214491743, + "grad_norm": 1.2786814806986513e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519480 + }, + { + "epoch": 2.5194326196420107, + "grad_norm": 1.1014401479769731e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519490 + }, + { + "epoch": 2.5194811178348466, + "grad_norm": 1.1971052060744114e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519500 + }, + { + "epoch": 2.5195296160276826, + "grad_norm": 1.1256742027399014e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519510 + }, + { + "epoch": 2.519578114220519, + "grad_norm": 1.0666374805623491e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519520 + }, + { + "epoch": 2.519626612413355, + "grad_norm": 1.1593434123824409e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519530 + }, + { + "epoch": 2.519675110606191, + "grad_norm": 9.85853247925661e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519540 + }, + { + "epoch": 2.519723608799027, + "grad_norm": 1.0586256848910125e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519550 + }, + { + "epoch": 2.519772106991863, + "grad_norm": 1.0680678741437077e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519560 + }, + { + "epoch": 2.5198206051846994, + "grad_norm": 9.702706194048005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519570 + }, + { + "epoch": 2.5198691033775353, + "grad_norm": 1.0555247342836083e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519580 + }, + { + "epoch": 2.5199176015703717, + "grad_norm": 9.798596067867038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519590 + }, + { + "epoch": 2.5199660997632076, + "grad_norm": 1.091218990723064e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519600 + }, + { + "epoch": 2.5200145979560435, + "grad_norm": 9.974709058724329e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519610 + }, + { + "epoch": 2.52006309614888, + "grad_norm": 1.0930374116924213e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519620 + }, + { + "epoch": 2.520111594341716, + "grad_norm": 1.0013820883614244e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519630 + }, + { + "epoch": 2.520160092534552, + "grad_norm": 9.955486746093811e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519640 + }, + { + "epoch": 2.520208590727388, + "grad_norm": 9.60508046432551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519650 + }, + { + "epoch": 2.520257088920224, + "grad_norm": 1.0111326531614395e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519660 + }, + { + "epoch": 2.52030558711306, + "grad_norm": 9.998936434385541e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519670 + }, + { + "epoch": 2.5203540853058963, + "grad_norm": 9.841284764888769e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519680 + }, + { + "epoch": 2.5204025834987323, + "grad_norm": 9.757322771974941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519690 + }, + { + "epoch": 2.5204510816915686, + "grad_norm": 9.854411331389201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519700 + }, + { + "epoch": 2.5204995798844045, + "grad_norm": 9.199340667009892e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519710 + }, + { + "epoch": 2.5205480780772405, + "grad_norm": 9.774280584906592e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519720 + }, + { + "epoch": 2.520596576270077, + "grad_norm": 9.236792664069071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519730 + }, + { + "epoch": 2.5206450744629127, + "grad_norm": 9.382031151972114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519740 + }, + { + "epoch": 2.520693572655749, + "grad_norm": 9.024351044217838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519750 + }, + { + "epoch": 2.520742070848585, + "grad_norm": 9.752085361469653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519760 + }, + { + "epoch": 2.520790569041421, + "grad_norm": 9.529576061595435e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519770 + }, + { + "epoch": 2.5208390672342573, + "grad_norm": 7.965886084093654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519780 + }, + { + "epoch": 2.5208875654270932, + "grad_norm": 9.331981942750645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519790 + }, + { + "epoch": 2.5209360636199296, + "grad_norm": 9.386558019741642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519800 + }, + { + "epoch": 2.5209845618127655, + "grad_norm": 8.835549891728078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519810 + }, + { + "epoch": 2.5210330600056015, + "grad_norm": 8.8224780370183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519820 + }, + { + "epoch": 2.5210815581984374, + "grad_norm": 9.080372365133371e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519830 + }, + { + "epoch": 2.5211300563912737, + "grad_norm": 8.115846839018559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519840 + }, + { + "epoch": 2.5211785545841097, + "grad_norm": 8.888997626854689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519850 + }, + { + "epoch": 2.521227052776946, + "grad_norm": 8.685133678909551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519860 + }, + { + "epoch": 2.521275550969782, + "grad_norm": 8.353236324865065e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519870 + }, + { + "epoch": 2.521324049162618, + "grad_norm": 8.545600138631926e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519880 + }, + { + "epoch": 2.5213725473554542, + "grad_norm": 8.412490615228307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519890 + }, + { + "epoch": 2.52142104554829, + "grad_norm": 8.224697012337856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519900 + }, + { + "epoch": 2.5214695437411265, + "grad_norm": 8.588131095166318e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519910 + }, + { + "epoch": 2.5215180419339625, + "grad_norm": 8.447999988447918e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519920 + }, + { + "epoch": 2.5215665401267984, + "grad_norm": 7.942424673501591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519930 + }, + { + "epoch": 2.5216150383196347, + "grad_norm": 8.770023640636282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519940 + }, + { + "epoch": 2.5216635365124707, + "grad_norm": 8.568844833689582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519950 + }, + { + "epoch": 2.521712034705307, + "grad_norm": 7.991821604491633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519960 + }, + { + "epoch": 2.521760532898143, + "grad_norm": 8.309549315299591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519970 + }, + { + "epoch": 2.521809031090979, + "grad_norm": 8.321631383978456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519980 + }, + { + "epoch": 2.521857529283815, + "grad_norm": 7.613694918973124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 519990 + }, + { + "epoch": 2.521906027476651, + "grad_norm": 7.678806923649972e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520000 + }, + { + "epoch": 2.521954525669487, + "grad_norm": 7.741640928315974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520010 + }, + { + "epoch": 2.5220030238623234, + "grad_norm": 8.011707564037351e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520020 + }, + { + "epoch": 2.5220515220551594, + "grad_norm": 8.189087452592503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520030 + }, + { + "epoch": 2.5221000202479953, + "grad_norm": 7.744284857835737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520040 + }, + { + "epoch": 2.5221485184408317, + "grad_norm": 7.735979323797437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520050 + }, + { + "epoch": 2.5221970166336676, + "grad_norm": 7.495867748730234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520060 + }, + { + "epoch": 2.522245514826504, + "grad_norm": 8.106194115953258e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520070 + }, + { + "epoch": 2.52229401301934, + "grad_norm": 7.745309460460703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520080 + }, + { + "epoch": 2.522342511212176, + "grad_norm": 8.21960668417887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520090 + }, + { + "epoch": 2.522391009405012, + "grad_norm": 7.927699385845699e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520100 + }, + { + "epoch": 2.522439507597848, + "grad_norm": 7.663285828130029e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520110 + }, + { + "epoch": 2.5224880057906844, + "grad_norm": 7.973799398541814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520120 + }, + { + "epoch": 2.5225365039835204, + "grad_norm": 7.685967062798227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520130 + }, + { + "epoch": 2.5225850021763563, + "grad_norm": 7.740786145404854e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520140 + }, + { + "epoch": 2.5226335003691926, + "grad_norm": 7.310863026077641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520150 + }, + { + "epoch": 2.5226819985620286, + "grad_norm": 7.336875285091082e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520160 + }, + { + "epoch": 2.522730496754865, + "grad_norm": 7.163379223129596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520170 + }, + { + "epoch": 2.522778994947701, + "grad_norm": 8.250058414205341e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520180 + }, + { + "epoch": 2.522827493140537, + "grad_norm": 7.578810112818246e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520190 + }, + { + "epoch": 2.5228759913333727, + "grad_norm": 7.693871140190822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520200 + }, + { + "epoch": 2.522924489526209, + "grad_norm": 7.024032555591475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520210 + }, + { + "epoch": 2.522972987719045, + "grad_norm": 7.444166527648122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520220 + }, + { + "epoch": 2.5230214859118814, + "grad_norm": 7.529762058311462e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520230 + }, + { + "epoch": 2.5230699841047173, + "grad_norm": 7.277812841266496e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520240 + }, + { + "epoch": 2.523118482297553, + "grad_norm": 7.467246376791081e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520250 + }, + { + "epoch": 2.5231669804903896, + "grad_norm": 6.93104738047623e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520260 + }, + { + "epoch": 2.5232154786832255, + "grad_norm": 7.096096510395e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520270 + }, + { + "epoch": 2.523263976876062, + "grad_norm": 7.328206663714809e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520280 + }, + { + "epoch": 2.5233124750688978, + "grad_norm": 7.254674727619204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520290 + }, + { + "epoch": 2.5233609732617337, + "grad_norm": 7.920033340269583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520300 + }, + { + "epoch": 2.52340947145457, + "grad_norm": 7.300651105879297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520310 + }, + { + "epoch": 2.523457969647406, + "grad_norm": 7.220229747417761e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520320 + }, + { + "epoch": 2.5235064678402424, + "grad_norm": 7.726829664989054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520330 + }, + { + "epoch": 2.5235549660330783, + "grad_norm": 7.219515651968322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520340 + }, + { + "epoch": 2.523603464225914, + "grad_norm": 7.069077412324987e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520350 + }, + { + "epoch": 2.52365196241875, + "grad_norm": 7.03449742900375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520360 + }, + { + "epoch": 2.5237004606115865, + "grad_norm": 7.003529134408382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520370 + }, + { + "epoch": 2.5237489588044224, + "grad_norm": 7.065875706757652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520380 + }, + { + "epoch": 2.5237974569972588, + "grad_norm": 7.111285071914608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520390 + }, + { + "epoch": 2.5238459551900947, + "grad_norm": 6.796258134045274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520400 + }, + { + "epoch": 2.5238944533829306, + "grad_norm": 6.551731956960793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520410 + }, + { + "epoch": 2.523942951575767, + "grad_norm": 6.738516589166466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520420 + }, + { + "epoch": 2.523991449768603, + "grad_norm": 6.858987688929119e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520430 + }, + { + "epoch": 2.5240399479614393, + "grad_norm": 6.428746246456285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520440 + }, + { + "epoch": 2.524088446154275, + "grad_norm": 7.0342679237001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520450 + }, + { + "epoch": 2.524136944347111, + "grad_norm": 6.705810307039428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520460 + }, + { + "epoch": 2.5241854425399475, + "grad_norm": 6.783230332985113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520470 + }, + { + "epoch": 2.5242339407327834, + "grad_norm": 6.995297496814601e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520480 + }, + { + "epoch": 2.5242824389256198, + "grad_norm": 6.689777620749737e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520490 + }, + { + "epoch": 2.5243309371184557, + "grad_norm": 6.680247821577723e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520500 + }, + { + "epoch": 2.5243794353112916, + "grad_norm": 6.551821485345499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520510 + }, + { + "epoch": 2.5244279335041275, + "grad_norm": 6.591352530449512e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520520 + }, + { + "epoch": 2.524476431696964, + "grad_norm": 6.645978345432013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520530 + }, + { + "epoch": 2.5245249298898, + "grad_norm": 6.726352808072988e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520540 + }, + { + "epoch": 2.524573428082636, + "grad_norm": 6.542403241382999e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520550 + }, + { + "epoch": 2.524621926275472, + "grad_norm": 6.58983125845225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520560 + }, + { + "epoch": 2.524670424468308, + "grad_norm": 6.63809061052234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520570 + }, + { + "epoch": 2.5247189226611444, + "grad_norm": 6.695638887777022e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520580 + }, + { + "epoch": 2.5247674208539803, + "grad_norm": 6.54276348655003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520590 + }, + { + "epoch": 2.5248159190468167, + "grad_norm": 6.973363042561687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520600 + }, + { + "epoch": 2.5248644172396526, + "grad_norm": 6.38147312770343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520610 + }, + { + "epoch": 2.5249129154324885, + "grad_norm": 6.72756428343746e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520620 + }, + { + "epoch": 2.524961413625325, + "grad_norm": 6.364054883079007e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520630 + }, + { + "epoch": 2.525009911818161, + "grad_norm": 6.620168591098263e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520640 + }, + { + "epoch": 2.525058410010997, + "grad_norm": 6.482878234237432e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520650 + }, + { + "epoch": 2.525106908203833, + "grad_norm": 6.541053920727791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520660 + }, + { + "epoch": 2.525155406396669, + "grad_norm": 6.41410693447142e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520670 + }, + { + "epoch": 2.5252039045895054, + "grad_norm": 6.895655957350755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520680 + }, + { + "epoch": 2.5252524027823413, + "grad_norm": 6.317022638313574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520690 + }, + { + "epoch": 2.5253009009751777, + "grad_norm": 6.314684952712923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520700 + }, + { + "epoch": 2.5253493991680136, + "grad_norm": 6.545776898292388e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520710 + }, + { + "epoch": 2.5253978973608495, + "grad_norm": 6.307399758043175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520720 + }, + { + "epoch": 2.5254463955536854, + "grad_norm": 6.430879295749037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520730 + }, + { + "epoch": 2.525494893746522, + "grad_norm": 6.285972631303594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520740 + }, + { + "epoch": 2.5255433919393577, + "grad_norm": 6.317105771813658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520750 + }, + { + "epoch": 2.525591890132194, + "grad_norm": 6.140997044212781e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520760 + }, + { + "epoch": 2.52564038832503, + "grad_norm": 6.609779745758715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520770 + }, + { + "epoch": 2.525688886517866, + "grad_norm": 6.16315034562831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520780 + }, + { + "epoch": 2.5257373847107023, + "grad_norm": 6.280222919485823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520790 + }, + { + "epoch": 2.5257858829035382, + "grad_norm": 6.302980182226747e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520800 + }, + { + "epoch": 2.5258343810963746, + "grad_norm": 6.463359625286103e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520810 + }, + { + "epoch": 2.5258828792892105, + "grad_norm": 6.179227796110354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520820 + }, + { + "epoch": 2.5259313774820464, + "grad_norm": 6.175282862841414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520830 + }, + { + "epoch": 2.525979875674883, + "grad_norm": 6.312222922133515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520840 + }, + { + "epoch": 2.5260283738677187, + "grad_norm": 6.074879621564833e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520850 + }, + { + "epoch": 2.526076872060555, + "grad_norm": 6.114034789561629e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520860 + }, + { + "epoch": 2.526125370253391, + "grad_norm": 6.079880421339112e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520870 + }, + { + "epoch": 2.526173868446227, + "grad_norm": 5.997635810217616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520880 + }, + { + "epoch": 2.526222366639063, + "grad_norm": 6.11730257560339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520890 + }, + { + "epoch": 2.526270864831899, + "grad_norm": 6.101191729612765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520900 + }, + { + "epoch": 2.526319363024735, + "grad_norm": 5.99390475031214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520910 + }, + { + "epoch": 2.5263678612175715, + "grad_norm": 6.156104603860513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520920 + }, + { + "epoch": 2.5264163594104074, + "grad_norm": 6.355727322215898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520930 + }, + { + "epoch": 2.5264648576032434, + "grad_norm": 5.941243941265384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520940 + }, + { + "epoch": 2.5265133557960797, + "grad_norm": 5.89767594760815e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520950 + }, + { + "epoch": 2.5265618539889156, + "grad_norm": 6.435246291403018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520960 + }, + { + "epoch": 2.526610352181752, + "grad_norm": 6.039861943918368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520970 + }, + { + "epoch": 2.526658850374588, + "grad_norm": 6.67933832687595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520980 + }, + { + "epoch": 2.526707348567424, + "grad_norm": 5.836355398969317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 520990 + }, + { + "epoch": 2.52675584676026, + "grad_norm": 6.10605752626725e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521000 + }, + { + "epoch": 2.526804344953096, + "grad_norm": 6.064250612780597e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521010 + }, + { + "epoch": 2.5268528431459325, + "grad_norm": 5.7667165265229414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521020 + }, + { + "epoch": 2.5269013413387684, + "grad_norm": 5.771575928292805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521030 + }, + { + "epoch": 2.5269498395316043, + "grad_norm": 6.165777222122415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521040 + }, + { + "epoch": 2.5269983377244403, + "grad_norm": 5.719340379073401e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521050 + }, + { + "epoch": 2.5270468359172766, + "grad_norm": 6.137207009260237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521060 + }, + { + "epoch": 2.5270953341101126, + "grad_norm": 5.860063367890689e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521070 + }, + { + "epoch": 2.527143832302949, + "grad_norm": 5.778322886840215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521080 + }, + { + "epoch": 2.527192330495785, + "grad_norm": 5.72660532327518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521090 + }, + { + "epoch": 2.5272408286886208, + "grad_norm": 6.620049219918656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521100 + }, + { + "epoch": 2.527289326881457, + "grad_norm": 5.940372460599974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521110 + }, + { + "epoch": 2.527337825074293, + "grad_norm": 5.9234000815422405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521120 + }, + { + "epoch": 2.5273863232671294, + "grad_norm": 5.642004197170536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521130 + }, + { + "epoch": 2.5274348214599653, + "grad_norm": 5.867178032303855e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521140 + }, + { + "epoch": 2.5274833196528013, + "grad_norm": 5.675081027334272e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521150 + }, + { + "epoch": 2.5275318178456376, + "grad_norm": 5.6611149545915396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521160 + }, + { + "epoch": 2.5275803160384736, + "grad_norm": 5.895996579852181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521170 + }, + { + "epoch": 2.52762881423131, + "grad_norm": 5.474144160189098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521180 + }, + { + "epoch": 2.527677312424146, + "grad_norm": 5.966335692164648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521190 + }, + { + "epoch": 2.5277258106169818, + "grad_norm": 5.641058464789239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521200 + }, + { + "epoch": 2.527774308809818, + "grad_norm": 5.531453695084565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521210 + }, + { + "epoch": 2.527822807002654, + "grad_norm": 5.950534287535447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521220 + }, + { + "epoch": 2.5278713051954904, + "grad_norm": 5.5464237647129266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521230 + }, + { + "epoch": 2.5279198033883263, + "grad_norm": 1.3186816261168133e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521240 + }, + { + "epoch": 2.5279683015811623, + "grad_norm": 5.580859507858804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521250 + }, + { + "epoch": 2.528016799773998, + "grad_norm": 5.86246464706619e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521260 + }, + { + "epoch": 2.5280652979668345, + "grad_norm": 5.695203952882366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521270 + }, + { + "epoch": 2.5281137961596705, + "grad_norm": 5.539856928749032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521280 + }, + { + "epoch": 2.528162294352507, + "grad_norm": 5.5786365749099787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521290 + }, + { + "epoch": 2.5282107925453428, + "grad_norm": 5.733226160486993e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521300 + }, + { + "epoch": 2.5282592907381787, + "grad_norm": 5.5397865850181915e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521310 + }, + { + "epoch": 2.528307788931015, + "grad_norm": 5.632034927316454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521320 + }, + { + "epoch": 2.528356287123851, + "grad_norm": 5.6137825055202484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521330 + }, + { + "epoch": 2.5284047853166873, + "grad_norm": 5.544778147736906e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521340 + }, + { + "epoch": 2.5284532835095233, + "grad_norm": 5.500629285393188e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521350 + }, + { + "epoch": 2.528501781702359, + "grad_norm": 5.532567826094237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521360 + }, + { + "epoch": 2.5285502798951955, + "grad_norm": 5.8292023652484204e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521370 + }, + { + "epoch": 2.5285987780880315, + "grad_norm": 5.560688975947414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521380 + }, + { + "epoch": 2.528647276280868, + "grad_norm": 5.3932978971715784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521390 + }, + { + "epoch": 2.5286957744737038, + "grad_norm": 5.570762340312285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521400 + }, + { + "epoch": 2.5287442726665397, + "grad_norm": 5.508902134465643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521410 + }, + { + "epoch": 2.5287927708593756, + "grad_norm": 5.300952921061253e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521420 + }, + { + "epoch": 2.528841269052212, + "grad_norm": 5.562469240771861e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521430 + }, + { + "epoch": 2.528889767245048, + "grad_norm": 5.398177904680779e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521440 + }, + { + "epoch": 2.5289382654378842, + "grad_norm": 5.471964570347154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521450 + }, + { + "epoch": 2.52898676363072, + "grad_norm": 5.3943963962410635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521460 + }, + { + "epoch": 2.529035261823556, + "grad_norm": 5.421933124694078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521470 + }, + { + "epoch": 2.5290837600163925, + "grad_norm": 5.261796331978985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521480 + }, + { + "epoch": 2.5291322582092284, + "grad_norm": 5.442488415496882e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521490 + }, + { + "epoch": 2.5291807564020647, + "grad_norm": 5.412434944673805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521500 + }, + { + "epoch": 2.5292292545949007, + "grad_norm": 5.5307712187868674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521510 + }, + { + "epoch": 2.5292777527877366, + "grad_norm": 6.761972315416642e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521520 + }, + { + "epoch": 2.529326250980573, + "grad_norm": 5.3578506964413464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521530 + }, + { + "epoch": 2.529374749173409, + "grad_norm": 5.448302786703607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521540 + }, + { + "epoch": 2.5294232473662452, + "grad_norm": 5.3787903908641965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521550 + }, + { + "epoch": 2.529471745559081, + "grad_norm": 5.416741188923879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521560 + }, + { + "epoch": 2.529520243751917, + "grad_norm": 5.2877524581163016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521570 + }, + { + "epoch": 2.529568741944753, + "grad_norm": 5.35394875100792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521580 + }, + { + "epoch": 2.5296172401375894, + "grad_norm": 5.2817050288922474e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521590 + }, + { + "epoch": 2.5296657383304257, + "grad_norm": 5.401429703510985e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521600 + }, + { + "epoch": 2.5297142365232617, + "grad_norm": 5.45802052442923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521610 + }, + { + "epoch": 2.5297627347160976, + "grad_norm": 5.289371785011099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521620 + }, + { + "epoch": 2.5298112329089335, + "grad_norm": 5.3042029435346194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521630 + }, + { + "epoch": 2.52985973110177, + "grad_norm": 5.340388753438674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521640 + }, + { + "epoch": 2.529908229294606, + "grad_norm": 5.3142812816986407e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521650 + }, + { + "epoch": 2.529956727487442, + "grad_norm": 5.272372405329406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521660 + }, + { + "epoch": 2.530005225680278, + "grad_norm": 5.534664282436097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521670 + }, + { + "epoch": 2.530053723873114, + "grad_norm": 5.3086694151716074e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521680 + }, + { + "epoch": 2.5301022220659504, + "grad_norm": 5.3737757355065696e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521690 + }, + { + "epoch": 2.5301507202587863, + "grad_norm": 5.201506425578373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521700 + }, + { + "epoch": 2.5301992184516227, + "grad_norm": 5.157048121873231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521710 + }, + { + "epoch": 2.5302477166444586, + "grad_norm": 5.348681142436362e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521720 + }, + { + "epoch": 2.5302962148372945, + "grad_norm": 5.110206302560982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521730 + }, + { + "epoch": 2.530344713030131, + "grad_norm": 5.2203475320311554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521740 + }, + { + "epoch": 2.530393211222967, + "grad_norm": 5.4291536599748724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521750 + }, + { + "epoch": 2.530441709415803, + "grad_norm": 5.342503683891664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521760 + }, + { + "epoch": 2.530490207608639, + "grad_norm": 4.9793108303219924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521770 + }, + { + "epoch": 2.530538705801475, + "grad_norm": 5.246158707450377e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521780 + }, + { + "epoch": 2.530587203994311, + "grad_norm": 5.250274881518635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521790 + }, + { + "epoch": 2.5306357021871473, + "grad_norm": 5.172129391439739e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521800 + }, + { + "epoch": 2.530684200379983, + "grad_norm": 5.12177003031411e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521810 + }, + { + "epoch": 2.5307326985728196, + "grad_norm": 5.165339089785448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521820 + }, + { + "epoch": 2.5307811967656555, + "grad_norm": 5.0527546591183636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521830 + }, + { + "epoch": 2.5308296949584914, + "grad_norm": 5.19805460896805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521840 + }, + { + "epoch": 2.530878193151328, + "grad_norm": 5.093437493997044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521850 + }, + { + "epoch": 2.5309266913441637, + "grad_norm": 5.083408893824526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521860 + }, + { + "epoch": 2.530975189537, + "grad_norm": 5.199436259317736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521870 + }, + { + "epoch": 2.531023687729836, + "grad_norm": 4.855542456994044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521880 + }, + { + "epoch": 2.531072185922672, + "grad_norm": 5.0636252524327574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521890 + }, + { + "epoch": 2.5311206841155083, + "grad_norm": 5.04561619152355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521900 + }, + { + "epoch": 2.531169182308344, + "grad_norm": 4.9957488812424344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521910 + }, + { + "epoch": 2.5312176805011806, + "grad_norm": 4.9097280907517415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521920 + }, + { + "epoch": 2.5312661786940165, + "grad_norm": 4.91807412572598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521930 + }, + { + "epoch": 2.5313146768868524, + "grad_norm": 4.8679069664103736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521940 + }, + { + "epoch": 2.5313631750796883, + "grad_norm": 4.890312155225729e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521950 + }, + { + "epoch": 2.5314116732725247, + "grad_norm": 4.954836185788736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521960 + }, + { + "epoch": 2.5314601714653606, + "grad_norm": 5.0610985624643945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521970 + }, + { + "epoch": 2.531508669658197, + "grad_norm": 4.8098705462962243e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521980 + }, + { + "epoch": 2.531557167851033, + "grad_norm": 4.959269261917143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 521990 + }, + { + "epoch": 2.531605666043869, + "grad_norm": 5.005876957397959e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522000 + }, + { + "epoch": 2.531654164236705, + "grad_norm": 4.787930052430056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522010 + }, + { + "epoch": 2.531702662429541, + "grad_norm": 5.1232703413006675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522020 + }, + { + "epoch": 2.5317511606223775, + "grad_norm": 5.000225655749091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522030 + }, + { + "epoch": 2.5317996588152134, + "grad_norm": 4.9073822339096296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522040 + }, + { + "epoch": 2.5318481570080493, + "grad_norm": 4.891416693908468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522050 + }, + { + "epoch": 2.5318966552008857, + "grad_norm": 4.818807042283879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522060 + }, + { + "epoch": 2.5319451533937216, + "grad_norm": 4.854640422990997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522070 + }, + { + "epoch": 2.531993651586558, + "grad_norm": 4.817809085011504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522080 + }, + { + "epoch": 2.532042149779394, + "grad_norm": 5.085386334258146e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522090 + }, + { + "epoch": 2.53209064797223, + "grad_norm": 5.0404757701016933e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522100 + }, + { + "epoch": 2.532139146165066, + "grad_norm": 4.869218628300587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522110 + }, + { + "epoch": 2.532187644357902, + "grad_norm": 4.768017802803115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522120 + }, + { + "epoch": 2.5322361425507385, + "grad_norm": 4.764932626244445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522130 + }, + { + "epoch": 2.5322846407435744, + "grad_norm": 4.719871071756643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522140 + }, + { + "epoch": 2.5323331389364103, + "grad_norm": 4.8476781699946514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522150 + }, + { + "epoch": 2.5323816371292462, + "grad_norm": 4.588122592963373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522160 + }, + { + "epoch": 2.5324301353220826, + "grad_norm": 4.6793093844144096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522170 + }, + { + "epoch": 2.5324786335149185, + "grad_norm": 4.6364188932557227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522180 + }, + { + "epoch": 2.532527131707755, + "grad_norm": 4.600494563078428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522190 + }, + { + "epoch": 2.532575629900591, + "grad_norm": 4.825257704510477e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522200 + }, + { + "epoch": 2.5326241280934267, + "grad_norm": 4.885831117462658e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522210 + }, + { + "epoch": 2.532672626286263, + "grad_norm": 4.7334992814285215e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522220 + }, + { + "epoch": 2.532721124479099, + "grad_norm": 4.593205105152265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522230 + }, + { + "epoch": 2.5327696226719354, + "grad_norm": 4.841136203026508e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522240 + }, + { + "epoch": 2.5328181208647713, + "grad_norm": 4.693995236948467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522250 + }, + { + "epoch": 2.5328666190576072, + "grad_norm": 4.632483907585083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522260 + }, + { + "epoch": 2.5329151172504436, + "grad_norm": 4.6712351320366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522270 + }, + { + "epoch": 2.5329636154432795, + "grad_norm": 4.828990540772793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522280 + }, + { + "epoch": 2.533012113636116, + "grad_norm": 4.696928712633053e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522290 + }, + { + "epoch": 2.533060611828952, + "grad_norm": 4.666966901822889e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522300 + }, + { + "epoch": 2.5331091100217877, + "grad_norm": 4.668153863462976e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522310 + }, + { + "epoch": 2.5331576082146237, + "grad_norm": 4.647817419822786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522320 + }, + { + "epoch": 2.53320610640746, + "grad_norm": 4.415547394387431e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522330 + }, + { + "epoch": 2.533254604600296, + "grad_norm": 4.663651509417832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522340 + }, + { + "epoch": 2.5333031027931323, + "grad_norm": 4.664068953275091e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522350 + }, + { + "epoch": 2.5333516009859682, + "grad_norm": 4.6362355732298965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522360 + }, + { + "epoch": 2.533400099178804, + "grad_norm": 4.6398493935839724e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522370 + }, + { + "epoch": 2.5334485973716405, + "grad_norm": 7.113105482403626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522380 + }, + { + "epoch": 2.5334970955644764, + "grad_norm": 4.606796366601884e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522390 + }, + { + "epoch": 2.533545593757313, + "grad_norm": 4.5594735098575256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522400 + }, + { + "epoch": 2.5335940919501487, + "grad_norm": 4.669911035648511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522410 + }, + { + "epoch": 2.5336425901429847, + "grad_norm": 4.6580225898651406e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522420 + }, + { + "epoch": 2.533691088335821, + "grad_norm": 4.6700684208644816e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522430 + }, + { + "epoch": 2.533739586528657, + "grad_norm": 4.6110368856489004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522440 + }, + { + "epoch": 2.5337880847214933, + "grad_norm": 4.6102158535177296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522450 + }, + { + "epoch": 2.5338365829143292, + "grad_norm": 4.638174289084418e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522460 + }, + { + "epoch": 2.533885081107165, + "grad_norm": 4.481988824522887e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522470 + }, + { + "epoch": 2.533933579300001, + "grad_norm": 4.577207590727994e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522480 + }, + { + "epoch": 2.5339820774928374, + "grad_norm": 4.6161925837395756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522490 + }, + { + "epoch": 2.5340305756856734, + "grad_norm": 4.57783677632051e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522500 + }, + { + "epoch": 2.5340790738785097, + "grad_norm": 4.502353689872507e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522510 + }, + { + "epoch": 2.5341275720713456, + "grad_norm": 4.487402094355275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522520 + }, + { + "epoch": 2.5341760702641816, + "grad_norm": 4.390911101381789e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522530 + }, + { + "epoch": 2.534224568457018, + "grad_norm": 4.31351523388912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522540 + }, + { + "epoch": 2.534273066649854, + "grad_norm": 4.400415321015316e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522550 + }, + { + "epoch": 2.53432156484269, + "grad_norm": 4.526623698097865e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522560 + }, + { + "epoch": 2.534370063035526, + "grad_norm": 4.53413733225716e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522570 + }, + { + "epoch": 2.534418561228362, + "grad_norm": 4.378027895768355e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522580 + }, + { + "epoch": 2.5344670594211984, + "grad_norm": 4.470019732139008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522590 + }, + { + "epoch": 2.5345155576140344, + "grad_norm": 4.50522392725361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522600 + }, + { + "epoch": 2.5345640558068707, + "grad_norm": 4.393912789169008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522610 + }, + { + "epoch": 2.5346125539997066, + "grad_norm": 4.370712858303705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522620 + }, + { + "epoch": 2.5346610521925426, + "grad_norm": 4.2559339163972254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522630 + }, + { + "epoch": 2.534709550385379, + "grad_norm": 4.4683591937655365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522640 + }, + { + "epoch": 2.534758048578215, + "grad_norm": 4.623040084084096e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522650 + }, + { + "epoch": 2.534806546771051, + "grad_norm": 4.436638434412998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522660 + }, + { + "epoch": 2.534855044963887, + "grad_norm": 4.316235546752978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522670 + }, + { + "epoch": 2.534903543156723, + "grad_norm": 4.2284920453994346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522680 + }, + { + "epoch": 2.534952041349559, + "grad_norm": 4.3298456375850947e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522690 + }, + { + "epoch": 2.5350005395423953, + "grad_norm": 4.421540111820832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522700 + }, + { + "epoch": 2.5350490377352313, + "grad_norm": 4.3782424796745545e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522710 + }, + { + "epoch": 2.5350975359280676, + "grad_norm": 4.366530248489653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522720 + }, + { + "epoch": 2.5351460341209036, + "grad_norm": 4.54149926554237e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522730 + }, + { + "epoch": 2.5351945323137395, + "grad_norm": 4.277379517247937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522740 + }, + { + "epoch": 2.535243030506576, + "grad_norm": 4.369337247567273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522750 + }, + { + "epoch": 2.5352915286994118, + "grad_norm": 4.759925431585543e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522760 + }, + { + "epoch": 2.535340026892248, + "grad_norm": 4.315422685863268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522770 + }, + { + "epoch": 2.535388525085084, + "grad_norm": 4.435763045762542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522780 + }, + { + "epoch": 2.53543702327792, + "grad_norm": 4.2424776580674006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522790 + }, + { + "epoch": 2.5354855214707563, + "grad_norm": 4.4663483578233354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522800 + }, + { + "epoch": 2.5355340196635923, + "grad_norm": 4.2735649685710086e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522810 + }, + { + "epoch": 2.5355825178564286, + "grad_norm": 4.521457697137521e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522820 + }, + { + "epoch": 2.5356310160492646, + "grad_norm": 4.273443465763194e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522830 + }, + { + "epoch": 2.5356795142421005, + "grad_norm": 4.2855752724335616e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522840 + }, + { + "epoch": 2.5357280124349364, + "grad_norm": 4.2702481550804805e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522850 + }, + { + "epoch": 2.5357765106277728, + "grad_norm": 4.3054999565583785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522860 + }, + { + "epoch": 2.5358250088206087, + "grad_norm": 4.19598968903756e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522870 + }, + { + "epoch": 2.535873507013445, + "grad_norm": 4.044976265049627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522880 + }, + { + "epoch": 2.535922005206281, + "grad_norm": 4.532621034059048e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522890 + }, + { + "epoch": 2.535970503399117, + "grad_norm": 4.338021497574118e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522900 + }, + { + "epoch": 2.5360190015919533, + "grad_norm": 4.122657770722071e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522910 + }, + { + "epoch": 2.536067499784789, + "grad_norm": 4.3109853464784464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522920 + }, + { + "epoch": 2.5361159979776255, + "grad_norm": 4.08438332044625e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522930 + }, + { + "epoch": 2.5361644961704615, + "grad_norm": 4.130628994403196e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522940 + }, + { + "epoch": 2.5362129943632974, + "grad_norm": 4.2428485613754674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522950 + }, + { + "epoch": 2.5362614925561338, + "grad_norm": 4.152527921519322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522960 + }, + { + "epoch": 2.5363099907489697, + "grad_norm": 4.194791003442333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522970 + }, + { + "epoch": 2.536358488941806, + "grad_norm": 4.097348949017032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522980 + }, + { + "epoch": 2.536406987134642, + "grad_norm": 4.1204621936685726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 522990 + }, + { + "epoch": 2.536455485327478, + "grad_norm": 4.047351254143905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523000 + }, + { + "epoch": 2.536503983520314, + "grad_norm": 4.2301511626874344e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523010 + }, + { + "epoch": 2.53655248171315, + "grad_norm": 4.058593461309101e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523020 + }, + { + "epoch": 2.536600979905986, + "grad_norm": 3.970293960264826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523030 + }, + { + "epoch": 2.5366494780988225, + "grad_norm": 4.1391221117237365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523040 + }, + { + "epoch": 2.5366979762916584, + "grad_norm": 4.036730771872499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523050 + }, + { + "epoch": 2.5367464744844943, + "grad_norm": 4.09113027899366e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523060 + }, + { + "epoch": 2.5367949726773307, + "grad_norm": 4.007307907727409e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523070 + }, + { + "epoch": 2.5368434708701666, + "grad_norm": 4.083603499793753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523080 + }, + { + "epoch": 2.536891969063003, + "grad_norm": 4.1140619799762135e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523090 + }, + { + "epoch": 2.536940467255839, + "grad_norm": 4.169845979618003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523100 + }, + { + "epoch": 2.536988965448675, + "grad_norm": 4.0407332591030354e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523110 + }, + { + "epoch": 2.537037463641511, + "grad_norm": 4.0825980818226526e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523120 + }, + { + "epoch": 2.537085961834347, + "grad_norm": 4.041996604087217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523130 + }, + { + "epoch": 2.5371344600271835, + "grad_norm": 4.0790528288425776e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523140 + }, + { + "epoch": 2.5371829582200194, + "grad_norm": 4.1006121165310105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523150 + }, + { + "epoch": 2.5372314564128553, + "grad_norm": 4.0973368697905244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523160 + }, + { + "epoch": 2.5372799546056917, + "grad_norm": 4.083328875026382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523170 + }, + { + "epoch": 2.5373284527985276, + "grad_norm": 4.3413884043275175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523180 + }, + { + "epoch": 2.537376950991364, + "grad_norm": 3.9504417514990564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523190 + }, + { + "epoch": 2.5374254491842, + "grad_norm": 3.8613528374753514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523200 + }, + { + "epoch": 2.537473947377036, + "grad_norm": 3.915526036735173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523210 + }, + { + "epoch": 2.5375224455698717, + "grad_norm": 3.9234297588564004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523220 + }, + { + "epoch": 2.537570943762708, + "grad_norm": 4.082934879079403e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523230 + }, + { + "epoch": 2.537619441955544, + "grad_norm": 4.059072011841636e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523240 + }, + { + "epoch": 2.5376679401483804, + "grad_norm": 3.921198654666114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523250 + }, + { + "epoch": 2.5377164383412163, + "grad_norm": 4.077164916793663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523260 + }, + { + "epoch": 2.537764936534052, + "grad_norm": 4.039893397589367e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523270 + }, + { + "epoch": 2.5378134347268886, + "grad_norm": 4.080496296410274e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523280 + }, + { + "epoch": 2.5378619329197245, + "grad_norm": 4.0455550021079034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523290 + }, + { + "epoch": 2.537910431112561, + "grad_norm": 3.8176917627197327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523300 + }, + { + "epoch": 2.537958929305397, + "grad_norm": 3.9439669308194425e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523310 + }, + { + "epoch": 2.5380074274982327, + "grad_norm": 3.927207004039701e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523320 + }, + { + "epoch": 2.538055925691069, + "grad_norm": 4.561370658962005e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523330 + }, + { + "epoch": 2.538104423883905, + "grad_norm": 3.9769112447629595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523340 + }, + { + "epoch": 2.5381529220767414, + "grad_norm": 3.789506308748969e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523350 + }, + { + "epoch": 2.5382014202695773, + "grad_norm": 3.888684929620467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523360 + }, + { + "epoch": 2.538249918462413, + "grad_norm": 3.872170140084563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523370 + }, + { + "epoch": 2.538298416655249, + "grad_norm": 3.931090475361998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523380 + }, + { + "epoch": 2.5383469148480855, + "grad_norm": 3.73868438430236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523390 + }, + { + "epoch": 2.5383954130409214, + "grad_norm": 3.8596642326638175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523400 + }, + { + "epoch": 2.538443911233758, + "grad_norm": 3.8882753017333016e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523410 + }, + { + "epoch": 2.5384924094265937, + "grad_norm": 3.751321386857853e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523420 + }, + { + "epoch": 2.5385409076194296, + "grad_norm": 3.990815500287681e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523430 + }, + { + "epoch": 2.538589405812266, + "grad_norm": 3.861573816266173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523440 + }, + { + "epoch": 2.538637904005102, + "grad_norm": 3.6237697287333503e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523450 + }, + { + "epoch": 2.5386864021979383, + "grad_norm": 3.646728785611231e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523460 + }, + { + "epoch": 2.538734900390774, + "grad_norm": 3.743207344086841e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523470 + }, + { + "epoch": 2.53878339858361, + "grad_norm": 4.089098837312122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523480 + }, + { + "epoch": 2.5388318967764465, + "grad_norm": 3.771092949023114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523490 + }, + { + "epoch": 2.5388803949692824, + "grad_norm": 4.052945357102544e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523500 + }, + { + "epoch": 2.538928893162119, + "grad_norm": 3.692052885639896e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523510 + }, + { + "epoch": 2.5389773913549547, + "grad_norm": 4.079960902458879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523520 + }, + { + "epoch": 2.5390258895477906, + "grad_norm": 3.952593985445674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523530 + }, + { + "epoch": 2.5390743877406265, + "grad_norm": 3.930235337179511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523540 + }, + { + "epoch": 2.539122885933463, + "grad_norm": 3.891340227824003e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523550 + }, + { + "epoch": 2.539171384126299, + "grad_norm": 3.62634224870817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523560 + }, + { + "epoch": 2.539219882319135, + "grad_norm": 3.665053327495116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523570 + }, + { + "epoch": 2.539268380511971, + "grad_norm": 4.0483392638179794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523580 + }, + { + "epoch": 2.539316878704807, + "grad_norm": 3.570673712260941e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523590 + }, + { + "epoch": 2.5393653768976434, + "grad_norm": 3.8970192406395654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523600 + }, + { + "epoch": 2.5394138750904793, + "grad_norm": 3.6870069664018956e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523610 + }, + { + "epoch": 2.5394623732833157, + "grad_norm": 3.623839006650087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523620 + }, + { + "epoch": 2.5395108714761516, + "grad_norm": 3.7866819013743225e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523630 + }, + { + "epoch": 2.5395593696689875, + "grad_norm": 3.68669788031184e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523640 + }, + { + "epoch": 2.539607867861824, + "grad_norm": 4.0604739126592904e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523650 + }, + { + "epoch": 2.53965636605466, + "grad_norm": 3.5393693309515584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523660 + }, + { + "epoch": 2.539704864247496, + "grad_norm": 3.584926133726185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523670 + }, + { + "epoch": 2.539753362440332, + "grad_norm": 3.777697799023372e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523680 + }, + { + "epoch": 2.539801860633168, + "grad_norm": 3.7082497073015475e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523690 + }, + { + "epoch": 2.5398503588260044, + "grad_norm": 3.551516769562113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523700 + }, + { + "epoch": 2.5398988570188403, + "grad_norm": 3.6636418343505284e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523710 + }, + { + "epoch": 2.5399473552116767, + "grad_norm": 3.8177486061385935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523720 + }, + { + "epoch": 2.5399958534045126, + "grad_norm": 3.717729057939323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523730 + }, + { + "epoch": 2.5400443515973485, + "grad_norm": 3.468987586074945e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523740 + }, + { + "epoch": 2.5400928497901845, + "grad_norm": 3.7026335775180996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523750 + }, + { + "epoch": 2.540141347983021, + "grad_norm": 3.734280085154751e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523760 + }, + { + "epoch": 2.5401898461758567, + "grad_norm": 3.44227544246678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523770 + }, + { + "epoch": 2.540238344368693, + "grad_norm": 3.79662346006171e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523780 + }, + { + "epoch": 2.540286842561529, + "grad_norm": 3.610030674394693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523790 + }, + { + "epoch": 2.540335340754365, + "grad_norm": 3.517422797472136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523800 + }, + { + "epoch": 2.5403838389472013, + "grad_norm": 3.572883144897787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523810 + }, + { + "epoch": 2.5404323371400372, + "grad_norm": 3.7300520006056104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523820 + }, + { + "epoch": 2.5404808353328736, + "grad_norm": 3.776906964958471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523830 + }, + { + "epoch": 2.5405293335257095, + "grad_norm": 3.481612154132563e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523840 + }, + { + "epoch": 2.5405778317185455, + "grad_norm": 3.787642199881702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523850 + }, + { + "epoch": 2.540626329911382, + "grad_norm": 3.858640340581587e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523860 + }, + { + "epoch": 2.5406748281042177, + "grad_norm": 3.6172060902117664e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523870 + }, + { + "epoch": 2.540723326297054, + "grad_norm": 3.7776924699528536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523880 + }, + { + "epoch": 2.54077182448989, + "grad_norm": 3.4767140277836006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523890 + }, + { + "epoch": 2.540820322682726, + "grad_norm": 3.640236556634591e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523900 + }, + { + "epoch": 2.540868820875562, + "grad_norm": 3.370326595586448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523910 + }, + { + "epoch": 2.5409173190683982, + "grad_norm": 3.542014681556793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523920 + }, + { + "epoch": 2.540965817261234, + "grad_norm": 3.5850849400276275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523930 + }, + { + "epoch": 2.5410143154540705, + "grad_norm": 3.491350497597523e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523940 + }, + { + "epoch": 2.5410628136469064, + "grad_norm": 3.472024445727584e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523950 + }, + { + "epoch": 2.5411113118397424, + "grad_norm": 3.471226861506693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523960 + }, + { + "epoch": 2.5411598100325787, + "grad_norm": 3.748405319470294e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523970 + }, + { + "epoch": 2.5412083082254147, + "grad_norm": 3.443991403173641e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523980 + }, + { + "epoch": 2.541256806418251, + "grad_norm": 3.587156527373736e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 523990 + }, + { + "epoch": 2.541305304611087, + "grad_norm": 3.319664898526753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524000 + }, + { + "epoch": 2.541353802803923, + "grad_norm": 3.3385244790906654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524010 + }, + { + "epoch": 2.5414023009967592, + "grad_norm": 3.5523328278941335e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524020 + }, + { + "epoch": 2.541450799189595, + "grad_norm": 3.711325646804653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524030 + }, + { + "epoch": 2.5414992973824315, + "grad_norm": 3.572981910338058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524040 + }, + { + "epoch": 2.5415477955752674, + "grad_norm": 3.417736849087305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524050 + }, + { + "epoch": 2.5415962937681034, + "grad_norm": 3.32264171731822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524060 + }, + { + "epoch": 2.5416447919609393, + "grad_norm": 3.317722274687185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524070 + }, + { + "epoch": 2.5416932901537757, + "grad_norm": 3.733243758574645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524080 + }, + { + "epoch": 2.5417417883466116, + "grad_norm": 3.510885804303143e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524090 + }, + { + "epoch": 2.541790286539448, + "grad_norm": 3.2315579545638684e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524100 + }, + { + "epoch": 2.541838784732284, + "grad_norm": 3.3959306477981954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524110 + }, + { + "epoch": 2.54188728292512, + "grad_norm": 3.370982071260187e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524120 + }, + { + "epoch": 2.541935781117956, + "grad_norm": 3.677738291685273e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524130 + }, + { + "epoch": 2.541984279310792, + "grad_norm": 3.5706012369018936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524140 + }, + { + "epoch": 2.5420327775036284, + "grad_norm": 3.3625152440208694e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524150 + }, + { + "epoch": 2.5420812756964644, + "grad_norm": 3.82711213831044e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524160 + }, + { + "epoch": 2.5421297738893003, + "grad_norm": 3.29538742960267e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524170 + }, + { + "epoch": 2.5421782720821366, + "grad_norm": 3.2364834368081574e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524180 + }, + { + "epoch": 2.5422267702749726, + "grad_norm": 3.19064348275333e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524190 + }, + { + "epoch": 2.542275268467809, + "grad_norm": 3.224670308554778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524200 + }, + { + "epoch": 2.542323766660645, + "grad_norm": 3.2452888376610645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524210 + }, + { + "epoch": 2.542372264853481, + "grad_norm": 3.380282009857183e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524220 + }, + { + "epoch": 2.542420763046317, + "grad_norm": 3.7679246389643595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524230 + }, + { + "epoch": 2.542469261239153, + "grad_norm": 3.121634151170838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524240 + }, + { + "epoch": 2.5425177594319894, + "grad_norm": 3.2525484527923254e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524250 + }, + { + "epoch": 2.5425662576248254, + "grad_norm": 3.067104259457665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524260 + }, + { + "epoch": 2.5426147558176613, + "grad_norm": 3.378213264682017e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524270 + }, + { + "epoch": 2.542663254010497, + "grad_norm": 3.629175537867013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524280 + }, + { + "epoch": 2.5427117522033336, + "grad_norm": 3.097997947065778e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524290 + }, + { + "epoch": 2.5427602503961695, + "grad_norm": 3.120185709803991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524300 + }, + { + "epoch": 2.542808748589006, + "grad_norm": 3.154975658503645e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524310 + }, + { + "epoch": 2.5428572467818418, + "grad_norm": 3.041317953034195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524320 + }, + { + "epoch": 2.5429057449746777, + "grad_norm": 3.476142040881314e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524330 + }, + { + "epoch": 2.542954243167514, + "grad_norm": 3.407506454777831e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524340 + }, + { + "epoch": 2.54300274136035, + "grad_norm": 3.121698100017056e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524350 + }, + { + "epoch": 2.5430512395531863, + "grad_norm": 3.35611005652936e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524360 + }, + { + "epoch": 2.5430997377460223, + "grad_norm": 3.126843495238063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524370 + }, + { + "epoch": 2.543148235938858, + "grad_norm": 3.7260683427575714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524380 + }, + { + "epoch": 2.5431967341316946, + "grad_norm": 3.0414881280194095e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524390 + }, + { + "epoch": 2.5432452323245305, + "grad_norm": 3.0694096153638384e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524400 + }, + { + "epoch": 2.543293730517367, + "grad_norm": 3.153180117010379e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524410 + }, + { + "epoch": 2.5433422287102028, + "grad_norm": 3.6094839117595257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524420 + }, + { + "epoch": 2.5433907269030387, + "grad_norm": 3.309807894424921e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524430 + }, + { + "epoch": 2.5434392250958746, + "grad_norm": 3.4708495633140046e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524440 + }, + { + "epoch": 2.543487723288711, + "grad_norm": 3.0952929108707394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524450 + }, + { + "epoch": 2.543536221481547, + "grad_norm": 3.040358365069551e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524460 + }, + { + "epoch": 2.5435847196743833, + "grad_norm": 2.98719058378083e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524470 + }, + { + "epoch": 2.543633217867219, + "grad_norm": 3.341860121963691e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524480 + }, + { + "epoch": 2.543681716060055, + "grad_norm": 3.644838741934109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524490 + }, + { + "epoch": 2.5437302142528915, + "grad_norm": 3.011408367115109e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524500 + }, + { + "epoch": 2.5437787124457274, + "grad_norm": 9.120457207245636e-07, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524510 + }, + { + "epoch": 2.5438272106385638, + "grad_norm": 3.009708393619803e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524520 + }, + { + "epoch": 2.5438757088313997, + "grad_norm": 3.515876301207754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524530 + }, + { + "epoch": 2.5439242070242356, + "grad_norm": 3.3982647806851674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524540 + }, + { + "epoch": 2.543972705217072, + "grad_norm": 3.2596119581285166e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524550 + }, + { + "epoch": 2.544021203409908, + "grad_norm": 2.8694879361523817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524560 + }, + { + "epoch": 2.5440697016027443, + "grad_norm": 3.0548250151696266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524570 + }, + { + "epoch": 2.54411819979558, + "grad_norm": 3.193981967797299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524580 + }, + { + "epoch": 2.544166697988416, + "grad_norm": 2.9978604487723715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524590 + }, + { + "epoch": 2.544215196181252, + "grad_norm": 3.425920169775054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524600 + }, + { + "epoch": 2.5442636943740884, + "grad_norm": 3.142331550520794e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524610 + }, + { + "epoch": 2.5443121925669243, + "grad_norm": 3.3160997503500766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524620 + }, + { + "epoch": 2.5443606907597607, + "grad_norm": 3.2249232617687085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524630 + }, + { + "epoch": 2.5444091889525966, + "grad_norm": 2.9573305582175635e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524640 + }, + { + "epoch": 2.5444576871454325, + "grad_norm": 2.9168282011937663e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524650 + }, + { + "epoch": 2.544506185338269, + "grad_norm": 2.8651967909354426e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524660 + }, + { + "epoch": 2.544554683531105, + "grad_norm": 2.8583730937725704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524670 + }, + { + "epoch": 2.544603181723941, + "grad_norm": 3.4062082931995974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524680 + }, + { + "epoch": 2.544651679916777, + "grad_norm": 2.988780778423461e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524690 + }, + { + "epoch": 2.544700178109613, + "grad_norm": 2.9536215251368958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524700 + }, + { + "epoch": 2.5447486763024494, + "grad_norm": 3.094961797955875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524710 + }, + { + "epoch": 2.5447971744952853, + "grad_norm": 2.9656733957494907e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524720 + }, + { + "epoch": 2.5448456726881217, + "grad_norm": 3.326696784711203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524730 + }, + { + "epoch": 2.5448941708809576, + "grad_norm": 2.9049177285855876e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524740 + }, + { + "epoch": 2.5449426690737935, + "grad_norm": 3.095953715614996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524750 + }, + { + "epoch": 2.54499116726663, + "grad_norm": 3.388986158370244e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524760 + }, + { + "epoch": 2.545039665459466, + "grad_norm": 2.8595790624308393e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524770 + }, + { + "epoch": 2.545088163652302, + "grad_norm": 3.337993348395685e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524780 + }, + { + "epoch": 2.545136661845138, + "grad_norm": 2.974113400000533e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524790 + }, + { + "epoch": 2.545185160037974, + "grad_norm": 2.99479552268167e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524800 + }, + { + "epoch": 2.54523365823081, + "grad_norm": 2.8366262228018968e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524810 + }, + { + "epoch": 2.5452821564236463, + "grad_norm": 2.8696167220232383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524820 + }, + { + "epoch": 2.5453306546164822, + "grad_norm": 3.377163082518564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524830 + }, + { + "epoch": 2.5453791528093186, + "grad_norm": 3.0399629480371004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524840 + }, + { + "epoch": 2.5454276510021545, + "grad_norm": 3.1589983962021506e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524850 + }, + { + "epoch": 2.5454761491949904, + "grad_norm": 2.7121382473183075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524860 + }, + { + "epoch": 2.545524647387827, + "grad_norm": 3.0415915119874626e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524870 + }, + { + "epoch": 2.5455731455806627, + "grad_norm": 3.572602480517162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524880 + }, + { + "epoch": 2.545621643773499, + "grad_norm": 2.9258012901323127e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524890 + }, + { + "epoch": 2.545670141966335, + "grad_norm": 3.142488935736765e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524900 + }, + { + "epoch": 2.545718640159171, + "grad_norm": 2.810253185714373e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524910 + }, + { + "epoch": 2.5457671383520073, + "grad_norm": 2.7846372319118018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524920 + }, + { + "epoch": 2.545815636544843, + "grad_norm": 3.3324820236657615e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524930 + }, + { + "epoch": 2.5458641347376796, + "grad_norm": 2.8155131559515212e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524940 + }, + { + "epoch": 2.5459126329305155, + "grad_norm": 2.79200538244595e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524950 + }, + { + "epoch": 2.5459611311233514, + "grad_norm": 2.9324926487106495e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524960 + }, + { + "epoch": 2.5460096293161874, + "grad_norm": 2.8340329194520564e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524970 + }, + { + "epoch": 2.5460581275090237, + "grad_norm": 3.206797671850836e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524980 + }, + { + "epoch": 2.5461066257018596, + "grad_norm": 2.7771323019010197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 524990 + }, + { + "epoch": 2.546155123894696, + "grad_norm": 2.700037704528313e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525000 + }, + { + "epoch": 2.546203622087532, + "grad_norm": 2.842308965966822e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525010 + }, + { + "epoch": 2.546252120280368, + "grad_norm": 2.6428757848862006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525020 + }, + { + "epoch": 2.546300618473204, + "grad_norm": 3.442522356067457e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525030 + }, + { + "epoch": 2.54634911666604, + "grad_norm": 2.868809012568363e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525040 + }, + { + "epoch": 2.5463976148588765, + "grad_norm": 2.8110079597354343e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525050 + }, + { + "epoch": 2.5464461130517124, + "grad_norm": 3.0137176310063296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525060 + }, + { + "epoch": 2.5464946112445483, + "grad_norm": 3.113237667662361e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525070 + }, + { + "epoch": 2.5465431094373847, + "grad_norm": 3.4682283711617856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525080 + }, + { + "epoch": 2.5465916076302206, + "grad_norm": 2.622636507965126e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525090 + }, + { + "epoch": 2.546640105823057, + "grad_norm": 2.878832106034679e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525100 + }, + { + "epoch": 2.546688604015893, + "grad_norm": 2.917275843117295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525110 + }, + { + "epoch": 2.546737102208729, + "grad_norm": 2.7930321166991234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525120 + }, + { + "epoch": 2.5467856004015648, + "grad_norm": 3.473088483474385e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525130 + }, + { + "epoch": 2.546834098594401, + "grad_norm": 2.981792945888628e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525140 + }, + { + "epoch": 2.546882596787237, + "grad_norm": 3.235527046285824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525150 + }, + { + "epoch": 2.5469310949800734, + "grad_norm": 2.7208484354446227e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525160 + }, + { + "epoch": 2.5469795931729093, + "grad_norm": 2.7184649198375155e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525170 + }, + { + "epoch": 2.5470280913657453, + "grad_norm": 3.530318792854814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525180 + }, + { + "epoch": 2.5470765895585816, + "grad_norm": 2.8646935490428405e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525190 + }, + { + "epoch": 2.5471250877514175, + "grad_norm": 2.796955200778939e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525200 + }, + { + "epoch": 2.547173585944254, + "grad_norm": 2.5418387394893216e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525210 + }, + { + "epoch": 2.54722208413709, + "grad_norm": 2.6955335741263298e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525220 + }, + { + "epoch": 2.5472705823299258, + "grad_norm": 3.293623862532513e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525230 + }, + { + "epoch": 2.547319080522762, + "grad_norm": 2.845445479238151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525240 + }, + { + "epoch": 2.547367578715598, + "grad_norm": 2.6756168836072902e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525250 + }, + { + "epoch": 2.5474160769084344, + "grad_norm": 2.5792591173967594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525260 + }, + { + "epoch": 2.5474645751012703, + "grad_norm": 2.5077728338374072e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525270 + }, + { + "epoch": 2.5475130732941063, + "grad_norm": 3.0647711923847964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525280 + }, + { + "epoch": 2.5475615714869426, + "grad_norm": 2.7986738260210586e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525290 + }, + { + "epoch": 2.5476100696797785, + "grad_norm": 2.671293408695874e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525300 + }, + { + "epoch": 2.547658567872615, + "grad_norm": 2.9007896529265054e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525310 + }, + { + "epoch": 2.547707066065451, + "grad_norm": 2.5598467345844256e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525320 + }, + { + "epoch": 2.5477555642582868, + "grad_norm": 3.163760808888583e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525330 + }, + { + "epoch": 2.5478040624511227, + "grad_norm": 2.9748347785130136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525340 + }, + { + "epoch": 2.547852560643959, + "grad_norm": 3.066440257271097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525350 + }, + { + "epoch": 2.547901058836795, + "grad_norm": 2.79989027518468e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525360 + }, + { + "epoch": 2.5479495570296313, + "grad_norm": 2.705487567311593e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525370 + }, + { + "epoch": 2.5479980552224673, + "grad_norm": 3.421987315732622e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525380 + }, + { + "epoch": 2.548046553415303, + "grad_norm": 3.2886052991898396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525390 + }, + { + "epoch": 2.5480950516081395, + "grad_norm": 2.4197795767122443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525400 + }, + { + "epoch": 2.5481435498009755, + "grad_norm": 2.724422287769812e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525410 + }, + { + "epoch": 2.548192047993812, + "grad_norm": 2.5374024659186034e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525420 + }, + { + "epoch": 2.5482405461866477, + "grad_norm": 3.053873243175076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525430 + }, + { + "epoch": 2.5482890443794837, + "grad_norm": 3.0859801825045e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525440 + }, + { + "epoch": 2.54833754257232, + "grad_norm": 2.4733617820515974e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525450 + }, + { + "epoch": 2.548386040765156, + "grad_norm": 2.992666381373965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525460 + }, + { + "epoch": 2.5484345389579923, + "grad_norm": 2.8771486526579793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525470 + }, + { + "epoch": 2.5484830371508282, + "grad_norm": 3.3452234760034116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525480 + }, + { + "epoch": 2.548531535343664, + "grad_norm": 2.723868952614339e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525490 + }, + { + "epoch": 2.5485800335365, + "grad_norm": 2.5728642327749185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525500 + }, + { + "epoch": 2.5486285317293365, + "grad_norm": 2.4474902104998364e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525510 + }, + { + "epoch": 2.5486770299221724, + "grad_norm": 2.5306558626425613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525520 + }, + { + "epoch": 2.5487255281150087, + "grad_norm": 3.497383360695494e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525530 + }, + { + "epoch": 2.5487740263078447, + "grad_norm": 2.514917163409791e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525540 + }, + { + "epoch": 2.5488225245006806, + "grad_norm": 2.7147208925271116e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525550 + }, + { + "epoch": 2.548871022693517, + "grad_norm": 2.5922908264419675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525560 + }, + { + "epoch": 2.548919520886353, + "grad_norm": 2.3822764205760905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525570 + }, + { + "epoch": 2.5489680190791892, + "grad_norm": 3.1539396871949066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525580 + }, + { + "epoch": 2.549016517272025, + "grad_norm": 2.510418894985378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525590 + }, + { + "epoch": 2.549065015464861, + "grad_norm": 2.320242309394871e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525600 + }, + { + "epoch": 2.5491135136576974, + "grad_norm": 2.3397113579903817e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525610 + }, + { + "epoch": 2.5491620118505334, + "grad_norm": 2.362981632586525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525620 + }, + { + "epoch": 2.5492105100433697, + "grad_norm": 3.6368128064623306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525630 + }, + { + "epoch": 2.5492590082362057, + "grad_norm": 2.5349409682462465e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525640 + }, + { + "epoch": 2.5493075064290416, + "grad_norm": 2.5418506410801456e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525650 + }, + { + "epoch": 2.5493560046218775, + "grad_norm": 2.3642918733912666e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525660 + }, + { + "epoch": 2.549404502814714, + "grad_norm": 2.5452033369788296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525670 + }, + { + "epoch": 2.54945300100755, + "grad_norm": 2.8043588784498752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525680 + }, + { + "epoch": 2.549501499200386, + "grad_norm": 2.461168868705954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525690 + }, + { + "epoch": 2.549549997393222, + "grad_norm": 2.376638441603518e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525700 + }, + { + "epoch": 2.549598495586058, + "grad_norm": 2.2785698661209608e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525710 + }, + { + "epoch": 2.5496469937788944, + "grad_norm": 2.581072777729787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525720 + }, + { + "epoch": 2.5496954919717303, + "grad_norm": 3.0504267556352715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525730 + }, + { + "epoch": 2.5497439901645667, + "grad_norm": 2.456453529475766e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525740 + }, + { + "epoch": 2.5497924883574026, + "grad_norm": 3.077649779470448e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525750 + }, + { + "epoch": 2.5498409865502385, + "grad_norm": 2.803554011165943e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525760 + }, + { + "epoch": 2.549889484743075, + "grad_norm": 2.441837310129813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525770 + }, + { + "epoch": 2.549937982935911, + "grad_norm": 3.079569310671104e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525780 + }, + { + "epoch": 2.549986481128747, + "grad_norm": 2.2249757591907837e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525790 + }, + { + "epoch": 2.550034979321583, + "grad_norm": 2.9133717660556613e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525800 + }, + { + "epoch": 2.550083477514419, + "grad_norm": 2.549683841834849e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525810 + }, + { + "epoch": 2.5501319757072554, + "grad_norm": 2.193226755764499e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525820 + }, + { + "epoch": 2.5501804739000913, + "grad_norm": 2.868421233870322e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525830 + }, + { + "epoch": 2.5502289720929276, + "grad_norm": 2.3960778250398107e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525840 + }, + { + "epoch": 2.5502774702857636, + "grad_norm": 2.3991253428334858e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525850 + }, + { + "epoch": 2.5503259684785995, + "grad_norm": 2.2377248498628433e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525860 + }, + { + "epoch": 2.5503744666714354, + "grad_norm": 2.4859254210696236e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525870 + }, + { + "epoch": 2.550422964864272, + "grad_norm": 3.108904422788328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525880 + }, + { + "epoch": 2.5504714630571077, + "grad_norm": 2.5646164303338992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525890 + }, + { + "epoch": 2.550519961249944, + "grad_norm": 2.523228204154293e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525900 + }, + { + "epoch": 2.55056845944278, + "grad_norm": 2.4470224957440223e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525910 + }, + { + "epoch": 2.550616957635616, + "grad_norm": 2.301007739902161e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525920 + }, + { + "epoch": 2.5506654558284523, + "grad_norm": 2.872350890470443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525930 + }, + { + "epoch": 2.550713954021288, + "grad_norm": 2.3166448315237176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525940 + }, + { + "epoch": 2.5507624522141246, + "grad_norm": 2.4218753225113687e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525950 + }, + { + "epoch": 2.5508109504069605, + "grad_norm": 2.2810771937997742e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525960 + }, + { + "epoch": 2.5508594485997964, + "grad_norm": 2.8479419711402443e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525970 + }, + { + "epoch": 2.5509079467926328, + "grad_norm": 3.1177968651263654e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525980 + }, + { + "epoch": 2.5509564449854687, + "grad_norm": 2.2411363431729114e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 525990 + }, + { + "epoch": 2.551004943178305, + "grad_norm": 2.454803826879015e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526000 + }, + { + "epoch": 2.551053441371141, + "grad_norm": 2.616913974406998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526010 + }, + { + "epoch": 2.551101939563977, + "grad_norm": 2.2792256970660674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526020 + }, + { + "epoch": 2.551150437756813, + "grad_norm": 2.9500085929612396e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526030 + }, + { + "epoch": 2.551198935949649, + "grad_norm": 2.300016177514408e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526040 + }, + { + "epoch": 2.551247434142485, + "grad_norm": 2.280932953624415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526050 + }, + { + "epoch": 2.5512959323353215, + "grad_norm": 2.459415604505466e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526060 + }, + { + "epoch": 2.5513444305281574, + "grad_norm": 2.0983465631729814e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526070 + }, + { + "epoch": 2.5513929287209933, + "grad_norm": 3.2084077616900686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526080 + }, + { + "epoch": 2.5514414269138297, + "grad_norm": 2.3409626237480552e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526090 + }, + { + "epoch": 2.5514899251066656, + "grad_norm": 2.4515351526588347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526100 + }, + { + "epoch": 2.551538423299502, + "grad_norm": 2.528784293076569e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526110 + }, + { + "epoch": 2.551586921492338, + "grad_norm": 8.004126783589527e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526120 + }, + { + "epoch": 2.551635419685174, + "grad_norm": 3.123425784679057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526130 + }, + { + "epoch": 2.55168391787801, + "grad_norm": 2.1406798111911485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526140 + }, + { + "epoch": 2.551732416070846, + "grad_norm": 2.2030313573395688e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526150 + }, + { + "epoch": 2.5517809142636825, + "grad_norm": 2.207384497410203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526160 + }, + { + "epoch": 2.5518294124565184, + "grad_norm": 2.700543966227542e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526170 + }, + { + "epoch": 2.5518779106493543, + "grad_norm": 3.1425667401663304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526180 + }, + { + "epoch": 2.5519264088421902, + "grad_norm": 2.3637857893277214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526190 + }, + { + "epoch": 2.5519749070350266, + "grad_norm": 2.0431082603522555e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526200 + }, + { + "epoch": 2.552023405227863, + "grad_norm": 2.135039345319001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526210 + }, + { + "epoch": 2.552071903420699, + "grad_norm": 2.0839156178453777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526220 + }, + { + "epoch": 2.552120401613535, + "grad_norm": 3.135891191163864e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526230 + }, + { + "epoch": 2.5521688998063707, + "grad_norm": 2.240390983843099e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526240 + }, + { + "epoch": 2.552217397999207, + "grad_norm": 2.2966803570056982e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526250 + }, + { + "epoch": 2.552265896192043, + "grad_norm": 2.1622422963218924e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526260 + }, + { + "epoch": 2.5523143943848794, + "grad_norm": 2.3957760220127966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526270 + }, + { + "epoch": 2.5523628925777153, + "grad_norm": 3.096107903388656e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526280 + }, + { + "epoch": 2.5524113907705512, + "grad_norm": 2.1684650519659954e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526290 + }, + { + "epoch": 2.5524598889633876, + "grad_norm": 2.0307638237682113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526300 + }, + { + "epoch": 2.5525083871562235, + "grad_norm": 2.38520421191879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526310 + }, + { + "epoch": 2.55255688534906, + "grad_norm": 2.467083426438421e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526320 + }, + { + "epoch": 2.552605383541896, + "grad_norm": 3.111329860416845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526330 + }, + { + "epoch": 2.5526538817347317, + "grad_norm": 2.358734008112151e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526340 + }, + { + "epoch": 2.552702379927568, + "grad_norm": 2.4978023205335376e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526350 + }, + { + "epoch": 2.552750878120404, + "grad_norm": 2.047673497429514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526360 + }, + { + "epoch": 2.5527993763132404, + "grad_norm": 2.11064836719288e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526370 + }, + { + "epoch": 2.5528478745060763, + "grad_norm": 2.7868807705999643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526380 + }, + { + "epoch": 2.5528963726989122, + "grad_norm": 2.0342758588753895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526390 + }, + { + "epoch": 2.552944870891748, + "grad_norm": 1.9411242391242922e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526400 + }, + { + "epoch": 2.5529933690845845, + "grad_norm": 2.1972768493583317e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526410 + }, + { + "epoch": 2.5530418672774204, + "grad_norm": 1.92077731497875e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526420 + }, + { + "epoch": 2.553090365470257, + "grad_norm": 2.8111701411148715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526430 + }, + { + "epoch": 2.5531388636630927, + "grad_norm": 2.0888244023353764e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526440 + }, + { + "epoch": 2.5531873618559286, + "grad_norm": 2.4313333568670714e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526450 + }, + { + "epoch": 2.553235860048765, + "grad_norm": 2.2139282407351857e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526460 + }, + { + "epoch": 2.553284358241601, + "grad_norm": 1.9659555761109004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526470 + }, + { + "epoch": 2.5533328564344373, + "grad_norm": 2.9029893156007347e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526480 + }, + { + "epoch": 2.5533813546272732, + "grad_norm": 2.2357779627668606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526490 + }, + { + "epoch": 2.553429852820109, + "grad_norm": 2.2501703611510493e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526500 + }, + { + "epoch": 2.5534783510129455, + "grad_norm": 2.1477855938201174e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526510 + }, + { + "epoch": 2.5535268492057814, + "grad_norm": 2.1793171711692594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526520 + }, + { + "epoch": 2.553575347398618, + "grad_norm": 2.9355588182511383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526530 + }, + { + "epoch": 2.5536238455914537, + "grad_norm": 2.5501050160414707e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526540 + }, + { + "epoch": 2.5536723437842896, + "grad_norm": 2.0232942432585332e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526550 + }, + { + "epoch": 2.5537208419771256, + "grad_norm": 2.0536523592795675e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526560 + }, + { + "epoch": 2.553769340169962, + "grad_norm": 2.241783292333821e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526570 + }, + { + "epoch": 2.553817838362798, + "grad_norm": 2.801098375471156e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526580 + }, + { + "epoch": 2.553866336555634, + "grad_norm": 2.101537255327912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526590 + }, + { + "epoch": 2.55391483474847, + "grad_norm": 2.6072974890212208e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526600 + }, + { + "epoch": 2.553963332941306, + "grad_norm": 1.947579164607305e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526610 + }, + { + "epoch": 2.5540118311341424, + "grad_norm": 2.2391985154968097e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526620 + }, + { + "epoch": 2.5540603293269784, + "grad_norm": 2.9271964407939777e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526630 + }, + { + "epoch": 2.5541088275198147, + "grad_norm": 2.36914541318356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526640 + }, + { + "epoch": 2.5541573257126506, + "grad_norm": 2.4345704119355105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526650 + }, + { + "epoch": 2.5542058239054866, + "grad_norm": 2.171830359998239e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526660 + }, + { + "epoch": 2.554254322098323, + "grad_norm": 2.0201365913408154e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526670 + }, + { + "epoch": 2.554302820291159, + "grad_norm": 3.008311821872667e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526680 + }, + { + "epoch": 2.554351318483995, + "grad_norm": 2.1633637103946057e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526690 + }, + { + "epoch": 2.554399816676831, + "grad_norm": 2.541870003369695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526700 + }, + { + "epoch": 2.554448314869667, + "grad_norm": 2.4223419714530792e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526710 + }, + { + "epoch": 2.5544968130625034, + "grad_norm": 2.1114290760237964e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526720 + }, + { + "epoch": 2.5545453112553393, + "grad_norm": 2.8955522424212177e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526730 + }, + { + "epoch": 2.5545938094481757, + "grad_norm": 1.9969133902009162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526740 + }, + { + "epoch": 2.5546423076410116, + "grad_norm": 2.4847324198162823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526750 + }, + { + "epoch": 2.5546908058338476, + "grad_norm": 1.9255445238286484e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526760 + }, + { + "epoch": 2.5547393040266835, + "grad_norm": 1.8526321809986257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526770 + }, + { + "epoch": 2.55478780221952, + "grad_norm": 3.033506956739984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526780 + }, + { + "epoch": 2.5548363004123558, + "grad_norm": 2.4890560723633826e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526790 + }, + { + "epoch": 2.554884798605192, + "grad_norm": 2.689473355133032e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526800 + }, + { + "epoch": 2.554933296798028, + "grad_norm": 2.1940854466606652e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526810 + }, + { + "epoch": 2.554981794990864, + "grad_norm": 1.938882654428653e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526820 + }, + { + "epoch": 2.5550302931837003, + "grad_norm": 2.5202643527677537e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526830 + }, + { + "epoch": 2.5550787913765363, + "grad_norm": 2.5011217985593248e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526840 + }, + { + "epoch": 2.5551272895693726, + "grad_norm": 1.9835818321212173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526850 + }, + { + "epoch": 2.5551757877622086, + "grad_norm": 1.9744978985158923e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526860 + }, + { + "epoch": 2.5552242859550445, + "grad_norm": 1.9463783473838703e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526870 + }, + { + "epoch": 2.555272784147881, + "grad_norm": 2.8513600369706182e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526880 + }, + { + "epoch": 2.5553212823407168, + "grad_norm": 2.2125037801856706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526890 + }, + { + "epoch": 2.555369780533553, + "grad_norm": 1.953881856309181e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526900 + }, + { + "epoch": 2.555418278726389, + "grad_norm": 1.888581557807356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526910 + }, + { + "epoch": 2.555466776919225, + "grad_norm": 2.0412009860137914e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526920 + }, + { + "epoch": 2.555515275112061, + "grad_norm": 2.4535818710091917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526930 + }, + { + "epoch": 2.5555637733048973, + "grad_norm": 1.788801107238669e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526940 + }, + { + "epoch": 2.555612271497733, + "grad_norm": 2.4359778194593673e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526950 + }, + { + "epoch": 2.5556607696905695, + "grad_norm": 2.1440614617063147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526960 + }, + { + "epoch": 2.5557092678834055, + "grad_norm": 1.810872518603901e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526970 + }, + { + "epoch": 2.5557577660762414, + "grad_norm": 2.5596776254133147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526980 + }, + { + "epoch": 2.5558062642690778, + "grad_norm": 1.937165272636321e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 526990 + }, + { + "epoch": 2.5558547624619137, + "grad_norm": 2.1795594662421536e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527000 + }, + { + "epoch": 2.55590326065475, + "grad_norm": 2.012676780793754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527010 + }, + { + "epoch": 2.555951758847586, + "grad_norm": 1.9857763433606124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527020 + }, + { + "epoch": 2.556000257040422, + "grad_norm": 3.0048710186747485e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527030 + }, + { + "epoch": 2.5560487552332583, + "grad_norm": 2.294767398325348e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527040 + }, + { + "epoch": 2.556097253426094, + "grad_norm": 2.0005053613658674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527050 + }, + { + "epoch": 2.5561457516189305, + "grad_norm": 2.1691253238032004e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527060 + }, + { + "epoch": 2.5561942498117665, + "grad_norm": 2.0490132257577898e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527070 + }, + { + "epoch": 2.5562427480046024, + "grad_norm": 2.7604599495134607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527080 + }, + { + "epoch": 2.5562912461974383, + "grad_norm": 2.9058623951527807e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527090 + }, + { + "epoch": 2.5563397443902747, + "grad_norm": 1.8818955282995375e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527100 + }, + { + "epoch": 2.5563882425831106, + "grad_norm": 1.8759669373480392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527110 + }, + { + "epoch": 2.556436740775947, + "grad_norm": 1.963818618833102e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527120 + }, + { + "epoch": 2.556485238968783, + "grad_norm": 2.8225880299714845e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527130 + }, + { + "epoch": 2.556533737161619, + "grad_norm": 1.8835569548514286e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527140 + }, + { + "epoch": 2.556582235354455, + "grad_norm": 2.117329778172916e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527150 + }, + { + "epoch": 2.556630733547291, + "grad_norm": 1.822100692550066e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527160 + }, + { + "epoch": 2.5566792317401275, + "grad_norm": 2.1853344023270438e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527170 + }, + { + "epoch": 2.5567277299329634, + "grad_norm": 2.9725249817147414e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527180 + }, + { + "epoch": 2.5567762281257993, + "grad_norm": 1.979922004125001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527190 + }, + { + "epoch": 2.5568247263186357, + "grad_norm": 1.801308258109202e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527200 + }, + { + "epoch": 2.5568732245114716, + "grad_norm": 1.8465611262286075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527210 + }, + { + "epoch": 2.556921722704308, + "grad_norm": 1.988617803760917e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527220 + }, + { + "epoch": 2.556970220897144, + "grad_norm": 2.97880209387813e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527230 + }, + { + "epoch": 2.55701871908998, + "grad_norm": 2.1101929092992577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527240 + }, + { + "epoch": 2.557067217282816, + "grad_norm": 2.5132269598771018e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527250 + }, + { + "epoch": 2.557115715475652, + "grad_norm": 2.083414329945299e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527260 + }, + { + "epoch": 2.5571642136684885, + "grad_norm": 1.7375779037820394e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527270 + }, + { + "epoch": 2.5572127118613244, + "grad_norm": 2.4443417956376834e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527280 + }, + { + "epoch": 2.5572612100541603, + "grad_norm": 1.8549997093941784e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527290 + }, + { + "epoch": 2.557309708246996, + "grad_norm": 1.890678547056268e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527300 + }, + { + "epoch": 2.5573582064398326, + "grad_norm": 2.7873326757799077e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527310 + }, + { + "epoch": 2.5574067046326685, + "grad_norm": 2.0314711690616605e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527320 + }, + { + "epoch": 2.557455202825505, + "grad_norm": 2.2272294231129308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527330 + }, + { + "epoch": 2.557503701018341, + "grad_norm": 2.1907640146423546e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527340 + }, + { + "epoch": 2.5575521992111767, + "grad_norm": 2.181186786742728e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527350 + }, + { + "epoch": 2.557600697404013, + "grad_norm": 1.854589548599961e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527360 + }, + { + "epoch": 2.557649195596849, + "grad_norm": 1.904747648495686e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527370 + }, + { + "epoch": 2.5576976937896854, + "grad_norm": 2.3963508510860265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527380 + }, + { + "epoch": 2.5577461919825213, + "grad_norm": 2.1809075434475744e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527390 + }, + { + "epoch": 2.557794690175357, + "grad_norm": 2.1189569210378068e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527400 + }, + { + "epoch": 2.5578431883681936, + "grad_norm": 1.9489080571588602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527410 + }, + { + "epoch": 2.5578916865610295, + "grad_norm": 1.888490608337179e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527420 + }, + { + "epoch": 2.557940184753866, + "grad_norm": 2.8005990415636006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527430 + }, + { + "epoch": 2.557988682946702, + "grad_norm": 2.0385824583968315e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527440 + }, + { + "epoch": 2.5580371811395377, + "grad_norm": 2.007138633075556e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527450 + }, + { + "epoch": 2.5580856793323736, + "grad_norm": 1.7251792883143935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527460 + }, + { + "epoch": 2.55813417752521, + "grad_norm": 1.7756764947307602e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527470 + }, + { + "epoch": 2.558182675718046, + "grad_norm": 2.7598270335715824e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527480 + }, + { + "epoch": 2.5582311739108823, + "grad_norm": 2.164002843585422e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527490 + }, + { + "epoch": 2.558279672103718, + "grad_norm": 1.7116736472644334e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527500 + }, + { + "epoch": 2.558328170296554, + "grad_norm": 1.6247918566136832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527510 + }, + { + "epoch": 2.5583766684893905, + "grad_norm": 1.8637461352000173e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527520 + }, + { + "epoch": 2.5584251666822264, + "grad_norm": 2.6470024394598113e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527530 + }, + { + "epoch": 2.558473664875063, + "grad_norm": 2.283269573410962e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527540 + }, + { + "epoch": 2.5585221630678987, + "grad_norm": 2.122815168092984e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527550 + }, + { + "epoch": 2.5585706612607346, + "grad_norm": 1.7545991326528565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527560 + }, + { + "epoch": 2.558619159453571, + "grad_norm": 1.737081056774059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527570 + }, + { + "epoch": 2.558667657646407, + "grad_norm": 2.5288422023095336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527580 + }, + { + "epoch": 2.5587161558392433, + "grad_norm": 2.518812358687228e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527590 + }, + { + "epoch": 2.558764654032079, + "grad_norm": 1.9712571130980905e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527600 + }, + { + "epoch": 2.558813152224915, + "grad_norm": 1.637557822675717e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527610 + }, + { + "epoch": 2.558861650417751, + "grad_norm": 2.0630006147825952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527620 + }, + { + "epoch": 2.5589101486105874, + "grad_norm": 2.3668718540648115e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527630 + }, + { + "epoch": 2.5589586468034233, + "grad_norm": 2.3369988610966175e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527640 + }, + { + "epoch": 2.5590071449962597, + "grad_norm": 2.0393487787373488e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527650 + }, + { + "epoch": 2.5590556431890956, + "grad_norm": 2.4838847423325205e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527660 + }, + { + "epoch": 2.5591041413819315, + "grad_norm": 1.9128743034002582e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527670 + }, + { + "epoch": 2.559152639574768, + "grad_norm": 3.396351999640501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527680 + }, + { + "epoch": 2.559201137767604, + "grad_norm": 1.7966309329153773e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527690 + }, + { + "epoch": 2.55924963596044, + "grad_norm": 2.4477571969327983e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527700 + }, + { + "epoch": 2.559298134153276, + "grad_norm": 2.226079764966471e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527710 + }, + { + "epoch": 2.559346632346112, + "grad_norm": 1.540909977393312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527720 + }, + { + "epoch": 2.5593951305389484, + "grad_norm": 3.374055879135085e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527730 + }, + { + "epoch": 2.5594436287317843, + "grad_norm": 2.535869469966201e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527740 + }, + { + "epoch": 2.5594921269246207, + "grad_norm": 1.7937130891709785e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527750 + }, + { + "epoch": 2.5595406251174566, + "grad_norm": 1.8286623770791266e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527760 + }, + { + "epoch": 2.5595891233102925, + "grad_norm": 1.9012977858778868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527770 + }, + { + "epoch": 2.559637621503129, + "grad_norm": 2.94818853774359e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527780 + }, + { + "epoch": 2.559686119695965, + "grad_norm": 1.9099086756568795e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527790 + }, + { + "epoch": 2.559734617888801, + "grad_norm": 1.6088094412225473e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527800 + }, + { + "epoch": 2.559783116081637, + "grad_norm": 2.211936767082534e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527810 + }, + { + "epoch": 2.559831614274473, + "grad_norm": 1.5751149717857515e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527820 + }, + { + "epoch": 2.559880112467309, + "grad_norm": 2.3414125749354753e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527830 + }, + { + "epoch": 2.5599286106601453, + "grad_norm": 2.0252189258940234e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527840 + }, + { + "epoch": 2.5599771088529812, + "grad_norm": 2.1400252236958295e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527850 + }, + { + "epoch": 2.5600256070458176, + "grad_norm": 2.0333107642045434e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527860 + }, + { + "epoch": 2.5600741052386535, + "grad_norm": 1.9002829532155374e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527870 + }, + { + "epoch": 2.5601226034314895, + "grad_norm": 2.1612640566104346e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527880 + }, + { + "epoch": 2.560171101624326, + "grad_norm": 2.3037957319615998e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527890 + }, + { + "epoch": 2.5602195998171617, + "grad_norm": 1.645886982259981e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527900 + }, + { + "epoch": 2.560268098009998, + "grad_norm": 1.828421147820336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527910 + }, + { + "epoch": 2.560316596202834, + "grad_norm": 2.1322701826420598e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527920 + }, + { + "epoch": 2.56036509439567, + "grad_norm": 2.1824480000987023e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527930 + }, + { + "epoch": 2.5604135925885063, + "grad_norm": 2.0192841176935872e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527940 + }, + { + "epoch": 2.5604620907813422, + "grad_norm": 1.6690812287833978e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527950 + }, + { + "epoch": 2.5605105889741786, + "grad_norm": 1.7526700091252678e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527960 + }, + { + "epoch": 2.5605590871670145, + "grad_norm": 1.8513997446234498e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527970 + }, + { + "epoch": 2.5606075853598504, + "grad_norm": 2.794466880118307e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527980 + }, + { + "epoch": 2.5606560835526864, + "grad_norm": 2.0173777315335428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 527990 + }, + { + "epoch": 2.5607045817455227, + "grad_norm": 1.5376178552628517e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528000 + }, + { + "epoch": 2.5607530799383587, + "grad_norm": 2.008935773289977e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528010 + }, + { + "epoch": 2.560801578131195, + "grad_norm": 1.88104714027304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528020 + }, + { + "epoch": 2.560850076324031, + "grad_norm": 2.255120179484038e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528030 + }, + { + "epoch": 2.560898574516867, + "grad_norm": 1.808712823958558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528040 + }, + { + "epoch": 2.5609470727097032, + "grad_norm": 1.821040740423996e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528050 + }, + { + "epoch": 2.560995570902539, + "grad_norm": 1.9720362232078514e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528060 + }, + { + "epoch": 2.5610440690953755, + "grad_norm": 1.6868911600909087e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528070 + }, + { + "epoch": 2.5610925672882114, + "grad_norm": 2.6762913662992105e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528080 + }, + { + "epoch": 2.5611410654810474, + "grad_norm": 2.4528576503257682e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528090 + }, + { + "epoch": 2.5611895636738837, + "grad_norm": 2.1199852540121356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528100 + }, + { + "epoch": 2.5612380618667197, + "grad_norm": 1.671690519344793e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528110 + }, + { + "epoch": 2.561286560059556, + "grad_norm": 1.659887516325398e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528120 + }, + { + "epoch": 2.561335058252392, + "grad_norm": 3.0530692640695634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528130 + }, + { + "epoch": 2.561383556445228, + "grad_norm": 2.5725716668034693e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528140 + }, + { + "epoch": 2.561432054638064, + "grad_norm": 1.79304500136368e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528150 + }, + { + "epoch": 2.5614805528309, + "grad_norm": 2.2400584498427634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528160 + }, + { + "epoch": 2.561529051023736, + "grad_norm": 2.074688865150165e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528170 + }, + { + "epoch": 2.5615775492165724, + "grad_norm": 2.5175364015694868e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528180 + }, + { + "epoch": 2.5616260474094084, + "grad_norm": 1.9212560431469683e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528190 + }, + { + "epoch": 2.5616745456022443, + "grad_norm": 1.7925279038877306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528200 + }, + { + "epoch": 2.5617230437950806, + "grad_norm": 2.137959320691607e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528210 + }, + { + "epoch": 2.5617715419879166, + "grad_norm": 1.7337775659598265e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528220 + }, + { + "epoch": 2.561820040180753, + "grad_norm": 2.5009622817151467e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528230 + }, + { + "epoch": 2.561868538373589, + "grad_norm": 1.5281676368772423e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528240 + }, + { + "epoch": 2.5619170365664248, + "grad_norm": 1.870470889286935e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528250 + }, + { + "epoch": 2.561965534759261, + "grad_norm": 1.6073308017894306e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528260 + }, + { + "epoch": 2.562014032952097, + "grad_norm": 1.89874747036356e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528270 + }, + { + "epoch": 2.5620625311449334, + "grad_norm": 2.3579278973784312e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528280 + }, + { + "epoch": 2.5621110293377694, + "grad_norm": 1.9165280917832206e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528290 + }, + { + "epoch": 2.5621595275306053, + "grad_norm": 2.0119504284821232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528300 + }, + { + "epoch": 2.5622080257234416, + "grad_norm": 1.656332493382706e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528310 + }, + { + "epoch": 2.5622565239162776, + "grad_norm": 1.5227701766207247e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528320 + }, + { + "epoch": 2.562305022109114, + "grad_norm": 2.1948485695588715e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528330 + }, + { + "epoch": 2.56235352030195, + "grad_norm": 1.621865663992139e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528340 + }, + { + "epoch": 2.5624020184947858, + "grad_norm": 1.4766531997167931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528350 + }, + { + "epoch": 2.5624505166876217, + "grad_norm": 1.7587106881933323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528360 + }, + { + "epoch": 2.562499014880458, + "grad_norm": 1.4778368750967275e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528370 + }, + { + "epoch": 2.562547513073294, + "grad_norm": 2.198573234579726e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528380 + }, + { + "epoch": 2.5625960112661303, + "grad_norm": 2.108237140419078e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528390 + }, + { + "epoch": 2.5626445094589663, + "grad_norm": 1.567795315793319e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528400 + }, + { + "epoch": 2.562693007651802, + "grad_norm": 1.565319251994879e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528410 + }, + { + "epoch": 2.5627415058446386, + "grad_norm": 1.6483587828020063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528420 + }, + { + "epoch": 2.5627900040374745, + "grad_norm": 1.911068636673008e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528430 + }, + { + "epoch": 2.562838502230311, + "grad_norm": 2.250274455661838e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528440 + }, + { + "epoch": 2.5628870004231468, + "grad_norm": 2.4585753877204297e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528450 + }, + { + "epoch": 2.5629354986159827, + "grad_norm": 2.2045478331733648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528460 + }, + { + "epoch": 2.562983996808819, + "grad_norm": 1.703198471147971e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528470 + }, + { + "epoch": 2.563032495001655, + "grad_norm": 2.1010970741031088e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528480 + }, + { + "epoch": 2.5630809931944913, + "grad_norm": 2.8192671308602257e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528490 + }, + { + "epoch": 2.5631294913873273, + "grad_norm": 2.6660147867119122e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528500 + }, + { + "epoch": 2.563177989580163, + "grad_norm": 1.7935462892637588e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528510 + }, + { + "epoch": 2.563226487772999, + "grad_norm": 1.7787110451195076e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528520 + }, + { + "epoch": 2.5632749859658355, + "grad_norm": 2.3129219428597025e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528530 + }, + { + "epoch": 2.5633234841586714, + "grad_norm": 2.497788820221558e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528540 + }, + { + "epoch": 2.5633719823515078, + "grad_norm": 1.6327929230897098e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528550 + }, + { + "epoch": 2.5634204805443437, + "grad_norm": 1.4870413345136058e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528560 + }, + { + "epoch": 2.5634689787371796, + "grad_norm": 1.5474078907118383e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528570 + }, + { + "epoch": 2.563517476930016, + "grad_norm": 2.8889717285096594e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528580 + }, + { + "epoch": 2.563565975122852, + "grad_norm": 1.7101084992532378e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528590 + }, + { + "epoch": 2.5636144733156883, + "grad_norm": 2.2440634239728752e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528600 + }, + { + "epoch": 2.563662971508524, + "grad_norm": 1.4335288511801991e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528610 + }, + { + "epoch": 2.56371146970136, + "grad_norm": 1.7902246796097643e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528620 + }, + { + "epoch": 2.5637599678941965, + "grad_norm": 1.774388458386511e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528630 + }, + { + "epoch": 2.5638084660870324, + "grad_norm": 1.9323111999369758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528640 + }, + { + "epoch": 2.5638569642798688, + "grad_norm": 2.2786482034575783e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528650 + }, + { + "epoch": 2.5639054624727047, + "grad_norm": 1.59740398686381e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528660 + }, + { + "epoch": 2.5639539606655406, + "grad_norm": 1.9273382889650748e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528670 + }, + { + "epoch": 2.5640024588583765, + "grad_norm": 1.9515500326861002e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528680 + }, + { + "epoch": 2.564050957051213, + "grad_norm": 1.8899291021057252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528690 + }, + { + "epoch": 2.564099455244049, + "grad_norm": 1.7199306867610176e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528700 + }, + { + "epoch": 2.564147953436885, + "grad_norm": 1.6265799374082235e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528710 + }, + { + "epoch": 2.564196451629721, + "grad_norm": 1.4590787245083447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528720 + }, + { + "epoch": 2.564244949822557, + "grad_norm": 2.0243415832510436e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528730 + }, + { + "epoch": 2.5642934480153934, + "grad_norm": 1.675179461813059e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528740 + }, + { + "epoch": 2.5643419462082293, + "grad_norm": 1.5355540838868365e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528750 + }, + { + "epoch": 2.5643904444010657, + "grad_norm": 1.481253431023788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528760 + }, + { + "epoch": 2.5644389425939016, + "grad_norm": 1.5877317238732758e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528770 + }, + { + "epoch": 2.5644874407867375, + "grad_norm": 1.847673658517124e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528780 + }, + { + "epoch": 2.564535938979574, + "grad_norm": 1.897138979245483e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528790 + }, + { + "epoch": 2.56458443717241, + "grad_norm": 1.6234995570130195e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528800 + }, + { + "epoch": 2.564632935365246, + "grad_norm": 1.8183666128379627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528810 + }, + { + "epoch": 2.564681433558082, + "grad_norm": 1.3420554445531252e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528820 + }, + { + "epoch": 2.564729931750918, + "grad_norm": 2.6621657767122997e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528830 + }, + { + "epoch": 2.5647784299437544, + "grad_norm": 1.6966982485655535e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528840 + }, + { + "epoch": 2.5648269281365903, + "grad_norm": 1.6412252890063428e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528850 + }, + { + "epoch": 2.5648754263294267, + "grad_norm": 1.434921248488763e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528860 + }, + { + "epoch": 2.5649239245222626, + "grad_norm": 1.6173885342141148e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528870 + }, + { + "epoch": 2.5649724227150985, + "grad_norm": 1.762263224236449e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528880 + }, + { + "epoch": 2.5650209209079344, + "grad_norm": 1.3672297072275796e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528890 + }, + { + "epoch": 2.565069419100771, + "grad_norm": 2.2032423885320895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528900 + }, + { + "epoch": 2.5651179172936067, + "grad_norm": 1.558758278008554e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528910 + }, + { + "epoch": 2.565166415486443, + "grad_norm": 1.5512958029262336e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528920 + }, + { + "epoch": 2.565214913679279, + "grad_norm": 2.5291770455737606e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528930 + }, + { + "epoch": 2.565263411872115, + "grad_norm": 1.50398804521501e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528940 + }, + { + "epoch": 2.5653119100649513, + "grad_norm": 1.499086010881001e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528950 + }, + { + "epoch": 2.565360408257787, + "grad_norm": 1.762304080443755e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528960 + }, + { + "epoch": 2.5654089064506236, + "grad_norm": 1.3687517785854197e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528970 + }, + { + "epoch": 2.5654574046434595, + "grad_norm": 2.095622697595445e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528980 + }, + { + "epoch": 2.5655059028362954, + "grad_norm": 1.4453590324592369e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 528990 + }, + { + "epoch": 2.565554401029132, + "grad_norm": 1.759434375969704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529000 + }, + { + "epoch": 2.5656028992219677, + "grad_norm": 1.4888482446906437e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529010 + }, + { + "epoch": 2.565651397414804, + "grad_norm": 1.6257097001926013e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529020 + }, + { + "epoch": 2.56569989560764, + "grad_norm": 1.9099022807722577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529030 + }, + { + "epoch": 2.565748393800476, + "grad_norm": 1.6610995245969207e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529040 + }, + { + "epoch": 2.565796891993312, + "grad_norm": 1.789553039088787e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529050 + }, + { + "epoch": 2.565845390186148, + "grad_norm": 1.4383254587357897e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529060 + }, + { + "epoch": 2.565893888378984, + "grad_norm": 1.9271261919584504e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529070 + }, + { + "epoch": 2.5659423865718205, + "grad_norm": 2.2302033997334547e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529080 + }, + { + "epoch": 2.5659908847646564, + "grad_norm": 1.4831185168873162e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529090 + }, + { + "epoch": 2.5660393829574923, + "grad_norm": 1.7310670230585856e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529100 + }, + { + "epoch": 2.5660878811503287, + "grad_norm": 1.8301415494192952e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529110 + }, + { + "epoch": 2.5661363793431646, + "grad_norm": 1.4675967996424788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529120 + }, + { + "epoch": 2.566184877536001, + "grad_norm": 2.4691940936349965e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529130 + }, + { + "epoch": 2.566233375728837, + "grad_norm": 1.764455070940585e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529140 + }, + { + "epoch": 2.566281873921673, + "grad_norm": 1.6205943254021804e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529150 + }, + { + "epoch": 2.566330372114509, + "grad_norm": 2.0150707769062137e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529160 + }, + { + "epoch": 2.566378870307345, + "grad_norm": 1.4950478188779925e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529170 + }, + { + "epoch": 2.5664273685001815, + "grad_norm": 1.9573297649344568e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529180 + }, + { + "epoch": 2.5664758666930174, + "grad_norm": 1.4768913203511147e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529190 + }, + { + "epoch": 2.5665243648858533, + "grad_norm": 2.2519163422884958e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529200 + }, + { + "epoch": 2.5665728630786893, + "grad_norm": 1.557710405108992e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529210 + }, + { + "epoch": 2.5666213612715256, + "grad_norm": 1.733429400019304e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529220 + }, + { + "epoch": 2.5666698594643615, + "grad_norm": 2.5622634680644296e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529230 + }, + { + "epoch": 2.566718357657198, + "grad_norm": 1.8943346447031217e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529240 + }, + { + "epoch": 2.566766855850034, + "grad_norm": 1.4785040747256062e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529250 + }, + { + "epoch": 2.5668153540428698, + "grad_norm": 1.5570446265655846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529260 + }, + { + "epoch": 2.566863852235706, + "grad_norm": 1.333342414255867e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529270 + }, + { + "epoch": 2.566912350428542, + "grad_norm": 2.0452882054655674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529280 + }, + { + "epoch": 2.5669608486213784, + "grad_norm": 1.6837315541806674e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529290 + }, + { + "epoch": 2.5670093468142143, + "grad_norm": 1.5407868758643417e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529300 + }, + { + "epoch": 2.5670578450070503, + "grad_norm": 2.2823204659516705e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529310 + }, + { + "epoch": 2.5671063431998866, + "grad_norm": 1.7263589668914392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529320 + }, + { + "epoch": 2.5671548413927225, + "grad_norm": 1.993712217540633e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529330 + }, + { + "epoch": 2.567203339585559, + "grad_norm": 1.7945541941344345e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529340 + }, + { + "epoch": 2.567251837778395, + "grad_norm": 1.5922958951364308e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529350 + }, + { + "epoch": 2.5673003359712308, + "grad_norm": 1.7152684606003277e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529360 + }, + { + "epoch": 2.567348834164067, + "grad_norm": 1.357282908287516e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529370 + }, + { + "epoch": 2.567397332356903, + "grad_norm": 2.4382124763633328e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529380 + }, + { + "epoch": 2.5674458305497394, + "grad_norm": 1.6273535408117823e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529390 + }, + { + "epoch": 2.5674943287425753, + "grad_norm": 1.9788860328162627e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529400 + }, + { + "epoch": 2.5675428269354112, + "grad_norm": 1.4424095695630967e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529410 + }, + { + "epoch": 2.567591325128247, + "grad_norm": 2.332137150062863e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529420 + }, + { + "epoch": 2.5676398233210835, + "grad_norm": 1.82564949824382e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529430 + }, + { + "epoch": 2.5676883215139195, + "grad_norm": 1.8267527934767713e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529440 + }, + { + "epoch": 2.567736819706756, + "grad_norm": 1.2792289894036912e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529450 + }, + { + "epoch": 2.5677853178995917, + "grad_norm": 2.089663908577677e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529460 + }, + { + "epoch": 2.5678338160924277, + "grad_norm": 1.4740044740335634e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529470 + }, + { + "epoch": 2.567882314285264, + "grad_norm": 2.370451213096203e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529480 + }, + { + "epoch": 2.5679308124781, + "grad_norm": 2.0536678135840702e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529490 + }, + { + "epoch": 2.5679793106709363, + "grad_norm": 1.5186502722031037e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529500 + }, + { + "epoch": 2.5680278088637722, + "grad_norm": 1.4940965797904937e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529510 + }, + { + "epoch": 2.568076307056608, + "grad_norm": 1.726819043312844e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529520 + }, + { + "epoch": 2.5681248052494445, + "grad_norm": 2.4289258604426323e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529530 + }, + { + "epoch": 2.5681733034422805, + "grad_norm": 1.8025298587076577e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529540 + }, + { + "epoch": 2.568221801635117, + "grad_norm": 1.5129698383020695e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529550 + }, + { + "epoch": 2.5682702998279527, + "grad_norm": 1.4133278547490136e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529560 + }, + { + "epoch": 2.5683187980207887, + "grad_norm": 1.3547688304527128e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529570 + }, + { + "epoch": 2.5683672962136246, + "grad_norm": 2.8209271363266453e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529580 + }, + { + "epoch": 2.568415794406461, + "grad_norm": 1.5253567298145754e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529590 + }, + { + "epoch": 2.568464292599297, + "grad_norm": 1.2833619500440818e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529600 + }, + { + "epoch": 2.5685127907921332, + "grad_norm": 1.5390105190249415e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529610 + }, + { + "epoch": 2.568561288984969, + "grad_norm": 1.3991418690295632e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529620 + }, + { + "epoch": 2.568609787177805, + "grad_norm": 2.3876809862599657e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529630 + }, + { + "epoch": 2.5686582853706414, + "grad_norm": 1.3988945113396767e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529640 + }, + { + "epoch": 2.5687067835634774, + "grad_norm": 1.8464159978748285e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529650 + }, + { + "epoch": 2.5687552817563137, + "grad_norm": 1.8080875463510893e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529660 + }, + { + "epoch": 2.5688037799491497, + "grad_norm": 1.2360472645411846e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529670 + }, + { + "epoch": 2.5688522781419856, + "grad_norm": 2.5441634576850447e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529680 + }, + { + "epoch": 2.568900776334822, + "grad_norm": 1.611961586434063e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529690 + }, + { + "epoch": 2.568949274527658, + "grad_norm": 1.818253281271609e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529700 + }, + { + "epoch": 2.5689977727204942, + "grad_norm": 1.4493360289691282e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529710 + }, + { + "epoch": 2.56904627091333, + "grad_norm": 1.4077562227043927e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529720 + }, + { + "epoch": 2.569094769106166, + "grad_norm": 2.0837173764221006e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529730 + }, + { + "epoch": 2.569143267299002, + "grad_norm": 1.428568108252648e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529740 + }, + { + "epoch": 2.5691917654918384, + "grad_norm": 1.354617040760786e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529750 + }, + { + "epoch": 2.5692402636846743, + "grad_norm": 1.341063970983214e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529760 + }, + { + "epoch": 2.5692887618775107, + "grad_norm": 1.537427785081036e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529770 + }, + { + "epoch": 2.5693372600703466, + "grad_norm": 1.8636212573142075e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529780 + }, + { + "epoch": 2.5693857582631825, + "grad_norm": 1.747808830998565e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529790 + }, + { + "epoch": 2.569434256456019, + "grad_norm": 2.2439211377900392e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529800 + }, + { + "epoch": 2.569482754648855, + "grad_norm": 1.8111405708509665e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529810 + }, + { + "epoch": 2.569531252841691, + "grad_norm": 1.8396155709865525e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529820 + }, + { + "epoch": 2.569579751034527, + "grad_norm": 1.963850770891895e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529830 + }, + { + "epoch": 2.569628249227363, + "grad_norm": 1.9474018841947327e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529840 + }, + { + "epoch": 2.5696767474201994, + "grad_norm": 1.7101305260780464e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529850 + }, + { + "epoch": 2.5697252456130353, + "grad_norm": 1.5600830849393788e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529860 + }, + { + "epoch": 2.5697737438058716, + "grad_norm": 1.5717454004970932e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529870 + }, + { + "epoch": 2.5698222419987076, + "grad_norm": 2.4051121982893164e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529880 + }, + { + "epoch": 2.5698707401915435, + "grad_norm": 1.4657832281272931e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529890 + }, + { + "epoch": 2.56991923838438, + "grad_norm": 2.2874480976042832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529900 + }, + { + "epoch": 2.5699677365772158, + "grad_norm": 1.3646461738403559e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529910 + }, + { + "epoch": 2.570016234770052, + "grad_norm": 1.3221660211115704e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529920 + }, + { + "epoch": 2.570064732962888, + "grad_norm": 1.969188190287241e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529930 + }, + { + "epoch": 2.570113231155724, + "grad_norm": 2.0729364891280966e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529940 + }, + { + "epoch": 2.57016172934856, + "grad_norm": 1.5477011672260232e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529950 + }, + { + "epoch": 2.5702102275413963, + "grad_norm": 1.5670144293267185e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529960 + }, + { + "epoch": 2.570258725734232, + "grad_norm": 1.5738827130462596e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529970 + }, + { + "epoch": 2.5703072239270686, + "grad_norm": 1.9983078303198454e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529980 + }, + { + "epoch": 2.5703557221199045, + "grad_norm": 1.684832007242676e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 529990 + }, + { + "epoch": 2.5704042203127404, + "grad_norm": 2.2553773959543832e-08, + "learning_rate": 0.0002, + "loss": 0.0, + "step": 530000 + } + ], + "logging_steps": 10, + "max_steps": 1000000, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.062510940966241e+17, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}