emilykang's picture
Training in progress, epoch 0
10ffc60 verified
raw
history blame
52 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.992429977289932,
"eval_steps": 500,
"global_step": 3300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03028009084027252,
"grad_norm": NaN,
"learning_rate": 0.0001999954685354173,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.06056018168054504,
"grad_norm": NaN,
"learning_rate": 0.0001999818745523526,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.09084027252081757,
"grad_norm": NaN,
"learning_rate": 0.00019995921928281894,
"loss": 0.0,
"step": 30
},
{
"epoch": 0.12112036336109008,
"grad_norm": NaN,
"learning_rate": 0.00019992750478004738,
"loss": 0.0,
"step": 40
},
{
"epoch": 0.1514004542013626,
"grad_norm": NaN,
"learning_rate": 0.0001998867339183008,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.18168054504163514,
"grad_norm": NaN,
"learning_rate": 0.00019983691039261357,
"loss": 0.0,
"step": 60
},
{
"epoch": 0.21196063588190764,
"grad_norm": NaN,
"learning_rate": 0.0001997780387184565,
"loss": 0.0,
"step": 70
},
{
"epoch": 0.24224072672218017,
"grad_norm": NaN,
"learning_rate": 0.00019971012423132775,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.27252081756245267,
"grad_norm": NaN,
"learning_rate": 0.00019963317308626914,
"loss": 0.0,
"step": 90
},
{
"epoch": 0.3028009084027252,
"grad_norm": NaN,
"learning_rate": 0.00019954719225730847,
"loss": 0.0,
"step": 100
},
{
"epoch": 0.3330809992429977,
"grad_norm": NaN,
"learning_rate": 0.00019945218953682734,
"loss": 0.0,
"step": 110
},
{
"epoch": 0.3633610900832703,
"grad_norm": NaN,
"learning_rate": 0.00019934817353485501,
"loss": 0.0,
"step": 120
},
{
"epoch": 0.3936411809235428,
"grad_norm": NaN,
"learning_rate": 0.0001992351536782881,
"loss": 0.0,
"step": 130
},
{
"epoch": 0.4239212717638153,
"grad_norm": NaN,
"learning_rate": 0.00019911314021003613,
"loss": 0.0,
"step": 140
},
{
"epoch": 0.45420136260408783,
"grad_norm": NaN,
"learning_rate": 0.0001989821441880933,
"loss": 0.0,
"step": 150
},
{
"epoch": 0.48448145344436033,
"grad_norm": NaN,
"learning_rate": 0.00019884217748453623,
"loss": 0.0,
"step": 160
},
{
"epoch": 0.5147615442846328,
"grad_norm": NaN,
"learning_rate": 0.00019869325278444824,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.5450416351249053,
"grad_norm": NaN,
"learning_rate": 0.00019853538358476932,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.5753217259651779,
"grad_norm": NaN,
"learning_rate": 0.00019836858419307324,
"loss": 0.0,
"step": 190
},
{
"epoch": 0.6056018168054504,
"grad_norm": NaN,
"learning_rate": 0.00019819286972627066,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.6358819076457229,
"grad_norm": NaN,
"learning_rate": 0.00019800825610923934,
"loss": 0.0,
"step": 210
},
{
"epoch": 0.6661619984859954,
"grad_norm": NaN,
"learning_rate": 0.00019781476007338058,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.6964420893262679,
"grad_norm": NaN,
"learning_rate": 0.00019761239915510302,
"loss": 0.0,
"step": 230
},
{
"epoch": 0.7267221801665406,
"grad_norm": NaN,
"learning_rate": 0.00019740119169423337,
"loss": 0.0,
"step": 240
},
{
"epoch": 0.757002271006813,
"grad_norm": NaN,
"learning_rate": 0.00019718115683235417,
"loss": 0.0,
"step": 250
},
{
"epoch": 0.7872823618470856,
"grad_norm": NaN,
"learning_rate": 0.00019695231451106912,
"loss": 0.0,
"step": 260
},
{
"epoch": 0.817562452687358,
"grad_norm": NaN,
"learning_rate": 0.00019671468547019573,
"loss": 0.0,
"step": 270
},
{
"epoch": 0.8478425435276306,
"grad_norm": NaN,
"learning_rate": 0.0001964682912458856,
"loss": 0.0,
"step": 280
},
{
"epoch": 0.878122634367903,
"grad_norm": NaN,
"learning_rate": 0.00019621315416867274,
"loss": 0.0,
"step": 290
},
{
"epoch": 0.9084027252081757,
"grad_norm": NaN,
"learning_rate": 0.00019594929736144976,
"loss": 0.0,
"step": 300
},
{
"epoch": 0.9386828160484482,
"grad_norm": NaN,
"learning_rate": 0.00019567674473737218,
"loss": 0.0,
"step": 310
},
{
"epoch": 0.9689629068887207,
"grad_norm": NaN,
"learning_rate": 0.00019539552099769126,
"loss": 0.0,
"step": 320
},
{
"epoch": 0.9992429977289932,
"grad_norm": NaN,
"learning_rate": 0.00019510565162951537,
"loss": 0.0,
"step": 330
},
{
"epoch": 1.0295230885692657,
"grad_norm": NaN,
"learning_rate": 0.00019480716290349995,
"loss": 0.0,
"step": 340
},
{
"epoch": 1.0598031794095382,
"grad_norm": NaN,
"learning_rate": 0.00019450008187146684,
"loss": 0.0,
"step": 350
},
{
"epoch": 1.0900832702498107,
"grad_norm": NaN,
"learning_rate": 0.00019418443636395248,
"loss": 0.0,
"step": 360
},
{
"epoch": 1.1203633610900834,
"grad_norm": NaN,
"learning_rate": 0.00019386025498768558,
"loss": 0.0,
"step": 370
},
{
"epoch": 1.1506434519303559,
"grad_norm": NaN,
"learning_rate": 0.00019352756712299468,
"loss": 0.0,
"step": 380
},
{
"epoch": 1.1809235427706284,
"grad_norm": NaN,
"learning_rate": 0.00019318640292114524,
"loss": 0.0,
"step": 390
},
{
"epoch": 1.2112036336109009,
"grad_norm": NaN,
"learning_rate": 0.00019283679330160726,
"loss": 0.0,
"step": 400
},
{
"epoch": 1.2414837244511734,
"grad_norm": NaN,
"learning_rate": 0.00019247876994925292,
"loss": 0.0,
"step": 410
},
{
"epoch": 1.2717638152914459,
"grad_norm": NaN,
"learning_rate": 0.000192112365311485,
"loss": 0.0,
"step": 420
},
{
"epoch": 1.3020439061317184,
"grad_norm": NaN,
"learning_rate": 0.00019173761259529633,
"loss": 0.0,
"step": 430
},
{
"epoch": 1.3323239969719909,
"grad_norm": NaN,
"learning_rate": 0.0001913545457642601,
"loss": 0.0,
"step": 440
},
{
"epoch": 1.3626040878122634,
"grad_norm": NaN,
"learning_rate": 0.00019096319953545185,
"loss": 0.0,
"step": 450
},
{
"epoch": 1.3928841786525359,
"grad_norm": NaN,
"learning_rate": 0.0001905636093763031,
"loss": 0.0,
"step": 460
},
{
"epoch": 1.4231642694928084,
"grad_norm": NaN,
"learning_rate": 0.00019015581150138693,
"loss": 0.0,
"step": 470
},
{
"epoch": 1.4534443603330809,
"grad_norm": NaN,
"learning_rate": 0.00018973984286913584,
"loss": 0.0,
"step": 480
},
{
"epoch": 1.4837244511733534,
"grad_norm": NaN,
"learning_rate": 0.0001893157411784924,
"loss": 0.0,
"step": 490
},
{
"epoch": 1.5140045420136259,
"grad_norm": NaN,
"learning_rate": 0.00018888354486549237,
"loss": 0.0,
"step": 500
},
{
"epoch": 1.5442846328538986,
"grad_norm": NaN,
"learning_rate": 0.00018844329309978145,
"loss": 0.0,
"step": 510
},
{
"epoch": 1.574564723694171,
"grad_norm": NaN,
"learning_rate": 0.00018799502578106534,
"loss": 0.0,
"step": 520
},
{
"epoch": 1.6048448145344436,
"grad_norm": NaN,
"learning_rate": 0.00018753878353549357,
"loss": 0.0,
"step": 530
},
{
"epoch": 1.635124905374716,
"grad_norm": NaN,
"learning_rate": 0.00018707460771197774,
"loss": 0.0,
"step": 540
},
{
"epoch": 1.6654049962149886,
"grad_norm": NaN,
"learning_rate": 0.00018660254037844388,
"loss": 0.0,
"step": 550
},
{
"epoch": 1.6956850870552613,
"grad_norm": NaN,
"learning_rate": 0.00018612262431802007,
"loss": 0.0,
"step": 560
},
{
"epoch": 1.7259651778955338,
"grad_norm": NaN,
"learning_rate": 0.0001856349030251589,
"loss": 0.0,
"step": 570
},
{
"epoch": 1.7562452687358063,
"grad_norm": NaN,
"learning_rate": 0.0001851394207016957,
"loss": 0.0,
"step": 580
},
{
"epoch": 1.7865253595760788,
"grad_norm": NaN,
"learning_rate": 0.00018463622225284242,
"loss": 0.0,
"step": 590
},
{
"epoch": 1.8168054504163513,
"grad_norm": NaN,
"learning_rate": 0.00018412535328311814,
"loss": 0.0,
"step": 600
},
{
"epoch": 1.8470855412566238,
"grad_norm": NaN,
"learning_rate": 0.0001836068600922156,
"loss": 0.0,
"step": 610
},
{
"epoch": 1.8773656320968963,
"grad_norm": NaN,
"learning_rate": 0.00018308078967080546,
"loss": 0.0,
"step": 620
},
{
"epoch": 1.9076457229371688,
"grad_norm": NaN,
"learning_rate": 0.0001825471896962774,
"loss": 0.0,
"step": 630
},
{
"epoch": 1.9379258137774413,
"grad_norm": NaN,
"learning_rate": 0.00018200610852841913,
"loss": 0.0,
"step": 640
},
{
"epoch": 1.9682059046177138,
"grad_norm": NaN,
"learning_rate": 0.00018145759520503358,
"loss": 0.0,
"step": 650
},
{
"epoch": 1.9984859954579863,
"grad_norm": NaN,
"learning_rate": 0.00018090169943749476,
"loss": 0.0,
"step": 660
},
{
"epoch": 2.028766086298259,
"grad_norm": NaN,
"learning_rate": 0.00018033847160624225,
"loss": 0.0,
"step": 670
},
{
"epoch": 2.0590461771385313,
"grad_norm": NaN,
"learning_rate": 0.00017976796275621555,
"loss": 0.0,
"step": 680
},
{
"epoch": 2.089326267978804,
"grad_norm": NaN,
"learning_rate": 0.00017919022459222752,
"loss": 0.0,
"step": 690
},
{
"epoch": 2.1196063588190763,
"grad_norm": NaN,
"learning_rate": 0.00017860530947427875,
"loss": 0.0,
"step": 700
},
{
"epoch": 2.149886449659349,
"grad_norm": NaN,
"learning_rate": 0.00017801327041281207,
"loss": 0.0,
"step": 710
},
{
"epoch": 2.1801665404996213,
"grad_norm": NaN,
"learning_rate": 0.00017741416106390826,
"loss": 0.0,
"step": 720
},
{
"epoch": 2.210446631339894,
"grad_norm": NaN,
"learning_rate": 0.00017680803572442318,
"loss": 0.0,
"step": 730
},
{
"epoch": 2.2407267221801668,
"grad_norm": NaN,
"learning_rate": 0.0001761949493270671,
"loss": 0.0,
"step": 740
},
{
"epoch": 2.2710068130204393,
"grad_norm": NaN,
"learning_rate": 0.00017557495743542585,
"loss": 0.0,
"step": 750
},
{
"epoch": 2.3012869038607118,
"grad_norm": NaN,
"learning_rate": 0.0001749481162389254,
"loss": 0.0,
"step": 760
},
{
"epoch": 2.3315669947009843,
"grad_norm": NaN,
"learning_rate": 0.00017431448254773944,
"loss": 0.0,
"step": 770
},
{
"epoch": 2.3618470855412568,
"grad_norm": NaN,
"learning_rate": 0.0001736741137876405,
"loss": 0.0,
"step": 780
},
{
"epoch": 2.3921271763815293,
"grad_norm": NaN,
"learning_rate": 0.00017302706799479574,
"loss": 0.0,
"step": 790
},
{
"epoch": 2.4224072672218018,
"grad_norm": NaN,
"learning_rate": 0.00017237340381050703,
"loss": 0.0,
"step": 800
},
{
"epoch": 2.4526873580620743,
"grad_norm": NaN,
"learning_rate": 0.00017171318047589637,
"loss": 0.0,
"step": 810
},
{
"epoch": 2.4829674489023468,
"grad_norm": NaN,
"learning_rate": 0.0001710464578265369,
"loss": 0.0,
"step": 820
},
{
"epoch": 2.5132475397426193,
"grad_norm": NaN,
"learning_rate": 0.00017037329628703004,
"loss": 0.0,
"step": 830
},
{
"epoch": 2.5435276305828918,
"grad_norm": NaN,
"learning_rate": 0.00016969375686552937,
"loss": 0.0,
"step": 840
},
{
"epoch": 2.5738077214231643,
"grad_norm": NaN,
"learning_rate": 0.00016900790114821122,
"loss": 0.0,
"step": 850
},
{
"epoch": 2.6040878122634368,
"grad_norm": NaN,
"learning_rate": 0.00016831579129369346,
"loss": 0.0,
"step": 860
},
{
"epoch": 2.6343679031037093,
"grad_norm": NaN,
"learning_rate": 0.00016761749002740193,
"loss": 0.0,
"step": 870
},
{
"epoch": 2.6646479939439818,
"grad_norm": NaN,
"learning_rate": 0.00016691306063588583,
"loss": 0.0,
"step": 880
},
{
"epoch": 2.6949280847842543,
"grad_norm": NaN,
"learning_rate": 0.00016620256696108188,
"loss": 0.0,
"step": 890
},
{
"epoch": 2.7252081756245268,
"grad_norm": NaN,
"learning_rate": 0.00016548607339452853,
"loss": 0.0,
"step": 900
},
{
"epoch": 2.7554882664647993,
"grad_norm": NaN,
"learning_rate": 0.00016476364487153023,
"loss": 0.0,
"step": 910
},
{
"epoch": 2.7857683573050718,
"grad_norm": NaN,
"learning_rate": 0.00016403534686527225,
"loss": 0.0,
"step": 920
},
{
"epoch": 2.8160484481453443,
"grad_norm": NaN,
"learning_rate": 0.00016330124538088705,
"loss": 0.0,
"step": 930
},
{
"epoch": 2.8463285389856168,
"grad_norm": NaN,
"learning_rate": 0.00016256140694947217,
"loss": 0.0,
"step": 940
},
{
"epoch": 2.8766086298258893,
"grad_norm": NaN,
"learning_rate": 0.00016181589862206052,
"loss": 0.0,
"step": 950
},
{
"epoch": 2.9068887206661618,
"grad_norm": NaN,
"learning_rate": 0.00016106478796354382,
"loss": 0.0,
"step": 960
},
{
"epoch": 2.9371688115064343,
"grad_norm": NaN,
"learning_rate": 0.00016030814304654895,
"loss": 0.0,
"step": 970
},
{
"epoch": 2.9674489023467068,
"grad_norm": NaN,
"learning_rate": 0.0001595460324452688,
"loss": 0.0,
"step": 980
},
{
"epoch": 2.9977289931869797,
"grad_norm": NaN,
"learning_rate": 0.00015877852522924732,
"loss": 0.0,
"step": 990
},
{
"epoch": 3.028009084027252,
"grad_norm": NaN,
"learning_rate": 0.00015800569095711982,
"loss": 0.0,
"step": 1000
},
{
"epoch": 3.0582891748675247,
"grad_norm": NaN,
"learning_rate": 0.00015722759967030898,
"loss": 0.0,
"step": 1010
},
{
"epoch": 3.088569265707797,
"grad_norm": NaN,
"learning_rate": 0.00015644432188667695,
"loss": 0.0,
"step": 1020
},
{
"epoch": 3.1188493565480697,
"grad_norm": NaN,
"learning_rate": 0.0001556559285941344,
"loss": 0.0,
"step": 1030
},
{
"epoch": 3.149129447388342,
"grad_norm": NaN,
"learning_rate": 0.000154862491244207,
"loss": 0.0,
"step": 1040
},
{
"epoch": 3.1794095382286147,
"grad_norm": NaN,
"learning_rate": 0.00015406408174555976,
"loss": 0.0,
"step": 1050
},
{
"epoch": 3.209689629068887,
"grad_norm": NaN,
"learning_rate": 0.00015326077245747999,
"loss": 0.0,
"step": 1060
},
{
"epoch": 3.2399697199091597,
"grad_norm": NaN,
"learning_rate": 0.00015245263618331945,
"loss": 0.0,
"step": 1070
},
{
"epoch": 3.270249810749432,
"grad_norm": NaN,
"learning_rate": 0.0001516397461638962,
"loss": 0.0,
"step": 1080
},
{
"epoch": 3.3005299015897047,
"grad_norm": NaN,
"learning_rate": 0.00015082217607085692,
"loss": 0.0,
"step": 1090
},
{
"epoch": 3.330809992429977,
"grad_norm": NaN,
"learning_rate": 0.00015000000000000001,
"loss": 0.0,
"step": 1100
},
{
"epoch": 3.3610900832702497,
"grad_norm": NaN,
"learning_rate": 0.0001491732924645604,
"loss": 0.0,
"step": 1110
},
{
"epoch": 3.391370174110522,
"grad_norm": NaN,
"learning_rate": 0.00014834212838845637,
"loss": 0.0,
"step": 1120
},
{
"epoch": 3.4216502649507947,
"grad_norm": NaN,
"learning_rate": 0.0001475065830994995,
"loss": 0.0,
"step": 1130
},
{
"epoch": 3.451930355791067,
"grad_norm": NaN,
"learning_rate": 0.00014666673232256738,
"loss": 0.0,
"step": 1140
},
{
"epoch": 3.4822104466313397,
"grad_norm": NaN,
"learning_rate": 0.00014582265217274104,
"loss": 0.0,
"step": 1150
},
{
"epoch": 3.5124905374716127,
"grad_norm": NaN,
"learning_rate": 0.0001449744191484066,
"loss": 0.0,
"step": 1160
},
{
"epoch": 3.542770628311885,
"grad_norm": NaN,
"learning_rate": 0.00014412211012432212,
"loss": 0.0,
"step": 1170
},
{
"epoch": 3.5730507191521577,
"grad_norm": NaN,
"learning_rate": 0.00014326580234465085,
"loss": 0.0,
"step": 1180
},
{
"epoch": 3.60333080999243,
"grad_norm": NaN,
"learning_rate": 0.00014240557341596018,
"loss": 0.0,
"step": 1190
},
{
"epoch": 3.6336109008327027,
"grad_norm": NaN,
"learning_rate": 0.00014154150130018866,
"loss": 0.0,
"step": 1200
},
{
"epoch": 3.663890991672975,
"grad_norm": NaN,
"learning_rate": 0.00014067366430758004,
"loss": 0.0,
"step": 1210
},
{
"epoch": 3.6941710825132477,
"grad_norm": NaN,
"learning_rate": 0.00013980214108958624,
"loss": 0.0,
"step": 1220
},
{
"epoch": 3.72445117335352,
"grad_norm": NaN,
"learning_rate": 0.00013892701063173918,
"loss": 0.0,
"step": 1230
},
{
"epoch": 3.7547312641937927,
"grad_norm": NaN,
"learning_rate": 0.0001380483522464923,
"loss": 0.0,
"step": 1240
},
{
"epoch": 3.785011355034065,
"grad_norm": NaN,
"learning_rate": 0.00013716624556603274,
"loss": 0.0,
"step": 1250
},
{
"epoch": 3.8152914458743377,
"grad_norm": NaN,
"learning_rate": 0.0001362807705350641,
"loss": 0.0,
"step": 1260
},
{
"epoch": 3.84557153671461,
"grad_norm": NaN,
"learning_rate": 0.00013539200740356118,
"loss": 0.0,
"step": 1270
},
{
"epoch": 3.8758516275548827,
"grad_norm": NaN,
"learning_rate": 0.00013450003671949706,
"loss": 0.0,
"step": 1280
},
{
"epoch": 3.906131718395155,
"grad_norm": NaN,
"learning_rate": 0.00013360493932154302,
"loss": 0.0,
"step": 1290
},
{
"epoch": 3.9364118092354277,
"grad_norm": NaN,
"learning_rate": 0.00013270679633174218,
"loss": 0.0,
"step": 1300
},
{
"epoch": 3.9666919000757,
"grad_norm": NaN,
"learning_rate": 0.00013180568914815752,
"loss": 0.0,
"step": 1310
},
{
"epoch": 3.9969719909159727,
"grad_norm": NaN,
"learning_rate": 0.00013090169943749476,
"loss": 0.0,
"step": 1320
},
{
"epoch": 4.027252081756245,
"grad_norm": NaN,
"learning_rate": 0.00012999490912770107,
"loss": 0.0,
"step": 1330
},
{
"epoch": 4.057532172596518,
"grad_norm": NaN,
"learning_rate": 0.0001290854004005399,
"loss": 0.0,
"step": 1340
},
{
"epoch": 4.08781226343679,
"grad_norm": NaN,
"learning_rate": 0.00012817325568414297,
"loss": 0.0,
"step": 1350
},
{
"epoch": 4.118092354277063,
"grad_norm": NaN,
"learning_rate": 0.0001272585576455398,
"loss": 0.0,
"step": 1360
},
{
"epoch": 4.148372445117335,
"grad_norm": NaN,
"learning_rate": 0.00012634138918316568,
"loss": 0.0,
"step": 1370
},
{
"epoch": 4.178652535957608,
"grad_norm": NaN,
"learning_rate": 0.00012542183341934872,
"loss": 0.0,
"step": 1380
},
{
"epoch": 4.20893262679788,
"grad_norm": NaN,
"learning_rate": 0.0001244999736927764,
"loss": 0.0,
"step": 1390
},
{
"epoch": 4.239212717638153,
"grad_norm": NaN,
"learning_rate": 0.00012357589355094275,
"loss": 0.0,
"step": 1400
},
{
"epoch": 4.269492808478425,
"grad_norm": NaN,
"learning_rate": 0.00012264967674257646,
"loss": 0.0,
"step": 1410
},
{
"epoch": 4.299772899318698,
"grad_norm": NaN,
"learning_rate": 0.00012172140721005079,
"loss": 0.0,
"step": 1420
},
{
"epoch": 4.33005299015897,
"grad_norm": NaN,
"learning_rate": 0.00012079116908177593,
"loss": 0.0,
"step": 1430
},
{
"epoch": 4.360333080999243,
"grad_norm": NaN,
"learning_rate": 0.00011985904666457455,
"loss": 0.0,
"step": 1440
},
{
"epoch": 4.390613171839515,
"grad_norm": NaN,
"learning_rate": 0.00011892512443604102,
"loss": 0.0,
"step": 1450
},
{
"epoch": 4.420893262679788,
"grad_norm": NaN,
"learning_rate": 0.00011798948703688539,
"loss": 0.0,
"step": 1460
},
{
"epoch": 4.45117335352006,
"grad_norm": NaN,
"learning_rate": 0.0001170522192632624,
"loss": 0.0,
"step": 1470
},
{
"epoch": 4.4814534443603335,
"grad_norm": NaN,
"learning_rate": 0.00011611340605908642,
"loss": 0.0,
"step": 1480
},
{
"epoch": 4.511733535200605,
"grad_norm": NaN,
"learning_rate": 0.00011517313250833317,
"loss": 0.0,
"step": 1490
},
{
"epoch": 4.5420136260408785,
"grad_norm": NaN,
"learning_rate": 0.00011423148382732853,
"loss": 0.0,
"step": 1500
},
{
"epoch": 4.57229371688115,
"grad_norm": NaN,
"learning_rate": 0.00011328854535702543,
"loss": 0.0,
"step": 1510
},
{
"epoch": 4.6025738077214235,
"grad_norm": NaN,
"learning_rate": 0.00011234440255526948,
"loss": 0.0,
"step": 1520
},
{
"epoch": 4.632853898561696,
"grad_norm": NaN,
"learning_rate": 0.00011139914098905406,
"loss": 0.0,
"step": 1530
},
{
"epoch": 4.6631339894019685,
"grad_norm": NaN,
"learning_rate": 0.00011045284632676536,
"loss": 0.0,
"step": 1540
},
{
"epoch": 4.693414080242241,
"grad_norm": NaN,
"learning_rate": 0.00010950560433041826,
"loss": 0.0,
"step": 1550
},
{
"epoch": 4.7236941710825135,
"grad_norm": NaN,
"learning_rate": 0.00010855750084788398,
"loss": 0.0,
"step": 1560
},
{
"epoch": 4.753974261922786,
"grad_norm": NaN,
"learning_rate": 0.00010760862180510951,
"loss": 0.0,
"step": 1570
},
{
"epoch": 4.7842543527630585,
"grad_norm": NaN,
"learning_rate": 0.00010665905319833041,
"loss": 0.0,
"step": 1580
},
{
"epoch": 4.814534443603331,
"grad_norm": NaN,
"learning_rate": 0.00010570888108627681,
"loss": 0.0,
"step": 1590
},
{
"epoch": 4.8448145344436035,
"grad_norm": NaN,
"learning_rate": 0.00010475819158237425,
"loss": 0.0,
"step": 1600
},
{
"epoch": 4.875094625283876,
"grad_norm": NaN,
"learning_rate": 0.00010380707084693901,
"loss": 0.0,
"step": 1610
},
{
"epoch": 4.9053747161241485,
"grad_norm": NaN,
"learning_rate": 0.00010285560507936961,
"loss": 0.0,
"step": 1620
},
{
"epoch": 4.935654806964421,
"grad_norm": NaN,
"learning_rate": 0.00010190388051033466,
"loss": 0.0,
"step": 1630
},
{
"epoch": 4.9659348978046935,
"grad_norm": NaN,
"learning_rate": 0.00010095198339395769,
"loss": 0.0,
"step": 1640
},
{
"epoch": 4.996214988644966,
"grad_norm": NaN,
"learning_rate": 0.0001,
"loss": 0.0,
"step": 1650
},
{
"epoch": 5.0264950794852385,
"grad_norm": NaN,
"learning_rate": 9.904801660604234e-05,
"loss": 0.0,
"step": 1660
},
{
"epoch": 5.056775170325511,
"grad_norm": NaN,
"learning_rate": 9.809611948966533e-05,
"loss": 0.0,
"step": 1670
},
{
"epoch": 5.0870552611657835,
"grad_norm": NaN,
"learning_rate": 9.71443949206304e-05,
"loss": 0.0,
"step": 1680
},
{
"epoch": 5.117335352006056,
"grad_norm": NaN,
"learning_rate": 9.619292915306101e-05,
"loss": 0.0,
"step": 1690
},
{
"epoch": 5.1476154428463285,
"grad_norm": NaN,
"learning_rate": 9.524180841762577e-05,
"loss": 0.0,
"step": 1700
},
{
"epoch": 5.177895533686601,
"grad_norm": NaN,
"learning_rate": 9.42911189137232e-05,
"loss": 0.0,
"step": 1710
},
{
"epoch": 5.2081756245268735,
"grad_norm": NaN,
"learning_rate": 9.334094680166962e-05,
"loss": 0.0,
"step": 1720
},
{
"epoch": 5.238455715367146,
"grad_norm": NaN,
"learning_rate": 9.239137819489047e-05,
"loss": 0.0,
"step": 1730
},
{
"epoch": 5.2687358062074185,
"grad_norm": NaN,
"learning_rate": 9.144249915211605e-05,
"loss": 0.0,
"step": 1740
},
{
"epoch": 5.299015897047691,
"grad_norm": NaN,
"learning_rate": 9.049439566958175e-05,
"loss": 0.0,
"step": 1750
},
{
"epoch": 5.3292959878879635,
"grad_norm": NaN,
"learning_rate": 8.954715367323468e-05,
"loss": 0.0,
"step": 1760
},
{
"epoch": 5.359576078728236,
"grad_norm": NaN,
"learning_rate": 8.860085901094595e-05,
"loss": 0.0,
"step": 1770
},
{
"epoch": 5.3898561695685085,
"grad_norm": NaN,
"learning_rate": 8.765559744473053e-05,
"loss": 0.0,
"step": 1780
},
{
"epoch": 5.420136260408781,
"grad_norm": NaN,
"learning_rate": 8.67114546429746e-05,
"loss": 0.0,
"step": 1790
},
{
"epoch": 5.4504163512490535,
"grad_norm": NaN,
"learning_rate": 8.57685161726715e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 5.480696442089326,
"grad_norm": NaN,
"learning_rate": 8.482686749166686e-05,
"loss": 0.0,
"step": 1810
},
{
"epoch": 5.5109765329295985,
"grad_norm": NaN,
"learning_rate": 8.38865939409136e-05,
"loss": 0.0,
"step": 1820
},
{
"epoch": 5.541256623769871,
"grad_norm": NaN,
"learning_rate": 8.294778073673762e-05,
"loss": 0.0,
"step": 1830
},
{
"epoch": 5.5715367146101435,
"grad_norm": NaN,
"learning_rate": 8.201051296311462e-05,
"loss": 0.0,
"step": 1840
},
{
"epoch": 5.601816805450416,
"grad_norm": NaN,
"learning_rate": 8.107487556395901e-05,
"loss": 0.0,
"step": 1850
},
{
"epoch": 5.6320968962906885,
"grad_norm": NaN,
"learning_rate": 8.014095333542548e-05,
"loss": 0.0,
"step": 1860
},
{
"epoch": 5.662376987130961,
"grad_norm": NaN,
"learning_rate": 7.920883091822408e-05,
"loss": 0.0,
"step": 1870
},
{
"epoch": 5.6926570779712335,
"grad_norm": NaN,
"learning_rate": 7.827859278994925e-05,
"loss": 0.0,
"step": 1880
},
{
"epoch": 5.722937168811506,
"grad_norm": NaN,
"learning_rate": 7.735032325742355e-05,
"loss": 0.0,
"step": 1890
},
{
"epoch": 5.7532172596517785,
"grad_norm": NaN,
"learning_rate": 7.642410644905726e-05,
"loss": 0.0,
"step": 1900
},
{
"epoch": 5.783497350492052,
"grad_norm": NaN,
"learning_rate": 7.550002630722366e-05,
"loss": 0.0,
"step": 1910
},
{
"epoch": 5.8137774413323235,
"grad_norm": NaN,
"learning_rate": 7.457816658065134e-05,
"loss": 0.0,
"step": 1920
},
{
"epoch": 5.844057532172597,
"grad_norm": NaN,
"learning_rate": 7.365861081683433e-05,
"loss": 0.0,
"step": 1930
},
{
"epoch": 5.8743376230128685,
"grad_norm": NaN,
"learning_rate": 7.274144235446023e-05,
"loss": 0.0,
"step": 1940
},
{
"epoch": 5.904617713853142,
"grad_norm": NaN,
"learning_rate": 7.182674431585704e-05,
"loss": 0.0,
"step": 1950
},
{
"epoch": 5.934897804693414,
"grad_norm": NaN,
"learning_rate": 7.09145995994601e-05,
"loss": 0.0,
"step": 1960
},
{
"epoch": 5.965177895533687,
"grad_norm": NaN,
"learning_rate": 7.000509087229895e-05,
"loss": 0.0,
"step": 1970
},
{
"epoch": 5.995457986373959,
"grad_norm": NaN,
"learning_rate": 6.909830056250527e-05,
"loss": 0.0,
"step": 1980
},
{
"epoch": 6.025738077214232,
"grad_norm": NaN,
"learning_rate": 6.819431085184251e-05,
"loss": 0.0,
"step": 1990
},
{
"epoch": 6.056018168054504,
"grad_norm": NaN,
"learning_rate": 6.729320366825784e-05,
"loss": 0.0,
"step": 2000
},
{
"epoch": 6.086298258894777,
"grad_norm": NaN,
"learning_rate": 6.639506067845697e-05,
"loss": 0.0,
"step": 2010
},
{
"epoch": 6.116578349735049,
"grad_norm": NaN,
"learning_rate": 6.549996328050296e-05,
"loss": 0.0,
"step": 2020
},
{
"epoch": 6.146858440575322,
"grad_norm": NaN,
"learning_rate": 6.460799259643884e-05,
"loss": 0.0,
"step": 2030
},
{
"epoch": 6.177138531415594,
"grad_norm": NaN,
"learning_rate": 6.371922946493591e-05,
"loss": 0.0,
"step": 2040
},
{
"epoch": 6.207418622255867,
"grad_norm": NaN,
"learning_rate": 6.283375443396726e-05,
"loss": 0.0,
"step": 2050
},
{
"epoch": 6.237698713096139,
"grad_norm": NaN,
"learning_rate": 6.19516477535077e-05,
"loss": 0.0,
"step": 2060
},
{
"epoch": 6.267978803936412,
"grad_norm": NaN,
"learning_rate": 6.107298936826086e-05,
"loss": 0.0,
"step": 2070
},
{
"epoch": 6.298258894776684,
"grad_norm": NaN,
"learning_rate": 6.019785891041381e-05,
"loss": 0.0,
"step": 2080
},
{
"epoch": 6.328538985616957,
"grad_norm": NaN,
"learning_rate": 5.9326335692419995e-05,
"loss": 0.0,
"step": 2090
},
{
"epoch": 6.358819076457229,
"grad_norm": NaN,
"learning_rate": 5.845849869981137e-05,
"loss": 0.0,
"step": 2100
},
{
"epoch": 6.389099167297502,
"grad_norm": NaN,
"learning_rate": 5.759442658403985e-05,
"loss": 0.0,
"step": 2110
},
{
"epoch": 6.419379258137774,
"grad_norm": NaN,
"learning_rate": 5.6734197655349156e-05,
"loss": 0.0,
"step": 2120
},
{
"epoch": 6.449659348978047,
"grad_norm": NaN,
"learning_rate": 5.5877889875677845e-05,
"loss": 0.0,
"step": 2130
},
{
"epoch": 6.479939439818319,
"grad_norm": NaN,
"learning_rate": 5.5025580851593436e-05,
"loss": 0.0,
"step": 2140
},
{
"epoch": 6.510219530658592,
"grad_norm": NaN,
"learning_rate": 5.417734782725896e-05,
"loss": 0.0,
"step": 2150
},
{
"epoch": 6.540499621498864,
"grad_norm": NaN,
"learning_rate": 5.333326767743263e-05,
"loss": 0.0,
"step": 2160
},
{
"epoch": 6.570779712339137,
"grad_norm": NaN,
"learning_rate": 5.249341690050051e-05,
"loss": 0.0,
"step": 2170
},
{
"epoch": 6.601059803179409,
"grad_norm": NaN,
"learning_rate": 5.1657871611543605e-05,
"loss": 0.0,
"step": 2180
},
{
"epoch": 6.631339894019682,
"grad_norm": NaN,
"learning_rate": 5.082670753543961e-05,
"loss": 0.0,
"step": 2190
},
{
"epoch": 6.661619984859954,
"grad_norm": NaN,
"learning_rate": 5.000000000000002e-05,
"loss": 0.0,
"step": 2200
},
{
"epoch": 6.691900075700227,
"grad_norm": NaN,
"learning_rate": 4.9177823929143106e-05,
"loss": 0.0,
"step": 2210
},
{
"epoch": 6.722180166540499,
"grad_norm": NaN,
"learning_rate": 4.836025383610382e-05,
"loss": 0.0,
"step": 2220
},
{
"epoch": 6.752460257380772,
"grad_norm": NaN,
"learning_rate": 4.754736381668057e-05,
"loss": 0.0,
"step": 2230
},
{
"epoch": 6.782740348221044,
"grad_norm": NaN,
"learning_rate": 4.673922754252002e-05,
"loss": 0.0,
"step": 2240
},
{
"epoch": 6.813020439061317,
"grad_norm": NaN,
"learning_rate": 4.593591825444028e-05,
"loss": 0.0,
"step": 2250
},
{
"epoch": 6.843300529901589,
"grad_norm": NaN,
"learning_rate": 4.513750875579303e-05,
"loss": 0.0,
"step": 2260
},
{
"epoch": 6.873580620741862,
"grad_norm": NaN,
"learning_rate": 4.434407140586565e-05,
"loss": 0.0,
"step": 2270
},
{
"epoch": 6.903860711582134,
"grad_norm": NaN,
"learning_rate": 4.355567811332311e-05,
"loss": 0.0,
"step": 2280
},
{
"epoch": 6.934140802422407,
"grad_norm": NaN,
"learning_rate": 4.277240032969105e-05,
"loss": 0.0,
"step": 2290
},
{
"epoch": 6.964420893262679,
"grad_norm": NaN,
"learning_rate": 4.19943090428802e-05,
"loss": 0.0,
"step": 2300
},
{
"epoch": 6.994700984102952,
"grad_norm": NaN,
"learning_rate": 4.12214747707527e-05,
"loss": 0.0,
"step": 2310
},
{
"epoch": 7.024981074943224,
"grad_norm": NaN,
"learning_rate": 4.045396755473121e-05,
"loss": 0.0,
"step": 2320
},
{
"epoch": 7.055261165783497,
"grad_norm": NaN,
"learning_rate": 3.969185695345105e-05,
"loss": 0.0,
"step": 2330
},
{
"epoch": 7.085541256623769,
"grad_norm": NaN,
"learning_rate": 3.893521203645618e-05,
"loss": 0.0,
"step": 2340
},
{
"epoch": 7.115821347464043,
"grad_norm": NaN,
"learning_rate": 3.8184101377939476e-05,
"loss": 0.0,
"step": 2350
},
{
"epoch": 7.146101438304315,
"grad_norm": NaN,
"learning_rate": 3.7438593050527845e-05,
"loss": 0.0,
"step": 2360
},
{
"epoch": 7.176381529144588,
"grad_norm": NaN,
"learning_rate": 3.669875461911297e-05,
"loss": 0.0,
"step": 2370
},
{
"epoch": 7.20666161998486,
"grad_norm": NaN,
"learning_rate": 3.5964653134727776e-05,
"loss": 0.0,
"step": 2380
},
{
"epoch": 7.236941710825133,
"grad_norm": NaN,
"learning_rate": 3.523635512846981e-05,
"loss": 0.0,
"step": 2390
},
{
"epoch": 7.267221801665405,
"grad_norm": NaN,
"learning_rate": 3.45139266054715e-05,
"loss": 0.0,
"step": 2400
},
{
"epoch": 7.297501892505678,
"grad_norm": NaN,
"learning_rate": 3.379743303891815e-05,
"loss": 0.0,
"step": 2410
},
{
"epoch": 7.32778198334595,
"grad_norm": NaN,
"learning_rate": 3.308693936411421e-05,
"loss": 0.0,
"step": 2420
},
{
"epoch": 7.358062074186223,
"grad_norm": NaN,
"learning_rate": 3.238250997259808e-05,
"loss": 0.0,
"step": 2430
},
{
"epoch": 7.388342165026495,
"grad_norm": NaN,
"learning_rate": 3.1684208706306574e-05,
"loss": 0.0,
"step": 2440
},
{
"epoch": 7.418622255866768,
"grad_norm": NaN,
"learning_rate": 3.099209885178882e-05,
"loss": 0.0,
"step": 2450
},
{
"epoch": 7.44890234670704,
"grad_norm": NaN,
"learning_rate": 3.030624313447067e-05,
"loss": 0.0,
"step": 2460
},
{
"epoch": 7.479182437547313,
"grad_norm": NaN,
"learning_rate": 2.962670371296996e-05,
"loss": 0.0,
"step": 2470
},
{
"epoch": 7.509462528387585,
"grad_norm": NaN,
"learning_rate": 2.8953542173463133e-05,
"loss": 0.0,
"step": 2480
},
{
"epoch": 7.539742619227858,
"grad_norm": NaN,
"learning_rate": 2.828681952410366e-05,
"loss": 0.0,
"step": 2490
},
{
"epoch": 7.57002271006813,
"grad_norm": NaN,
"learning_rate": 2.7626596189492983e-05,
"loss": 0.0,
"step": 2500
},
{
"epoch": 7.600302800908403,
"grad_norm": NaN,
"learning_rate": 2.6972932005204267e-05,
"loss": 0.0,
"step": 2510
},
{
"epoch": 7.630582891748675,
"grad_norm": NaN,
"learning_rate": 2.6325886212359498e-05,
"loss": 0.0,
"step": 2520
},
{
"epoch": 7.660862982588948,
"grad_norm": NaN,
"learning_rate": 2.5685517452260567e-05,
"loss": 0.0,
"step": 2530
},
{
"epoch": 7.69114307342922,
"grad_norm": NaN,
"learning_rate": 2.5051883761074614e-05,
"loss": 0.0,
"step": 2540
},
{
"epoch": 7.721423164269493,
"grad_norm": NaN,
"learning_rate": 2.4425042564574184e-05,
"loss": 0.0,
"step": 2550
},
{
"epoch": 7.751703255109765,
"grad_norm": NaN,
"learning_rate": 2.3805050672932928e-05,
"loss": 0.0,
"step": 2560
},
{
"epoch": 7.781983345950038,
"grad_norm": NaN,
"learning_rate": 2.3191964275576805e-05,
"loss": 0.0,
"step": 2570
},
{
"epoch": 7.81226343679031,
"grad_norm": NaN,
"learning_rate": 2.2585838936091754e-05,
"loss": 0.0,
"step": 2580
},
{
"epoch": 7.842543527630583,
"grad_norm": NaN,
"learning_rate": 2.198672958718796e-05,
"loss": 0.0,
"step": 2590
},
{
"epoch": 7.872823618470855,
"grad_norm": NaN,
"learning_rate": 2.139469052572127e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 7.903103709311128,
"grad_norm": NaN,
"learning_rate": 2.0809775407772503e-05,
"loss": 0.0,
"step": 2610
},
{
"epoch": 7.9333838001514,
"grad_norm": NaN,
"learning_rate": 2.0232037243784475e-05,
"loss": 0.0,
"step": 2620
},
{
"epoch": 7.963663890991673,
"grad_norm": NaN,
"learning_rate": 1.9661528393757744e-05,
"loss": 0.0,
"step": 2630
},
{
"epoch": 7.993943981831945,
"grad_norm": NaN,
"learning_rate": 1.9098300562505266e-05,
"loss": 0.0,
"step": 2640
},
{
"epoch": 8.024224072672219,
"grad_norm": NaN,
"learning_rate": 1.854240479496643e-05,
"loss": 0.0,
"step": 2650
},
{
"epoch": 8.05450416351249,
"grad_norm": NaN,
"learning_rate": 1.7993891471580893e-05,
"loss": 0.0,
"step": 2660
},
{
"epoch": 8.084784254352764,
"grad_norm": NaN,
"learning_rate": 1.74528103037226e-05,
"loss": 0.0,
"step": 2670
},
{
"epoch": 8.115064345193035,
"grad_norm": NaN,
"learning_rate": 1.6919210329194533e-05,
"loss": 0.0,
"step": 2680
},
{
"epoch": 8.145344436033309,
"grad_norm": NaN,
"learning_rate": 1.6393139907784404e-05,
"loss": 0.0,
"step": 2690
},
{
"epoch": 8.17562452687358,
"grad_norm": NaN,
"learning_rate": 1.587464671688187e-05,
"loss": 0.0,
"step": 2700
},
{
"epoch": 8.205904617713854,
"grad_norm": NaN,
"learning_rate": 1.5363777747157572e-05,
"loss": 0.0,
"step": 2710
},
{
"epoch": 8.236184708554125,
"grad_norm": NaN,
"learning_rate": 1.4860579298304312e-05,
"loss": 0.0,
"step": 2720
},
{
"epoch": 8.266464799394399,
"grad_norm": NaN,
"learning_rate": 1.4365096974841108e-05,
"loss": 0.0,
"step": 2730
},
{
"epoch": 8.29674489023467,
"grad_norm": NaN,
"learning_rate": 1.3877375681979943e-05,
"loss": 0.0,
"step": 2740
},
{
"epoch": 8.327024981074944,
"grad_norm": NaN,
"learning_rate": 1.339745962155613e-05,
"loss": 0.0,
"step": 2750
},
{
"epoch": 8.357305071915215,
"grad_norm": NaN,
"learning_rate": 1.2925392288022298e-05,
"loss": 0.0,
"step": 2760
},
{
"epoch": 8.387585162755489,
"grad_norm": NaN,
"learning_rate": 1.2461216464506454e-05,
"loss": 0.0,
"step": 2770
},
{
"epoch": 8.41786525359576,
"grad_norm": NaN,
"learning_rate": 1.2004974218934695e-05,
"loss": 0.0,
"step": 2780
},
{
"epoch": 8.448145344436034,
"grad_norm": NaN,
"learning_rate": 1.1556706900218572e-05,
"loss": 0.0,
"step": 2790
},
{
"epoch": 8.478425435276305,
"grad_norm": NaN,
"learning_rate": 1.1116455134507664e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 8.508705526116579,
"grad_norm": NaN,
"learning_rate": 1.068425882150762e-05,
"loss": 0.0,
"step": 2810
},
{
"epoch": 8.53898561695685,
"grad_norm": NaN,
"learning_rate": 1.026015713086418e-05,
"loss": 0.0,
"step": 2820
},
{
"epoch": 8.569265707797124,
"grad_norm": NaN,
"learning_rate": 9.844188498613116e-06,
"loss": 0.0,
"step": 2830
},
{
"epoch": 8.599545798637395,
"grad_norm": NaN,
"learning_rate": 9.436390623696911e-06,
"loss": 0.0,
"step": 2840
},
{
"epoch": 8.629825889477669,
"grad_norm": NaN,
"learning_rate": 9.036800464548157e-06,
"loss": 0.0,
"step": 2850
},
{
"epoch": 8.66010598031794,
"grad_norm": NaN,
"learning_rate": 8.645454235739903e-06,
"loss": 0.0,
"step": 2860
},
{
"epoch": 8.690386071158214,
"grad_norm": NaN,
"learning_rate": 8.262387404703653e-06,
"loss": 0.0,
"step": 2870
},
{
"epoch": 8.720666161998485,
"grad_norm": NaN,
"learning_rate": 7.887634688515e-06,
"loss": 0.0,
"step": 2880
},
{
"epoch": 8.750946252838759,
"grad_norm": NaN,
"learning_rate": 7.521230050747086e-06,
"loss": 0.0,
"step": 2890
},
{
"epoch": 8.78122634367903,
"grad_norm": NaN,
"learning_rate": 7.163206698392744e-06,
"loss": 0.0,
"step": 2900
},
{
"epoch": 8.811506434519304,
"grad_norm": NaN,
"learning_rate": 6.813597078854772e-06,
"loss": 0.0,
"step": 2910
},
{
"epoch": 8.841786525359575,
"grad_norm": NaN,
"learning_rate": 6.472432877005341e-06,
"loss": 0.0,
"step": 2920
},
{
"epoch": 8.872066616199849,
"grad_norm": NaN,
"learning_rate": 6.139745012314424e-06,
"loss": 0.0,
"step": 2930
},
{
"epoch": 8.90234670704012,
"grad_norm": NaN,
"learning_rate": 5.8155636360475385e-06,
"loss": 0.0,
"step": 2940
},
{
"epoch": 8.932626797880394,
"grad_norm": NaN,
"learning_rate": 5.499918128533155e-06,
"loss": 0.0,
"step": 2950
},
{
"epoch": 8.962906888720667,
"grad_norm": NaN,
"learning_rate": 5.192837096500058e-06,
"loss": 0.0,
"step": 2960
},
{
"epoch": 8.993186979560939,
"grad_norm": NaN,
"learning_rate": 4.8943483704846475e-06,
"loss": 0.0,
"step": 2970
},
{
"epoch": 9.02346707040121,
"grad_norm": NaN,
"learning_rate": 4.604479002308737e-06,
"loss": 0.0,
"step": 2980
},
{
"epoch": 9.053747161241484,
"grad_norm": NaN,
"learning_rate": 4.323255262627846e-06,
"loss": 0.0,
"step": 2990
},
{
"epoch": 9.084027252081757,
"grad_norm": NaN,
"learning_rate": 4.050702638550275e-06,
"loss": 0.0,
"step": 3000
},
{
"epoch": 9.114307342922029,
"grad_norm": NaN,
"learning_rate": 3.7868458313272904e-06,
"loss": 0.0,
"step": 3010
},
{
"epoch": 9.144587433762302,
"grad_norm": NaN,
"learning_rate": 3.5317087541144377e-06,
"loss": 0.0,
"step": 3020
},
{
"epoch": 9.174867524602574,
"grad_norm": NaN,
"learning_rate": 3.2853145298042953e-06,
"loss": 0.0,
"step": 3030
},
{
"epoch": 9.205147615442847,
"grad_norm": NaN,
"learning_rate": 3.047685488930874e-06,
"loss": 0.0,
"step": 3040
},
{
"epoch": 9.235427706283119,
"grad_norm": NaN,
"learning_rate": 2.818843167645835e-06,
"loss": 0.0,
"step": 3050
},
{
"epoch": 9.265707797123392,
"grad_norm": NaN,
"learning_rate": 2.5988083057666533e-06,
"loss": 0.0,
"step": 3060
},
{
"epoch": 9.295987887963664,
"grad_norm": NaN,
"learning_rate": 2.3876008448969976e-06,
"loss": 0.0,
"step": 3070
},
{
"epoch": 9.326267978803937,
"grad_norm": NaN,
"learning_rate": 2.1852399266194314e-06,
"loss": 0.0,
"step": 3080
},
{
"epoch": 9.356548069644209,
"grad_norm": NaN,
"learning_rate": 1.9917438907606556e-06,
"loss": 0.0,
"step": 3090
},
{
"epoch": 9.386828160484482,
"grad_norm": NaN,
"learning_rate": 1.8071302737293295e-06,
"loss": 0.0,
"step": 3100
},
{
"epoch": 9.417108251324754,
"grad_norm": NaN,
"learning_rate": 1.6314158069267948e-06,
"loss": 0.0,
"step": 3110
},
{
"epoch": 9.447388342165027,
"grad_norm": NaN,
"learning_rate": 1.4646164152307018e-06,
"loss": 0.0,
"step": 3120
},
{
"epoch": 9.477668433005299,
"grad_norm": NaN,
"learning_rate": 1.3067472155517735e-06,
"loss": 0.0,
"step": 3130
},
{
"epoch": 9.507948523845572,
"grad_norm": NaN,
"learning_rate": 1.157822515463758e-06,
"loss": 0.0,
"step": 3140
},
{
"epoch": 9.538228614685844,
"grad_norm": NaN,
"learning_rate": 1.0178558119067315e-06,
"loss": 0.0,
"step": 3150
},
{
"epoch": 9.568508705526117,
"grad_norm": NaN,
"learning_rate": 8.868597899638898e-07,
"loss": 0.0,
"step": 3160
},
{
"epoch": 9.598788796366389,
"grad_norm": NaN,
"learning_rate": 7.648463217118984e-07,
"loss": 0.0,
"step": 3170
},
{
"epoch": 9.629068887206662,
"grad_norm": NaN,
"learning_rate": 6.518264651449779e-07,
"loss": 0.0,
"step": 3180
},
{
"epoch": 9.659348978046934,
"grad_norm": NaN,
"learning_rate": 5.478104631726711e-07,
"loss": 0.0,
"step": 3190
},
{
"epoch": 9.689629068887207,
"grad_norm": NaN,
"learning_rate": 4.5280774269154115e-07,
"loss": 0.0,
"step": 3200
},
{
"epoch": 9.719909159727479,
"grad_norm": NaN,
"learning_rate": 3.6682691373086665e-07,
"loss": 0.0,
"step": 3210
},
{
"epoch": 9.750189250567752,
"grad_norm": NaN,
"learning_rate": 2.898757686722542e-07,
"loss": 0.0,
"step": 3220
},
{
"epoch": 9.780469341408024,
"grad_norm": NaN,
"learning_rate": 2.219612815434924e-07,
"loss": 0.0,
"step": 3230
},
{
"epoch": 9.810749432248297,
"grad_norm": NaN,
"learning_rate": 1.630896073864352e-07,
"loss": 0.0,
"step": 3240
},
{
"epoch": 9.841029523088569,
"grad_norm": NaN,
"learning_rate": 1.1326608169920372e-07,
"loss": 0.0,
"step": 3250
},
{
"epoch": 9.871309613928842,
"grad_norm": NaN,
"learning_rate": 7.249521995263964e-08,
"loss": 0.0,
"step": 3260
},
{
"epoch": 9.901589704769114,
"grad_norm": NaN,
"learning_rate": 4.078071718107701e-08,
"loss": 0.0,
"step": 3270
},
{
"epoch": 9.931869795609387,
"grad_norm": NaN,
"learning_rate": 1.81254476474213e-08,
"loss": 0.0,
"step": 3280
},
{
"epoch": 9.962149886449659,
"grad_norm": NaN,
"learning_rate": 4.531464582713252e-09,
"loss": 0.0,
"step": 3290
},
{
"epoch": 9.992429977289932,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3300
},
{
"epoch": 9.992429977289932,
"step": 3300,
"total_flos": 1.623229319872512e+17,
"train_loss": 0.0,
"train_runtime": 3221.4406,
"train_samples_per_second": 4.101,
"train_steps_per_second": 1.024
}
],
"logging_steps": 10,
"max_steps": 3300,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.623229319872512e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}