{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.03930611603165453,
  "eval_steps": 500,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002620407735443635,
      "grad_norm": 1.2820378541946411,
      "learning_rate": 4.999563265377426e-05,
      "loss": 0.6609,
      "step": 10
    },
    {
      "epoch": 0.000524081547088727,
      "grad_norm": 1.3106062412261963,
      "learning_rate": 4.999126530754852e-05,
      "loss": 0.8066,
      "step": 20
    },
    {
      "epoch": 0.0007861223206330905,
      "grad_norm": 1.6985974311828613,
      "learning_rate": 4.9986897961322784e-05,
      "loss": 0.6863,
      "step": 30
    },
    {
      "epoch": 0.001048163094177454,
      "grad_norm": 1.2552741765975952,
      "learning_rate": 4.9982530615097044e-05,
      "loss": 0.5674,
      "step": 40
    },
    {
      "epoch": 0.0013102038677218176,
      "grad_norm": 1.3928155899047852,
      "learning_rate": 4.9978163268871304e-05,
      "loss": 0.5992,
      "step": 50
    },
    {
      "epoch": 0.001572244641266181,
      "grad_norm": 1.5794411897659302,
      "learning_rate": 4.9973795922645565e-05,
      "loss": 0.7939,
      "step": 60
    },
    {
      "epoch": 0.0018342854148105446,
      "grad_norm": 1.4024512767791748,
      "learning_rate": 4.9969428576419825e-05,
      "loss": 0.6178,
      "step": 70
    },
    {
      "epoch": 0.002096326188354908,
      "grad_norm": 1.2232664823532104,
      "learning_rate": 4.9965061230194085e-05,
      "loss": 0.7544,
      "step": 80
    },
    {
      "epoch": 0.0023583669618992714,
      "grad_norm": 1.5368870496749878,
      "learning_rate": 4.9960693883968346e-05,
      "loss": 0.6645,
      "step": 90
    },
    {
      "epoch": 0.002620407735443635,
      "grad_norm": 1.6177372932434082,
      "learning_rate": 4.9956326537742606e-05,
      "loss": 0.6329,
      "step": 100
    },
    {
      "epoch": 0.0028824485089879986,
      "grad_norm": 1.3803173303604126,
      "learning_rate": 4.995195919151687e-05,
      "loss": 0.7114,
      "step": 110
    },
    {
      "epoch": 0.003144489282532362,
      "grad_norm": 1.5191670656204224,
      "learning_rate": 4.9947591845291134e-05,
      "loss": 0.8717,
      "step": 120
    },
    {
      "epoch": 0.0034065300560767254,
      "grad_norm": 1.2967548370361328,
      "learning_rate": 4.994322449906539e-05,
      "loss": 0.7618,
      "step": 130
    },
    {
      "epoch": 0.003668570829621089,
      "grad_norm": 1.8742738962173462,
      "learning_rate": 4.9938857152839655e-05,
      "loss": 0.5896,
      "step": 140
    },
    {
      "epoch": 0.003930611603165452,
      "grad_norm": 1.697966456413269,
      "learning_rate": 4.993448980661391e-05,
      "loss": 0.7493,
      "step": 150
    },
    {
      "epoch": 0.004192652376709816,
      "grad_norm": 1.5282775163650513,
      "learning_rate": 4.9930122460388175e-05,
      "loss": 0.6359,
      "step": 160
    },
    {
      "epoch": 0.00445469315025418,
      "grad_norm": 1.2776225805282593,
      "learning_rate": 4.992575511416243e-05,
      "loss": 0.7677,
      "step": 170
    },
    {
      "epoch": 0.004716733923798543,
      "grad_norm": 1.2036848068237305,
      "learning_rate": 4.9921387767936696e-05,
      "loss": 0.6234,
      "step": 180
    },
    {
      "epoch": 0.0049787746973429066,
      "grad_norm": 1.1325912475585938,
      "learning_rate": 4.991702042171095e-05,
      "loss": 0.624,
      "step": 190
    },
    {
      "epoch": 0.00524081547088727,
      "grad_norm": 1.3846409320831299,
      "learning_rate": 4.991265307548522e-05,
      "loss": 0.5979,
      "step": 200
    },
    {
      "epoch": 0.005502856244431633,
      "grad_norm": 1.9792439937591553,
      "learning_rate": 4.990828572925948e-05,
      "loss": 0.7897,
      "step": 210
    },
    {
      "epoch": 0.005764897017975997,
      "grad_norm": 1.8546253442764282,
      "learning_rate": 4.990391838303374e-05,
      "loss": 0.7035,
      "step": 220
    },
    {
      "epoch": 0.006026937791520361,
      "grad_norm": 1.5434975624084473,
      "learning_rate": 4.9899551036808e-05,
      "loss": 0.7092,
      "step": 230
    },
    {
      "epoch": 0.006288978565064724,
      "grad_norm": 1.0328209400177002,
      "learning_rate": 4.989518369058226e-05,
      "loss": 0.6239,
      "step": 240
    },
    {
      "epoch": 0.006551019338609088,
      "grad_norm": 0.9389006495475769,
      "learning_rate": 4.989081634435652e-05,
      "loss": 0.6957,
      "step": 250
    },
    {
      "epoch": 0.006813060112153451,
      "grad_norm": 1.0274962186813354,
      "learning_rate": 4.988644899813078e-05,
      "loss": 0.5302,
      "step": 260
    },
    {
      "epoch": 0.0070751008856978145,
      "grad_norm": 1.2626285552978516,
      "learning_rate": 4.988208165190504e-05,
      "loss": 0.6541,
      "step": 270
    },
    {
      "epoch": 0.007337141659242178,
      "grad_norm": 1.4558316469192505,
      "learning_rate": 4.98777143056793e-05,
      "loss": 0.7284,
      "step": 280
    },
    {
      "epoch": 0.007599182432786541,
      "grad_norm": 1.3997328281402588,
      "learning_rate": 4.987334695945356e-05,
      "loss": 0.6888,
      "step": 290
    },
    {
      "epoch": 0.007861223206330904,
      "grad_norm": 1.7742432355880737,
      "learning_rate": 4.986897961322782e-05,
      "loss": 0.6869,
      "step": 300
    },
    {
      "epoch": 0.008123263979875268,
      "grad_norm": 1.4925923347473145,
      "learning_rate": 4.986461226700208e-05,
      "loss": 0.731,
      "step": 310
    },
    {
      "epoch": 0.008385304753419632,
      "grad_norm": 1.5941082239151,
      "learning_rate": 4.986024492077634e-05,
      "loss": 0.7149,
      "step": 320
    },
    {
      "epoch": 0.008647345526963996,
      "grad_norm": 1.507450819015503,
      "learning_rate": 4.98558775745506e-05,
      "loss": 0.6443,
      "step": 330
    },
    {
      "epoch": 0.00890938630050836,
      "grad_norm": 0.9866214990615845,
      "learning_rate": 4.985151022832487e-05,
      "loss": 0.6003,
      "step": 340
    },
    {
      "epoch": 0.009171427074052723,
      "grad_norm": 0.9682250022888184,
      "learning_rate": 4.984714288209912e-05,
      "loss": 0.6602,
      "step": 350
    },
    {
      "epoch": 0.009433467847597085,
      "grad_norm": 1.7567181587219238,
      "learning_rate": 4.984277553587338e-05,
      "loss": 0.5647,
      "step": 360
    },
    {
      "epoch": 0.00969550862114145,
      "grad_norm": 1.4297257661819458,
      "learning_rate": 4.9838408189647643e-05,
      "loss": 0.6628,
      "step": 370
    },
    {
      "epoch": 0.009957549394685813,
      "grad_norm": 1.515763521194458,
      "learning_rate": 4.9834040843421904e-05,
      "loss": 0.568,
      "step": 380
    },
    {
      "epoch": 0.010219590168230177,
      "grad_norm": 1.1806342601776123,
      "learning_rate": 4.9829673497196164e-05,
      "loss": 0.6729,
      "step": 390
    },
    {
      "epoch": 0.01048163094177454,
      "grad_norm": 1.1328020095825195,
      "learning_rate": 4.9825306150970425e-05,
      "loss": 0.6963,
      "step": 400
    },
    {
      "epoch": 0.010743671715318903,
      "grad_norm": 0.7740004658699036,
      "learning_rate": 4.9820938804744685e-05,
      "loss": 0.6654,
      "step": 410
    },
    {
      "epoch": 0.011005712488863267,
      "grad_norm": 0.9519413113594055,
      "learning_rate": 4.9816571458518945e-05,
      "loss": 0.6487,
      "step": 420
    },
    {
      "epoch": 0.01126775326240763,
      "grad_norm": 0.8964847922325134,
      "learning_rate": 4.981220411229321e-05,
      "loss": 0.5667,
      "step": 430
    },
    {
      "epoch": 0.011529794035951994,
      "grad_norm": 1.428072452545166,
      "learning_rate": 4.9807836766067466e-05,
      "loss": 0.8164,
      "step": 440
    },
    {
      "epoch": 0.011791834809496358,
      "grad_norm": 1.4375147819519043,
      "learning_rate": 4.980346941984173e-05,
      "loss": 0.5476,
      "step": 450
    },
    {
      "epoch": 0.012053875583040722,
      "grad_norm": 1.1702146530151367,
      "learning_rate": 4.979910207361599e-05,
      "loss": 0.7342,
      "step": 460
    },
    {
      "epoch": 0.012315916356585084,
      "grad_norm": 1.2703320980072021,
      "learning_rate": 4.9794734727390254e-05,
      "loss": 0.5767,
      "step": 470
    },
    {
      "epoch": 0.012577957130129448,
      "grad_norm": 1.2520267963409424,
      "learning_rate": 4.979036738116451e-05,
      "loss": 0.5969,
      "step": 480
    },
    {
      "epoch": 0.012839997903673812,
      "grad_norm": 1.413979172706604,
      "learning_rate": 4.9786000034938775e-05,
      "loss": 0.7011,
      "step": 490
    },
    {
      "epoch": 0.013102038677218176,
      "grad_norm": 1.3918565511703491,
      "learning_rate": 4.978163268871303e-05,
      "loss": 0.6041,
      "step": 500
    },
    {
      "epoch": 0.01336407945076254,
      "grad_norm": 0.9175894260406494,
      "learning_rate": 4.9777265342487296e-05,
      "loss": 0.5052,
      "step": 510
    },
    {
      "epoch": 0.013626120224306901,
      "grad_norm": 1.296505331993103,
      "learning_rate": 4.9772897996261556e-05,
      "loss": 0.6076,
      "step": 520
    },
    {
      "epoch": 0.013888160997851265,
      "grad_norm": 1.2490183115005493,
      "learning_rate": 4.9768530650035816e-05,
      "loss": 0.6287,
      "step": 530
    },
    {
      "epoch": 0.014150201771395629,
      "grad_norm": 1.398285984992981,
      "learning_rate": 4.976416330381008e-05,
      "loss": 0.7905,
      "step": 540
    },
    {
      "epoch": 0.014412242544939993,
      "grad_norm": 1.3094829320907593,
      "learning_rate": 4.975979595758434e-05,
      "loss": 0.5133,
      "step": 550
    },
    {
      "epoch": 0.014674283318484357,
      "grad_norm": 1.7128199338912964,
      "learning_rate": 4.97554286113586e-05,
      "loss": 0.824,
      "step": 560
    },
    {
      "epoch": 0.014936324092028719,
      "grad_norm": 1.1319103240966797,
      "learning_rate": 4.975106126513286e-05,
      "loss": 0.5893,
      "step": 570
    },
    {
      "epoch": 0.015198364865573083,
      "grad_norm": 1.6176029443740845,
      "learning_rate": 4.974669391890712e-05,
      "loss": 0.581,
      "step": 580
    },
    {
      "epoch": 0.015460405639117446,
      "grad_norm": 1.590836524963379,
      "learning_rate": 4.974232657268138e-05,
      "loss": 0.5814,
      "step": 590
    },
    {
      "epoch": 0.01572244641266181,
      "grad_norm": 1.6922227144241333,
      "learning_rate": 4.973795922645564e-05,
      "loss": 0.5917,
      "step": 600
    },
    {
      "epoch": 0.015984487186206174,
      "grad_norm": 1.3479337692260742,
      "learning_rate": 4.97335918802299e-05,
      "loss": 0.7356,
      "step": 610
    },
    {
      "epoch": 0.016246527959750536,
      "grad_norm": 2.2019124031066895,
      "learning_rate": 4.972922453400416e-05,
      "loss": 0.6197,
      "step": 620
    },
    {
      "epoch": 0.016508568733294902,
      "grad_norm": 1.6211423873901367,
      "learning_rate": 4.972485718777842e-05,
      "loss": 0.6299,
      "step": 630
    },
    {
      "epoch": 0.016770609506839264,
      "grad_norm": 1.157416582107544,
      "learning_rate": 4.972048984155268e-05,
      "loss": 0.6343,
      "step": 640
    },
    {
      "epoch": 0.01703265028038363,
      "grad_norm": 1.2576712369918823,
      "learning_rate": 4.971612249532695e-05,
      "loss": 0.6126,
      "step": 650
    },
    {
      "epoch": 0.01729469105392799,
      "grad_norm": 1.3852715492248535,
      "learning_rate": 4.97117551491012e-05,
      "loss": 0.6352,
      "step": 660
    },
    {
      "epoch": 0.017556731827472354,
      "grad_norm": 1.0178048610687256,
      "learning_rate": 4.970738780287547e-05,
      "loss": 0.5923,
      "step": 670
    },
    {
      "epoch": 0.01781877260101672,
      "grad_norm": 0.8760583996772766,
      "learning_rate": 4.970302045664972e-05,
      "loss": 0.6158,
      "step": 680
    },
    {
      "epoch": 0.01808081337456108,
      "grad_norm": 0.8956984281539917,
      "learning_rate": 4.969865311042398e-05,
      "loss": 0.5746,
      "step": 690
    },
    {
      "epoch": 0.018342854148105447,
      "grad_norm": 1.1126501560211182,
      "learning_rate": 4.969428576419824e-05,
      "loss": 0.6254,
      "step": 700
    },
    {
      "epoch": 0.01860489492164981,
      "grad_norm": 1.168455958366394,
      "learning_rate": 4.96899184179725e-05,
      "loss": 0.652,
      "step": 710
    },
    {
      "epoch": 0.01886693569519417,
      "grad_norm": 1.3628567457199097,
      "learning_rate": 4.9685551071746764e-05,
      "loss": 0.5789,
      "step": 720
    },
    {
      "epoch": 0.019128976468738536,
      "grad_norm": 1.1971865892410278,
      "learning_rate": 4.9681183725521024e-05,
      "loss": 0.6408,
      "step": 730
    },
    {
      "epoch": 0.0193910172422829,
      "grad_norm": 1.1916868686676025,
      "learning_rate": 4.9676816379295285e-05,
      "loss": 0.6461,
      "step": 740
    },
    {
      "epoch": 0.019653058015827264,
      "grad_norm": 1.1797837018966675,
      "learning_rate": 4.9672449033069545e-05,
      "loss": 0.5843,
      "step": 750
    },
    {
      "epoch": 0.019915098789371626,
      "grad_norm": 0.7941935658454895,
      "learning_rate": 4.966808168684381e-05,
      "loss": 0.6165,
      "step": 760
    },
    {
      "epoch": 0.02017713956291599,
      "grad_norm": 1.4876329898834229,
      "learning_rate": 4.9663714340618066e-05,
      "loss": 0.6347,
      "step": 770
    },
    {
      "epoch": 0.020439180336460354,
      "grad_norm": 1.1482038497924805,
      "learning_rate": 4.965934699439233e-05,
      "loss": 0.5662,
      "step": 780
    },
    {
      "epoch": 0.020701221110004716,
      "grad_norm": 1.3942419290542603,
      "learning_rate": 4.9654979648166586e-05,
      "loss": 0.6189,
      "step": 790
    },
    {
      "epoch": 0.02096326188354908,
      "grad_norm": 0.8826277256011963,
      "learning_rate": 4.9650612301940854e-05,
      "loss": 0.6801,
      "step": 800
    },
    {
      "epoch": 0.021225302657093444,
      "grad_norm": 1.3729712963104248,
      "learning_rate": 4.964624495571511e-05,
      "loss": 0.5789,
      "step": 810
    },
    {
      "epoch": 0.021487343430637806,
      "grad_norm": 0.747199296951294,
      "learning_rate": 4.9641877609489374e-05,
      "loss": 0.651,
      "step": 820
    },
    {
      "epoch": 0.02174938420418217,
      "grad_norm": 0.7911145091056824,
      "learning_rate": 4.963751026326363e-05,
      "loss": 0.6834,
      "step": 830
    },
    {
      "epoch": 0.022011424977726533,
      "grad_norm": 1.1725844144821167,
      "learning_rate": 4.9633142917037895e-05,
      "loss": 0.6687,
      "step": 840
    },
    {
      "epoch": 0.0222734657512709,
      "grad_norm": 1.2759829759597778,
      "learning_rate": 4.9628775570812156e-05,
      "loss": 0.6612,
      "step": 850
    },
    {
      "epoch": 0.02253550652481526,
      "grad_norm": 1.497684359550476,
      "learning_rate": 4.9624408224586416e-05,
      "loss": 0.686,
      "step": 860
    },
    {
      "epoch": 0.022797547298359623,
      "grad_norm": 1.4431102275848389,
      "learning_rate": 4.9620040878360676e-05,
      "loss": 0.5838,
      "step": 870
    },
    {
      "epoch": 0.02305958807190399,
      "grad_norm": 0.8864196538925171,
      "learning_rate": 4.961567353213494e-05,
      "loss": 0.6076,
      "step": 880
    },
    {
      "epoch": 0.02332162884544835,
      "grad_norm": 1.4421597719192505,
      "learning_rate": 4.96113061859092e-05,
      "loss": 0.6669,
      "step": 890
    },
    {
      "epoch": 0.023583669618992716,
      "grad_norm": 1.541601300239563,
      "learning_rate": 4.960693883968346e-05,
      "loss": 0.477,
      "step": 900
    },
    {
      "epoch": 0.02384571039253708,
      "grad_norm": 1.0725853443145752,
      "learning_rate": 4.960257149345772e-05,
      "loss": 0.5399,
      "step": 910
    },
    {
      "epoch": 0.024107751166081444,
      "grad_norm": 1.4579834938049316,
      "learning_rate": 4.959820414723198e-05,
      "loss": 0.636,
      "step": 920
    },
    {
      "epoch": 0.024369791939625806,
      "grad_norm": 1.1018449068069458,
      "learning_rate": 4.959383680100624e-05,
      "loss": 0.7731,
      "step": 930
    },
    {
      "epoch": 0.024631832713170168,
      "grad_norm": 1.3531861305236816,
      "learning_rate": 4.95894694547805e-05,
      "loss": 0.7017,
      "step": 940
    },
    {
      "epoch": 0.024893873486714534,
      "grad_norm": 1.1225773096084595,
      "learning_rate": 4.958510210855476e-05,
      "loss": 0.7228,
      "step": 950
    },
    {
      "epoch": 0.025155914260258896,
      "grad_norm": 0.6442508697509766,
      "learning_rate": 4.958073476232902e-05,
      "loss": 0.4421,
      "step": 960
    },
    {
      "epoch": 0.02541795503380326,
      "grad_norm": 1.1338638067245483,
      "learning_rate": 4.957636741610328e-05,
      "loss": 0.6533,
      "step": 970
    },
    {
      "epoch": 0.025679995807347623,
      "grad_norm": 1.0796573162078857,
      "learning_rate": 4.957200006987755e-05,
      "loss": 0.6407,
      "step": 980
    },
    {
      "epoch": 0.025942036580891985,
      "grad_norm": 1.001578450202942,
      "learning_rate": 4.95676327236518e-05,
      "loss": 0.652,
      "step": 990
    },
    {
      "epoch": 0.02620407735443635,
      "grad_norm": 1.521545648574829,
      "learning_rate": 4.956326537742607e-05,
      "loss": 0.4812,
      "step": 1000
    },
    {
      "epoch": 0.026466118127980713,
      "grad_norm": 1.1855580806732178,
      "learning_rate": 4.955889803120032e-05,
      "loss": 0.5927,
      "step": 1010
    },
    {
      "epoch": 0.02672815890152508,
      "grad_norm": 1.3463046550750732,
      "learning_rate": 4.955453068497459e-05,
      "loss": 0.7304,
      "step": 1020
    },
    {
      "epoch": 0.02699019967506944,
      "grad_norm": 1.0500322580337524,
      "learning_rate": 4.955016333874884e-05,
      "loss": 0.5409,
      "step": 1030
    },
    {
      "epoch": 0.027252240448613803,
      "grad_norm": 1.4060779809951782,
      "learning_rate": 4.95457959925231e-05,
      "loss": 0.7348,
      "step": 1040
    },
    {
      "epoch": 0.02751428122215817,
      "grad_norm": 1.433759331703186,
      "learning_rate": 4.954142864629736e-05,
      "loss": 0.6079,
      "step": 1050
    },
    {
      "epoch": 0.02777632199570253,
      "grad_norm": 2.5895800590515137,
      "learning_rate": 4.9537061300071624e-05,
      "loss": 0.5789,
      "step": 1060
    },
    {
      "epoch": 0.028038362769246896,
      "grad_norm": 1.1626940965652466,
      "learning_rate": 4.953269395384589e-05,
      "loss": 0.5478,
      "step": 1070
    },
    {
      "epoch": 0.028300403542791258,
      "grad_norm": 1.6183115243911743,
      "learning_rate": 4.9528326607620144e-05,
      "loss": 0.6782,
      "step": 1080
    },
    {
      "epoch": 0.02856244431633562,
      "grad_norm": 1.5321258306503296,
      "learning_rate": 4.952395926139441e-05,
      "loss": 0.5878,
      "step": 1090
    },
    {
      "epoch": 0.028824485089879986,
      "grad_norm": 0.9456602334976196,
      "learning_rate": 4.9519591915168665e-05,
      "loss": 0.6277,
      "step": 1100
    },
    {
      "epoch": 0.029086525863424348,
      "grad_norm": 1.0988554954528809,
      "learning_rate": 4.951522456894293e-05,
      "loss": 0.5019,
      "step": 1110
    },
    {
      "epoch": 0.029348566636968713,
      "grad_norm": 1.053934931755066,
      "learning_rate": 4.9510857222717186e-05,
      "loss": 0.6578,
      "step": 1120
    },
    {
      "epoch": 0.029610607410513076,
      "grad_norm": 1.292533040046692,
      "learning_rate": 4.950648987649145e-05,
      "loss": 0.6115,
      "step": 1130
    },
    {
      "epoch": 0.029872648184057438,
      "grad_norm": 0.8503355383872986,
      "learning_rate": 4.950212253026571e-05,
      "loss": 0.56,
      "step": 1140
    },
    {
      "epoch": 0.030134688957601803,
      "grad_norm": 1.5698297023773193,
      "learning_rate": 4.9497755184039974e-05,
      "loss": 0.6208,
      "step": 1150
    },
    {
      "epoch": 0.030396729731146165,
      "grad_norm": 1.3464306592941284,
      "learning_rate": 4.9493387837814234e-05,
      "loss": 0.5812,
      "step": 1160
    },
    {
      "epoch": 0.03065877050469053,
      "grad_norm": 0.9388158321380615,
      "learning_rate": 4.9489020491588495e-05,
      "loss": 0.5684,
      "step": 1170
    },
    {
      "epoch": 0.030920811278234893,
      "grad_norm": 1.271061897277832,
      "learning_rate": 4.9484653145362755e-05,
      "loss": 0.5456,
      "step": 1180
    },
    {
      "epoch": 0.03118285205177926,
      "grad_norm": 0.8666424751281738,
      "learning_rate": 4.9480285799137015e-05,
      "loss": 0.5418,
      "step": 1190
    },
    {
      "epoch": 0.03144489282532362,
      "grad_norm": 1.2482844591140747,
      "learning_rate": 4.9475918452911276e-05,
      "loss": 0.7439,
      "step": 1200
    },
    {
      "epoch": 0.03170693359886798,
      "grad_norm": 1.226784110069275,
      "learning_rate": 4.9471551106685536e-05,
      "loss": 0.5813,
      "step": 1210
    },
    {
      "epoch": 0.03196897437241235,
      "grad_norm": 1.1704344749450684,
      "learning_rate": 4.9467183760459797e-05,
      "loss": 0.757,
      "step": 1220
    },
    {
      "epoch": 0.032231015145956714,
      "grad_norm": 1.0429288148880005,
      "learning_rate": 4.946281641423406e-05,
      "loss": 0.5162,
      "step": 1230
    },
    {
      "epoch": 0.03249305591950107,
      "grad_norm": 1.3331559896469116,
      "learning_rate": 4.945844906800832e-05,
      "loss": 0.6224,
      "step": 1240
    },
    {
      "epoch": 0.03275509669304544,
      "grad_norm": 1.2090318202972412,
      "learning_rate": 4.945408172178258e-05,
      "loss": 0.6473,
      "step": 1250
    },
    {
      "epoch": 0.033017137466589803,
      "grad_norm": 1.3969329595565796,
      "learning_rate": 4.944971437555684e-05,
      "loss": 0.5806,
      "step": 1260
    },
    {
      "epoch": 0.03327917824013416,
      "grad_norm": 1.6032638549804688,
      "learning_rate": 4.94453470293311e-05,
      "loss": 0.5766,
      "step": 1270
    },
    {
      "epoch": 0.03354121901367853,
      "grad_norm": 1.0155426263809204,
      "learning_rate": 4.944097968310536e-05,
      "loss": 0.5711,
      "step": 1280
    },
    {
      "epoch": 0.03380325978722289,
      "grad_norm": 1.2512564659118652,
      "learning_rate": 4.9436612336879626e-05,
      "loss": 0.6783,
      "step": 1290
    },
    {
      "epoch": 0.03406530056076726,
      "grad_norm": 1.0756443738937378,
      "learning_rate": 4.943224499065388e-05,
      "loss": 0.6255,
      "step": 1300
    },
    {
      "epoch": 0.03432734133431162,
      "grad_norm": 1.2002949714660645,
      "learning_rate": 4.942787764442815e-05,
      "loss": 0.6057,
      "step": 1310
    },
    {
      "epoch": 0.03458938210785598,
      "grad_norm": 0.6843838691711426,
      "learning_rate": 4.94235102982024e-05,
      "loss": 0.5411,
      "step": 1320
    },
    {
      "epoch": 0.03485142288140035,
      "grad_norm": 0.8038992285728455,
      "learning_rate": 4.941914295197667e-05,
      "loss": 0.6097,
      "step": 1330
    },
    {
      "epoch": 0.03511346365494471,
      "grad_norm": 1.2226810455322266,
      "learning_rate": 4.941477560575092e-05,
      "loss": 0.5731,
      "step": 1340
    },
    {
      "epoch": 0.03537550442848907,
      "grad_norm": 1.236194372177124,
      "learning_rate": 4.941040825952519e-05,
      "loss": 0.5677,
      "step": 1350
    },
    {
      "epoch": 0.03563754520203344,
      "grad_norm": 1.2129302024841309,
      "learning_rate": 4.940604091329944e-05,
      "loss": 0.6031,
      "step": 1360
    },
    {
      "epoch": 0.0358995859755778,
      "grad_norm": 1.5371068716049194,
      "learning_rate": 4.94016735670737e-05,
      "loss": 0.5957,
      "step": 1370
    },
    {
      "epoch": 0.03616162674912216,
      "grad_norm": 1.6025753021240234,
      "learning_rate": 4.939730622084797e-05,
      "loss": 0.6261,
      "step": 1380
    },
    {
      "epoch": 0.03642366752266653,
      "grad_norm": 1.3252272605895996,
      "learning_rate": 4.939293887462222e-05,
      "loss": 0.626,
      "step": 1390
    },
    {
      "epoch": 0.036685708296210894,
      "grad_norm": 1.0813733339309692,
      "learning_rate": 4.938857152839649e-05,
      "loss": 0.6659,
      "step": 1400
    },
    {
      "epoch": 0.03694774906975525,
      "grad_norm": 1.280362844467163,
      "learning_rate": 4.9384204182170744e-05,
      "loss": 0.5828,
      "step": 1410
    },
    {
      "epoch": 0.03720978984329962,
      "grad_norm": 0.7822287082672119,
      "learning_rate": 4.937983683594501e-05,
      "loss": 0.4939,
      "step": 1420
    },
    {
      "epoch": 0.03747183061684398,
      "grad_norm": 1.0170814990997314,
      "learning_rate": 4.9375469489719265e-05,
      "loss": 0.5846,
      "step": 1430
    },
    {
      "epoch": 0.03773387139038834,
      "grad_norm": 1.4380531311035156,
      "learning_rate": 4.937110214349353e-05,
      "loss": 0.5203,
      "step": 1440
    },
    {
      "epoch": 0.03799591216393271,
      "grad_norm": 0.9485931396484375,
      "learning_rate": 4.9366734797267785e-05,
      "loss": 0.4703,
      "step": 1450
    },
    {
      "epoch": 0.03825795293747707,
      "grad_norm": 1.4566253423690796,
      "learning_rate": 4.936236745104205e-05,
      "loss": 0.6879,
      "step": 1460
    },
    {
      "epoch": 0.03851999371102143,
      "grad_norm": 1.3309845924377441,
      "learning_rate": 4.935800010481631e-05,
      "loss": 0.6359,
      "step": 1470
    },
    {
      "epoch": 0.0387820344845658,
      "grad_norm": 0.8272371292114258,
      "learning_rate": 4.9353632758590573e-05,
      "loss": 0.5521,
      "step": 1480
    },
    {
      "epoch": 0.03904407525811016,
      "grad_norm": 1.122835397720337,
      "learning_rate": 4.9349265412364834e-05,
      "loss": 0.5871,
      "step": 1490
    },
    {
      "epoch": 0.03930611603165453,
      "grad_norm": 1.701686978340149,
      "learning_rate": 4.9344898066139094e-05,
      "loss": 0.5549,
      "step": 1500
    }
  ],
  "logging_steps": 10,
  "max_steps": 114486,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5608556199936000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}