aimonbc24's picture
Upload folder using huggingface_hub
0fc0117 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999898011218766,
"eval_steps": 500,
"global_step": 9804,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.7619047619047613e-08,
"loss": 5.1575,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 1.0204081632653061e-07,
"loss": 4.7952,
"step": 64
},
{
"epoch": 0.01,
"learning_rate": 1.564625850340136e-07,
"loss": 4.5513,
"step": 96
},
{
"epoch": 0.01,
"learning_rate": 2.0918367346938776e-07,
"loss": 3.9771,
"step": 128
},
{
"epoch": 0.02,
"learning_rate": 2.619047619047619e-07,
"loss": 3.2318,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 3.163265306122449e-07,
"loss": 2.508,
"step": 192
},
{
"epoch": 0.02,
"learning_rate": 3.707482993197279e-07,
"loss": 2.0866,
"step": 224
},
{
"epoch": 0.03,
"learning_rate": 4.2517006802721085e-07,
"loss": 1.8228,
"step": 256
},
{
"epoch": 0.03,
"learning_rate": 4.795918367346938e-07,
"loss": 1.6854,
"step": 288
},
{
"epoch": 0.03,
"learning_rate": 5.340136054421769e-07,
"loss": 1.4892,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 5.884353741496599e-07,
"loss": 1.2407,
"step": 352
},
{
"epoch": 0.04,
"learning_rate": 6.428571428571429e-07,
"loss": 1.0248,
"step": 384
},
{
"epoch": 0.04,
"learning_rate": 6.95578231292517e-07,
"loss": 0.577,
"step": 416
},
{
"epoch": 0.05,
"learning_rate": 7.5e-07,
"loss": 0.401,
"step": 448
},
{
"epoch": 0.05,
"learning_rate": 8.04421768707483e-07,
"loss": 0.3855,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 8.58843537414966e-07,
"loss": 0.3437,
"step": 512
},
{
"epoch": 0.06,
"learning_rate": 9.132653061224489e-07,
"loss": 0.3528,
"step": 544
},
{
"epoch": 0.06,
"learning_rate": 9.67687074829932e-07,
"loss": 0.4408,
"step": 576
},
{
"epoch": 0.06,
"learning_rate": 9.985895627644569e-07,
"loss": 0.4262,
"step": 608
},
{
"epoch": 0.07,
"learning_rate": 9.951177172615818e-07,
"loss": 0.3917,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 9.916458717587067e-07,
"loss": 0.4139,
"step": 672
},
{
"epoch": 0.07,
"learning_rate": 9.881740262558316e-07,
"loss": 0.3101,
"step": 704
},
{
"epoch": 0.08,
"learning_rate": 9.847021807529563e-07,
"loss": 0.4566,
"step": 736
},
{
"epoch": 0.08,
"learning_rate": 9.812303352500815e-07,
"loss": 0.2914,
"step": 768
},
{
"epoch": 0.08,
"learning_rate": 9.777584897472062e-07,
"loss": 0.4062,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 9.74286644244331e-07,
"loss": 0.3887,
"step": 832
},
{
"epoch": 0.09,
"learning_rate": 9.70814798741456e-07,
"loss": 0.3655,
"step": 864
},
{
"epoch": 0.09,
"learning_rate": 9.67342953238581e-07,
"loss": 0.3668,
"step": 896
},
{
"epoch": 0.09,
"learning_rate": 9.638711077357056e-07,
"loss": 0.3761,
"step": 928
},
{
"epoch": 0.1,
"learning_rate": 9.603992622328305e-07,
"loss": 0.3528,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 9.569274167299554e-07,
"loss": 0.2583,
"step": 992
},
{
"epoch": 0.1,
"learning_rate": 9.534555712270804e-07,
"loss": 0.3145,
"step": 1024
},
{
"epoch": 0.11,
"learning_rate": 9.499837257242053e-07,
"loss": 0.2842,
"step": 1056
},
{
"epoch": 0.11,
"learning_rate": 9.465118802213301e-07,
"loss": 0.2259,
"step": 1088
},
{
"epoch": 0.11,
"learning_rate": 9.430400347184549e-07,
"loss": 0.3493,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 9.395681892155799e-07,
"loss": 0.3421,
"step": 1152
},
{
"epoch": 0.12,
"learning_rate": 9.360963437127047e-07,
"loss": 0.4288,
"step": 1184
},
{
"epoch": 0.12,
"learning_rate": 9.326244982098295e-07,
"loss": 0.2881,
"step": 1216
},
{
"epoch": 0.13,
"learning_rate": 9.291526527069546e-07,
"loss": 0.3361,
"step": 1248
},
{
"epoch": 0.13,
"learning_rate": 9.256808072040794e-07,
"loss": 0.2797,
"step": 1280
},
{
"epoch": 0.13,
"learning_rate": 9.222089617012042e-07,
"loss": 0.2531,
"step": 1312
},
{
"epoch": 0.14,
"learning_rate": 9.187371161983291e-07,
"loss": 0.282,
"step": 1344
},
{
"epoch": 0.14,
"learning_rate": 9.15265270695454e-07,
"loss": 0.3408,
"step": 1376
},
{
"epoch": 0.14,
"learning_rate": 9.117934251925788e-07,
"loss": 0.2354,
"step": 1408
},
{
"epoch": 0.15,
"learning_rate": 9.083215796897038e-07,
"loss": 0.3474,
"step": 1440
},
{
"epoch": 0.15,
"learning_rate": 9.048497341868287e-07,
"loss": 0.3546,
"step": 1472
},
{
"epoch": 0.15,
"learning_rate": 9.013778886839535e-07,
"loss": 0.3966,
"step": 1504
},
{
"epoch": 0.16,
"learning_rate": 8.979060431810784e-07,
"loss": 0.3376,
"step": 1536
},
{
"epoch": 0.16,
"learning_rate": 8.944341976782033e-07,
"loss": 0.3938,
"step": 1568
},
{
"epoch": 0.16,
"learning_rate": 8.909623521753281e-07,
"loss": 0.3343,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 8.87490506672453e-07,
"loss": 0.3399,
"step": 1632
},
{
"epoch": 0.17,
"learning_rate": 8.840186611695779e-07,
"loss": 0.3072,
"step": 1664
},
{
"epoch": 0.17,
"learning_rate": 8.805468156667028e-07,
"loss": 0.262,
"step": 1696
},
{
"epoch": 0.18,
"learning_rate": 8.770749701638277e-07,
"loss": 0.2995,
"step": 1728
},
{
"epoch": 0.18,
"learning_rate": 8.736031246609525e-07,
"loss": 0.2804,
"step": 1760
},
{
"epoch": 0.18,
"learning_rate": 8.701312791580774e-07,
"loss": 0.3693,
"step": 1792
},
{
"epoch": 0.19,
"learning_rate": 8.666594336552023e-07,
"loss": 0.2589,
"step": 1824
},
{
"epoch": 0.19,
"learning_rate": 8.631875881523272e-07,
"loss": 0.2638,
"step": 1856
},
{
"epoch": 0.19,
"learning_rate": 8.59715742649452e-07,
"loss": 0.3516,
"step": 1888
},
{
"epoch": 0.2,
"learning_rate": 8.56243897146577e-07,
"loss": 0.369,
"step": 1920
},
{
"epoch": 0.2,
"learning_rate": 8.527720516437018e-07,
"loss": 0.3453,
"step": 1952
},
{
"epoch": 0.2,
"learning_rate": 8.493002061408266e-07,
"loss": 0.3813,
"step": 1984
},
{
"epoch": 0.21,
"learning_rate": 8.458283606379516e-07,
"loss": 0.2657,
"step": 2016
},
{
"epoch": 0.21,
"learning_rate": 8.423565151350764e-07,
"loss": 0.3514,
"step": 2048
},
{
"epoch": 0.21,
"learning_rate": 8.388846696322013e-07,
"loss": 0.2214,
"step": 2080
},
{
"epoch": 0.22,
"learning_rate": 8.354128241293263e-07,
"loss": 0.4351,
"step": 2112
},
{
"epoch": 0.22,
"learning_rate": 8.319409786264511e-07,
"loss": 0.2986,
"step": 2144
},
{
"epoch": 0.22,
"learning_rate": 8.284691331235759e-07,
"loss": 0.2211,
"step": 2176
},
{
"epoch": 0.23,
"learning_rate": 8.249972876207008e-07,
"loss": 0.2291,
"step": 2208
},
{
"epoch": 0.23,
"learning_rate": 8.215254421178257e-07,
"loss": 0.3048,
"step": 2240
},
{
"epoch": 0.23,
"learning_rate": 8.180535966149505e-07,
"loss": 0.3369,
"step": 2272
},
{
"epoch": 0.23,
"learning_rate": 8.145817511120755e-07,
"loss": 0.2413,
"step": 2304
},
{
"epoch": 0.24,
"learning_rate": 8.111099056092004e-07,
"loss": 0.2656,
"step": 2336
},
{
"epoch": 0.24,
"learning_rate": 8.076380601063252e-07,
"loss": 0.2661,
"step": 2368
},
{
"epoch": 0.24,
"learning_rate": 8.041662146034501e-07,
"loss": 0.3069,
"step": 2400
},
{
"epoch": 0.25,
"learning_rate": 8.00694369100575e-07,
"loss": 0.2357,
"step": 2432
},
{
"epoch": 0.25,
"learning_rate": 7.972225235976998e-07,
"loss": 0.3374,
"step": 2464
},
{
"epoch": 0.25,
"learning_rate": 7.937506780948248e-07,
"loss": 0.2678,
"step": 2496
},
{
"epoch": 0.26,
"learning_rate": 7.902788325919496e-07,
"loss": 0.2813,
"step": 2528
},
{
"epoch": 0.26,
"learning_rate": 7.868069870890745e-07,
"loss": 0.386,
"step": 2560
},
{
"epoch": 0.26,
"learning_rate": 7.833351415861994e-07,
"loss": 0.2247,
"step": 2592
},
{
"epoch": 0.27,
"learning_rate": 7.798632960833242e-07,
"loss": 0.3458,
"step": 2624
},
{
"epoch": 0.27,
"learning_rate": 7.763914505804491e-07,
"loss": 0.2967,
"step": 2656
},
{
"epoch": 0.27,
"learning_rate": 7.72919605077574e-07,
"loss": 0.277,
"step": 2688
},
{
"epoch": 0.28,
"learning_rate": 7.694477595746989e-07,
"loss": 0.2149,
"step": 2720
},
{
"epoch": 0.28,
"learning_rate": 7.659759140718237e-07,
"loss": 0.3624,
"step": 2752
},
{
"epoch": 0.28,
"learning_rate": 7.625040685689487e-07,
"loss": 0.3259,
"step": 2784
},
{
"epoch": 0.29,
"learning_rate": 7.590322230660735e-07,
"loss": 0.2905,
"step": 2816
},
{
"epoch": 0.29,
"learning_rate": 7.555603775631984e-07,
"loss": 0.2397,
"step": 2848
},
{
"epoch": 0.29,
"learning_rate": 7.520885320603233e-07,
"loss": 0.4047,
"step": 2880
},
{
"epoch": 0.3,
"learning_rate": 7.486166865574481e-07,
"loss": 0.258,
"step": 2912
},
{
"epoch": 0.3,
"learning_rate": 7.451448410545731e-07,
"loss": 0.3667,
"step": 2944
},
{
"epoch": 0.3,
"learning_rate": 7.41672995551698e-07,
"loss": 0.3022,
"step": 2976
},
{
"epoch": 0.31,
"learning_rate": 7.382011500488228e-07,
"loss": 0.2435,
"step": 3008
},
{
"epoch": 0.31,
"learning_rate": 7.347293045459477e-07,
"loss": 0.3491,
"step": 3040
},
{
"epoch": 0.31,
"learning_rate": 7.312574590430725e-07,
"loss": 0.2885,
"step": 3072
},
{
"epoch": 0.32,
"learning_rate": 7.277856135401974e-07,
"loss": 0.3822,
"step": 3104
},
{
"epoch": 0.32,
"learning_rate": 7.243137680373224e-07,
"loss": 0.3265,
"step": 3136
},
{
"epoch": 0.32,
"learning_rate": 7.208419225344472e-07,
"loss": 0.2879,
"step": 3168
},
{
"epoch": 0.33,
"learning_rate": 7.173700770315721e-07,
"loss": 0.3678,
"step": 3200
},
{
"epoch": 0.33,
"learning_rate": 7.13898231528697e-07,
"loss": 0.2696,
"step": 3232
},
{
"epoch": 0.33,
"learning_rate": 7.104263860258218e-07,
"loss": 0.3353,
"step": 3264
},
{
"epoch": 0.34,
"learning_rate": 7.069545405229467e-07,
"loss": 0.3552,
"step": 3296
},
{
"epoch": 0.34,
"learning_rate": 7.034826950200716e-07,
"loss": 0.2246,
"step": 3328
},
{
"epoch": 0.34,
"learning_rate": 7.000108495171965e-07,
"loss": 0.2622,
"step": 3360
},
{
"epoch": 0.35,
"learning_rate": 6.965390040143213e-07,
"loss": 0.286,
"step": 3392
},
{
"epoch": 0.35,
"learning_rate": 6.930671585114463e-07,
"loss": 0.2755,
"step": 3424
},
{
"epoch": 0.35,
"learning_rate": 6.895953130085711e-07,
"loss": 0.3111,
"step": 3456
},
{
"epoch": 0.36,
"learning_rate": 6.861234675056959e-07,
"loss": 0.2431,
"step": 3488
},
{
"epoch": 0.36,
"learning_rate": 6.826516220028209e-07,
"loss": 0.2097,
"step": 3520
},
{
"epoch": 0.36,
"learning_rate": 6.791797764999457e-07,
"loss": 0.3134,
"step": 3552
},
{
"epoch": 0.37,
"learning_rate": 6.757079309970706e-07,
"loss": 0.2535,
"step": 3584
},
{
"epoch": 0.37,
"learning_rate": 6.722360854941956e-07,
"loss": 0.3329,
"step": 3616
},
{
"epoch": 0.37,
"learning_rate": 6.687642399913204e-07,
"loss": 0.2857,
"step": 3648
},
{
"epoch": 0.38,
"learning_rate": 6.652923944884452e-07,
"loss": 0.2697,
"step": 3680
},
{
"epoch": 0.38,
"learning_rate": 6.618205489855701e-07,
"loss": 0.2815,
"step": 3712
},
{
"epoch": 0.38,
"learning_rate": 6.58348703482695e-07,
"loss": 0.2856,
"step": 3744
},
{
"epoch": 0.39,
"learning_rate": 6.548768579798198e-07,
"loss": 0.2912,
"step": 3776
},
{
"epoch": 0.39,
"learning_rate": 6.514050124769448e-07,
"loss": 0.2916,
"step": 3808
},
{
"epoch": 0.39,
"learning_rate": 6.479331669740697e-07,
"loss": 0.308,
"step": 3840
},
{
"epoch": 0.39,
"learning_rate": 6.444613214711945e-07,
"loss": 0.2934,
"step": 3872
},
{
"epoch": 0.4,
"learning_rate": 6.409894759683194e-07,
"loss": 0.2869,
"step": 3904
},
{
"epoch": 0.4,
"learning_rate": 6.375176304654442e-07,
"loss": 0.3232,
"step": 3936
},
{
"epoch": 0.4,
"learning_rate": 6.340457849625691e-07,
"loss": 0.2486,
"step": 3968
},
{
"epoch": 0.41,
"learning_rate": 6.305739394596941e-07,
"loss": 0.3123,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 6.271020939568189e-07,
"loss": 0.3183,
"step": 4032
},
{
"epoch": 0.41,
"learning_rate": 6.236302484539438e-07,
"loss": 0.3688,
"step": 4064
},
{
"epoch": 0.42,
"learning_rate": 6.201584029510687e-07,
"loss": 0.3236,
"step": 4096
},
{
"epoch": 0.42,
"learning_rate": 6.166865574481935e-07,
"loss": 0.2206,
"step": 4128
},
{
"epoch": 0.42,
"learning_rate": 6.132147119453183e-07,
"loss": 0.2519,
"step": 4160
},
{
"epoch": 0.43,
"learning_rate": 6.097428664424433e-07,
"loss": 0.2467,
"step": 4192
},
{
"epoch": 0.43,
"learning_rate": 6.062710209395682e-07,
"loss": 0.2555,
"step": 4224
},
{
"epoch": 0.43,
"learning_rate": 6.02799175436693e-07,
"loss": 0.2391,
"step": 4256
},
{
"epoch": 0.44,
"learning_rate": 5.99327329933818e-07,
"loss": 0.3167,
"step": 4288
},
{
"epoch": 0.44,
"learning_rate": 5.958554844309428e-07,
"loss": 0.2342,
"step": 4320
},
{
"epoch": 0.44,
"learning_rate": 5.923836389280676e-07,
"loss": 0.1986,
"step": 4352
},
{
"epoch": 0.45,
"learning_rate": 5.889117934251926e-07,
"loss": 0.2359,
"step": 4384
},
{
"epoch": 0.45,
"learning_rate": 5.854399479223174e-07,
"loss": 0.2453,
"step": 4416
},
{
"epoch": 0.45,
"learning_rate": 5.819681024194423e-07,
"loss": 0.3077,
"step": 4448
},
{
"epoch": 0.46,
"learning_rate": 5.784962569165672e-07,
"loss": 0.2052,
"step": 4480
},
{
"epoch": 0.46,
"learning_rate": 5.750244114136921e-07,
"loss": 0.2313,
"step": 4512
},
{
"epoch": 0.46,
"learning_rate": 5.715525659108169e-07,
"loss": 0.3616,
"step": 4544
},
{
"epoch": 0.47,
"learning_rate": 5.680807204079418e-07,
"loss": 0.2848,
"step": 4576
},
{
"epoch": 0.47,
"learning_rate": 5.646088749050667e-07,
"loss": 0.2016,
"step": 4608
},
{
"epoch": 0.47,
"learning_rate": 5.611370294021915e-07,
"loss": 0.2467,
"step": 4640
},
{
"epoch": 0.48,
"learning_rate": 5.576651838993165e-07,
"loss": 0.3043,
"step": 4672
},
{
"epoch": 0.48,
"learning_rate": 5.541933383964414e-07,
"loss": 0.2283,
"step": 4704
},
{
"epoch": 0.48,
"learning_rate": 5.507214928935662e-07,
"loss": 0.1914,
"step": 4736
},
{
"epoch": 0.49,
"learning_rate": 5.472496473906911e-07,
"loss": 0.1776,
"step": 4768
},
{
"epoch": 0.49,
"learning_rate": 5.437778018878159e-07,
"loss": 0.2395,
"step": 4800
},
{
"epoch": 0.49,
"learning_rate": 5.403059563849408e-07,
"loss": 0.3197,
"step": 4832
},
{
"epoch": 0.5,
"learning_rate": 5.368341108820658e-07,
"loss": 0.2135,
"step": 4864
},
{
"epoch": 0.5,
"learning_rate": 5.333622653791906e-07,
"loss": 0.2104,
"step": 4896
},
{
"epoch": 0.5,
"learning_rate": 5.298904198763155e-07,
"loss": 0.2355,
"step": 4928
},
{
"epoch": 0.51,
"learning_rate": 5.264185743734404e-07,
"loss": 0.34,
"step": 4960
},
{
"epoch": 0.51,
"learning_rate": 5.229467288705652e-07,
"loss": 0.3004,
"step": 4992
},
{
"epoch": 0.51,
"learning_rate": 5.1947488336769e-07,
"loss": 0.3025,
"step": 5024
},
{
"epoch": 0.52,
"learning_rate": 5.16003037864815e-07,
"loss": 0.2561,
"step": 5056
},
{
"epoch": 0.52,
"learning_rate": 5.125311923619399e-07,
"loss": 0.2891,
"step": 5088
},
{
"epoch": 0.52,
"learning_rate": 5.090593468590647e-07,
"loss": 0.2668,
"step": 5120
},
{
"epoch": 0.53,
"learning_rate": 5.055875013561897e-07,
"loss": 0.2539,
"step": 5152
},
{
"epoch": 0.53,
"learning_rate": 5.021156558533145e-07,
"loss": 0.2294,
"step": 5184
},
{
"epoch": 0.53,
"learning_rate": 4.986438103504394e-07,
"loss": 0.2286,
"step": 5216
},
{
"epoch": 0.54,
"learning_rate": 4.951719648475642e-07,
"loss": 0.2987,
"step": 5248
},
{
"epoch": 0.54,
"learning_rate": 4.917001193446891e-07,
"loss": 0.2871,
"step": 5280
},
{
"epoch": 0.54,
"learning_rate": 4.882282738418141e-07,
"loss": 0.3253,
"step": 5312
},
{
"epoch": 0.55,
"learning_rate": 4.847564283389389e-07,
"loss": 0.2106,
"step": 5344
},
{
"epoch": 0.55,
"learning_rate": 4.812845828360638e-07,
"loss": 0.2428,
"step": 5376
},
{
"epoch": 0.55,
"learning_rate": 4.778127373331887e-07,
"loss": 0.1568,
"step": 5408
},
{
"epoch": 0.55,
"learning_rate": 4.743408918303135e-07,
"loss": 0.2879,
"step": 5440
},
{
"epoch": 0.56,
"learning_rate": 4.708690463274384e-07,
"loss": 0.2221,
"step": 5472
},
{
"epoch": 0.56,
"learning_rate": 4.673972008245633e-07,
"loss": 0.3336,
"step": 5504
},
{
"epoch": 0.56,
"learning_rate": 4.639253553216881e-07,
"loss": 0.2265,
"step": 5536
},
{
"epoch": 0.57,
"learning_rate": 4.6045350981881303e-07,
"loss": 0.2408,
"step": 5568
},
{
"epoch": 0.57,
"learning_rate": 4.5698166431593795e-07,
"loss": 0.3562,
"step": 5600
},
{
"epoch": 0.57,
"learning_rate": 4.5350981881306276e-07,
"loss": 0.1839,
"step": 5632
},
{
"epoch": 0.58,
"learning_rate": 4.500379733101877e-07,
"loss": 0.2341,
"step": 5664
},
{
"epoch": 0.58,
"learning_rate": 4.4656612780731254e-07,
"loss": 0.2683,
"step": 5696
},
{
"epoch": 0.58,
"learning_rate": 4.430942823044374e-07,
"loss": 0.3978,
"step": 5728
},
{
"epoch": 0.59,
"learning_rate": 4.396224368015623e-07,
"loss": 0.2493,
"step": 5760
},
{
"epoch": 0.59,
"learning_rate": 4.361505912986872e-07,
"loss": 0.28,
"step": 5792
},
{
"epoch": 0.59,
"learning_rate": 4.3267874579581205e-07,
"loss": 0.2263,
"step": 5824
},
{
"epoch": 0.6,
"learning_rate": 4.292069002929369e-07,
"loss": 0.3391,
"step": 5856
},
{
"epoch": 0.6,
"learning_rate": 4.2573505479006183e-07,
"loss": 0.3003,
"step": 5888
},
{
"epoch": 0.6,
"learning_rate": 4.2226320928718675e-07,
"loss": 0.2635,
"step": 5920
},
{
"epoch": 0.61,
"learning_rate": 4.1879136378431156e-07,
"loss": 0.2997,
"step": 5952
},
{
"epoch": 0.61,
"learning_rate": 4.153195182814365e-07,
"loss": 0.2135,
"step": 5984
},
{
"epoch": 0.61,
"learning_rate": 4.1184767277856134e-07,
"loss": 0.2651,
"step": 6016
},
{
"epoch": 0.62,
"learning_rate": 4.083758272756862e-07,
"loss": 0.1867,
"step": 6048
},
{
"epoch": 0.62,
"learning_rate": 4.049039817728111e-07,
"loss": 0.1986,
"step": 6080
},
{
"epoch": 0.62,
"learning_rate": 4.01432136269936e-07,
"loss": 0.2905,
"step": 6112
},
{
"epoch": 0.63,
"learning_rate": 3.9796029076706085e-07,
"loss": 0.2226,
"step": 6144
},
{
"epoch": 0.63,
"learning_rate": 3.944884452641857e-07,
"loss": 0.2472,
"step": 6176
},
{
"epoch": 0.63,
"learning_rate": 3.9101659976131063e-07,
"loss": 0.2103,
"step": 6208
},
{
"epoch": 0.64,
"learning_rate": 3.8754475425843544e-07,
"loss": 0.2976,
"step": 6240
},
{
"epoch": 0.64,
"learning_rate": 3.8407290875556036e-07,
"loss": 0.2777,
"step": 6272
},
{
"epoch": 0.64,
"learning_rate": 3.806010632526853e-07,
"loss": 0.2452,
"step": 6304
},
{
"epoch": 0.65,
"learning_rate": 3.771292177498101e-07,
"loss": 0.2243,
"step": 6336
},
{
"epoch": 0.65,
"learning_rate": 3.73657372246935e-07,
"loss": 0.3171,
"step": 6368
},
{
"epoch": 0.65,
"learning_rate": 3.7018552674405987e-07,
"loss": 0.2472,
"step": 6400
},
{
"epoch": 0.66,
"learning_rate": 3.6671368124118474e-07,
"loss": 0.2778,
"step": 6432
},
{
"epoch": 0.66,
"learning_rate": 3.6335033091027445e-07,
"loss": 0.2834,
"step": 6464
},
{
"epoch": 0.66,
"learning_rate": 3.5987848540739937e-07,
"loss": 0.23,
"step": 6496
},
{
"epoch": 0.67,
"learning_rate": 3.5640663990452423e-07,
"loss": 0.2185,
"step": 6528
},
{
"epoch": 0.67,
"learning_rate": 3.529347944016491e-07,
"loss": 0.282,
"step": 6560
},
{
"epoch": 0.67,
"learning_rate": 3.4946294889877396e-07,
"loss": 0.1885,
"step": 6592
},
{
"epoch": 0.68,
"learning_rate": 3.459911033958989e-07,
"loss": 0.263,
"step": 6624
},
{
"epoch": 0.68,
"learning_rate": 3.425192578930238e-07,
"loss": 0.2431,
"step": 6656
},
{
"epoch": 0.68,
"learning_rate": 3.390474123901486e-07,
"loss": 0.2472,
"step": 6688
},
{
"epoch": 0.69,
"learning_rate": 3.355755668872735e-07,
"loss": 0.284,
"step": 6720
},
{
"epoch": 0.69,
"learning_rate": 3.321037213843984e-07,
"loss": 0.2947,
"step": 6752
},
{
"epoch": 0.69,
"learning_rate": 3.2863187588152325e-07,
"loss": 0.2448,
"step": 6784
},
{
"epoch": 0.7,
"learning_rate": 3.251600303786481e-07,
"loss": 0.3035,
"step": 6816
},
{
"epoch": 0.7,
"learning_rate": 3.2168818487577303e-07,
"loss": 0.3378,
"step": 6848
},
{
"epoch": 0.7,
"learning_rate": 3.182163393728979e-07,
"loss": 0.2878,
"step": 6880
},
{
"epoch": 0.7,
"learning_rate": 3.1474449387002276e-07,
"loss": 0.1969,
"step": 6912
},
{
"epoch": 0.71,
"learning_rate": 3.112726483671477e-07,
"loss": 0.2282,
"step": 6944
},
{
"epoch": 0.71,
"learning_rate": 3.078008028642725e-07,
"loss": 0.2472,
"step": 6976
},
{
"epoch": 0.71,
"learning_rate": 3.043289573613974e-07,
"loss": 0.3127,
"step": 7008
},
{
"epoch": 0.72,
"learning_rate": 3.008571118585223e-07,
"loss": 0.3757,
"step": 7040
},
{
"epoch": 0.72,
"learning_rate": 2.9738526635564714e-07,
"loss": 0.2801,
"step": 7072
},
{
"epoch": 0.72,
"learning_rate": 2.9402191602473685e-07,
"loss": 0.3094,
"step": 7104
},
{
"epoch": 0.73,
"learning_rate": 2.9055007052186177e-07,
"loss": 0.3006,
"step": 7136
},
{
"epoch": 0.73,
"learning_rate": 2.8707822501898663e-07,
"loss": 0.2431,
"step": 7168
},
{
"epoch": 0.73,
"learning_rate": 2.836063795161115e-07,
"loss": 0.217,
"step": 7200
},
{
"epoch": 0.74,
"learning_rate": 2.801345340132364e-07,
"loss": 0.1981,
"step": 7232
},
{
"epoch": 0.74,
"learning_rate": 2.766626885103613e-07,
"loss": 0.242,
"step": 7264
},
{
"epoch": 0.74,
"learning_rate": 2.7319084300748614e-07,
"loss": 0.2426,
"step": 7296
},
{
"epoch": 0.75,
"learning_rate": 2.69718997504611e-07,
"loss": 0.2771,
"step": 7328
},
{
"epoch": 0.75,
"learning_rate": 2.662471520017359e-07,
"loss": 0.2508,
"step": 7360
},
{
"epoch": 0.75,
"learning_rate": 2.6277530649886084e-07,
"loss": 0.2495,
"step": 7392
},
{
"epoch": 0.76,
"learning_rate": 2.5930346099598565e-07,
"loss": 0.2353,
"step": 7424
},
{
"epoch": 0.76,
"learning_rate": 2.5583161549311057e-07,
"loss": 0.2047,
"step": 7456
},
{
"epoch": 0.76,
"learning_rate": 2.5235976999023543e-07,
"loss": 0.36,
"step": 7488
},
{
"epoch": 0.77,
"learning_rate": 2.488879244873603e-07,
"loss": 0.2685,
"step": 7520
},
{
"epoch": 0.77,
"learning_rate": 2.4541607898448516e-07,
"loss": 0.2505,
"step": 7552
},
{
"epoch": 0.77,
"learning_rate": 2.419442334816101e-07,
"loss": 0.2223,
"step": 7584
},
{
"epoch": 0.78,
"learning_rate": 2.3847238797873494e-07,
"loss": 0.2466,
"step": 7616
},
{
"epoch": 0.78,
"learning_rate": 2.350005424758598e-07,
"loss": 0.2872,
"step": 7648
},
{
"epoch": 0.78,
"learning_rate": 2.315286969729847e-07,
"loss": 0.2257,
"step": 7680
},
{
"epoch": 0.79,
"learning_rate": 2.2805685147010956e-07,
"loss": 0.2488,
"step": 7712
},
{
"epoch": 0.79,
"learning_rate": 2.2458500596723445e-07,
"loss": 0.3667,
"step": 7744
},
{
"epoch": 0.79,
"learning_rate": 2.2111316046435932e-07,
"loss": 0.2617,
"step": 7776
},
{
"epoch": 0.8,
"learning_rate": 2.1764131496148418e-07,
"loss": 0.1686,
"step": 7808
},
{
"epoch": 0.8,
"learning_rate": 2.141694694586091e-07,
"loss": 0.2216,
"step": 7840
},
{
"epoch": 0.8,
"learning_rate": 2.1069762395573396e-07,
"loss": 0.2418,
"step": 7872
},
{
"epoch": 0.81,
"learning_rate": 2.0722577845285885e-07,
"loss": 0.2023,
"step": 7904
},
{
"epoch": 0.81,
"learning_rate": 2.0375393294998372e-07,
"loss": 0.3171,
"step": 7936
},
{
"epoch": 0.81,
"learning_rate": 2.0028208744710858e-07,
"loss": 0.2518,
"step": 7968
},
{
"epoch": 0.82,
"learning_rate": 1.9681024194423347e-07,
"loss": 0.2388,
"step": 8000
},
{
"epoch": 0.82,
"learning_rate": 1.9333839644135836e-07,
"loss": 0.2612,
"step": 8032
},
{
"epoch": 0.82,
"learning_rate": 1.8986655093848323e-07,
"loss": 0.3058,
"step": 8064
},
{
"epoch": 0.83,
"learning_rate": 1.8639470543560812e-07,
"loss": 0.2292,
"step": 8096
},
{
"epoch": 0.83,
"learning_rate": 1.8292285993273298e-07,
"loss": 0.3214,
"step": 8128
},
{
"epoch": 0.83,
"learning_rate": 1.7945101442985785e-07,
"loss": 0.2081,
"step": 8160
},
{
"epoch": 0.84,
"learning_rate": 1.7597916892698276e-07,
"loss": 0.3473,
"step": 8192
},
{
"epoch": 0.84,
"learning_rate": 1.7250732342410763e-07,
"loss": 0.2174,
"step": 8224
},
{
"epoch": 0.84,
"learning_rate": 1.690354779212325e-07,
"loss": 0.2021,
"step": 8256
},
{
"epoch": 0.85,
"learning_rate": 1.6556363241835738e-07,
"loss": 0.2808,
"step": 8288
},
{
"epoch": 0.85,
"learning_rate": 1.6209178691548225e-07,
"loss": 0.2753,
"step": 8320
},
{
"epoch": 0.85,
"learning_rate": 1.586199414126071e-07,
"loss": 0.1965,
"step": 8352
},
{
"epoch": 0.86,
"learning_rate": 1.5514809590973203e-07,
"loss": 0.2076,
"step": 8384
},
{
"epoch": 0.86,
"learning_rate": 1.516762504068569e-07,
"loss": 0.2856,
"step": 8416
},
{
"epoch": 0.86,
"learning_rate": 1.4820440490398176e-07,
"loss": 0.2812,
"step": 8448
},
{
"epoch": 0.86,
"learning_rate": 1.4473255940110665e-07,
"loss": 0.28,
"step": 8480
},
{
"epoch": 0.87,
"learning_rate": 1.412607138982315e-07,
"loss": 0.2871,
"step": 8512
},
{
"epoch": 0.87,
"learning_rate": 1.377888683953564e-07,
"loss": 0.2063,
"step": 8544
},
{
"epoch": 0.87,
"learning_rate": 1.343170228924813e-07,
"loss": 0.2929,
"step": 8576
},
{
"epoch": 0.88,
"learning_rate": 1.3084517738960616e-07,
"loss": 0.2784,
"step": 8608
},
{
"epoch": 0.88,
"learning_rate": 1.2737333188673102e-07,
"loss": 0.2889,
"step": 8640
},
{
"epoch": 0.88,
"learning_rate": 1.239014863838559e-07,
"loss": 0.2367,
"step": 8672
},
{
"epoch": 0.89,
"learning_rate": 1.2042964088098078e-07,
"loss": 0.2572,
"step": 8704
},
{
"epoch": 0.89,
"learning_rate": 1.1695779537810567e-07,
"loss": 0.2167,
"step": 8736
},
{
"epoch": 0.89,
"learning_rate": 1.1348594987523056e-07,
"loss": 0.2448,
"step": 8768
},
{
"epoch": 0.9,
"learning_rate": 1.1001410437235542e-07,
"loss": 0.2384,
"step": 8800
},
{
"epoch": 0.9,
"learning_rate": 1.065422588694803e-07,
"loss": 0.2742,
"step": 8832
},
{
"epoch": 0.9,
"learning_rate": 1.0307041336660519e-07,
"loss": 0.2615,
"step": 8864
},
{
"epoch": 0.91,
"learning_rate": 9.959856786373005e-08,
"loss": 0.2511,
"step": 8896
},
{
"epoch": 0.91,
"learning_rate": 9.612672236085493e-08,
"loss": 0.218,
"step": 8928
},
{
"epoch": 0.91,
"learning_rate": 9.265487685797982e-08,
"loss": 0.1846,
"step": 8960
},
{
"epoch": 0.92,
"learning_rate": 8.918303135510468e-08,
"loss": 0.2521,
"step": 8992
},
{
"epoch": 0.92,
"learning_rate": 8.571118585222958e-08,
"loss": 0.1836,
"step": 9024
},
{
"epoch": 0.92,
"learning_rate": 8.223934034935445e-08,
"loss": 0.2421,
"step": 9056
},
{
"epoch": 0.93,
"learning_rate": 7.876749484647933e-08,
"loss": 0.2631,
"step": 9088
},
{
"epoch": 0.93,
"learning_rate": 7.529564934360421e-08,
"loss": 0.2602,
"step": 9120
},
{
"epoch": 0.93,
"learning_rate": 7.182380384072909e-08,
"loss": 0.2896,
"step": 9152
},
{
"epoch": 0.94,
"learning_rate": 6.835195833785396e-08,
"loss": 0.2901,
"step": 9184
},
{
"epoch": 0.94,
"learning_rate": 6.488011283497884e-08,
"loss": 0.2488,
"step": 9216
},
{
"epoch": 0.94,
"learning_rate": 6.140826733210372e-08,
"loss": 0.2209,
"step": 9248
},
{
"epoch": 0.95,
"learning_rate": 5.7936421829228595e-08,
"loss": 0.2494,
"step": 9280
},
{
"epoch": 0.95,
"learning_rate": 5.446457632635348e-08,
"loss": 0.251,
"step": 9312
},
{
"epoch": 0.95,
"learning_rate": 5.099273082347835e-08,
"loss": 0.2243,
"step": 9344
},
{
"epoch": 0.96,
"learning_rate": 4.752088532060323e-08,
"loss": 0.259,
"step": 9376
},
{
"epoch": 0.96,
"learning_rate": 4.404903981772811e-08,
"loss": 0.267,
"step": 9408
},
{
"epoch": 0.96,
"learning_rate": 4.057719431485299e-08,
"loss": 0.2674,
"step": 9440
},
{
"epoch": 0.97,
"learning_rate": 3.7105348811977866e-08,
"loss": 0.2383,
"step": 9472
},
{
"epoch": 0.97,
"learning_rate": 3.363350330910274e-08,
"loss": 0.2545,
"step": 9504
},
{
"epoch": 0.97,
"learning_rate": 3.016165780622762e-08,
"loss": 0.2904,
"step": 9536
},
{
"epoch": 0.98,
"learning_rate": 2.6689812303352498e-08,
"loss": 0.2211,
"step": 9568
},
{
"epoch": 0.98,
"learning_rate": 2.321796680047738e-08,
"loss": 0.2802,
"step": 9600
},
{
"epoch": 0.98,
"learning_rate": 1.9746121297602256e-08,
"loss": 0.2471,
"step": 9632
},
{
"epoch": 0.99,
"learning_rate": 1.6274275794727136e-08,
"loss": 0.2593,
"step": 9664
},
{
"epoch": 0.99,
"learning_rate": 1.2802430291852012e-08,
"loss": 0.1877,
"step": 9696
},
{
"epoch": 0.99,
"learning_rate": 9.33058478897689e-09,
"loss": 0.2619,
"step": 9728
},
{
"epoch": 1.0,
"learning_rate": 5.858739286101768e-09,
"loss": 0.1832,
"step": 9760
},
{
"epoch": 1.0,
"learning_rate": 2.3868937832266464e-09,
"loss": 0.1479,
"step": 9792
}
],
"logging_steps": 32,
"max_steps": 9805,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9804,
"total_flos": 4.16311373758464e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}