wangchanglm_cosmetic / trainer_state.json
SADATO's picture
Upload 11 files
668657f verified
{
"best_metric": 1.3205376863479614,
"best_model_checkpoint": "model/E4/wangchanglm_E4_wangchanglm_shuffle_augment_gpt4/checkpoint-11960",
"epoch": 9.99712666196484,
"eval_steps": 500,
"global_step": 11960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 3.3333333333333335e-05,
"loss": 3.2138,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 4.99999133180605e-05,
"loss": 2.4292,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 4.999921986615105e-05,
"loss": 1.8051,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 4.999783298156723e-05,
"loss": 1.7382,
"step": 80
},
{
"epoch": 0.08,
"learning_rate": 4.99957527027787e-05,
"loss": 1.6732,
"step": 100
},
{
"epoch": 0.1,
"learning_rate": 4.999297908748858e-05,
"loss": 1.683,
"step": 120
},
{
"epoch": 0.12,
"learning_rate": 4.998951221263189e-05,
"loss": 1.6447,
"step": 140
},
{
"epoch": 0.13,
"learning_rate": 4.9985352174373414e-05,
"loss": 1.656,
"step": 160
},
{
"epoch": 0.15,
"learning_rate": 4.998049908810499e-05,
"loss": 1.624,
"step": 180
},
{
"epoch": 0.17,
"learning_rate": 4.997495308844239e-05,
"loss": 1.6439,
"step": 200
},
{
"epoch": 0.18,
"learning_rate": 4.9968714329221486e-05,
"loss": 1.6123,
"step": 220
},
{
"epoch": 0.2,
"learning_rate": 4.996178298349407e-05,
"loss": 1.6174,
"step": 240
},
{
"epoch": 0.22,
"learning_rate": 4.9954159243523e-05,
"loss": 1.6039,
"step": 260
},
{
"epoch": 0.23,
"learning_rate": 4.9945843320776896e-05,
"loss": 1.6085,
"step": 280
},
{
"epoch": 0.25,
"learning_rate": 4.993683544592426e-05,
"loss": 1.6019,
"step": 300
},
{
"epoch": 0.27,
"learning_rate": 4.9927135868827065e-05,
"loss": 1.6012,
"step": 320
},
{
"epoch": 0.28,
"learning_rate": 4.991674485853387e-05,
"loss": 1.5745,
"step": 340
},
{
"epoch": 0.3,
"learning_rate": 4.99056627032723e-05,
"loss": 1.5897,
"step": 360
},
{
"epoch": 0.32,
"learning_rate": 4.98938897104411e-05,
"loss": 1.5815,
"step": 380
},
{
"epoch": 0.33,
"learning_rate": 4.9881426206601566e-05,
"loss": 1.574,
"step": 400
},
{
"epoch": 0.35,
"learning_rate": 4.986827253746853e-05,
"loss": 1.6022,
"step": 420
},
{
"epoch": 0.37,
"learning_rate": 4.9854429067900723e-05,
"loss": 1.5715,
"step": 440
},
{
"epoch": 0.38,
"learning_rate": 4.98398961818907e-05,
"loss": 1.5711,
"step": 460
},
{
"epoch": 0.4,
"learning_rate": 4.9824674282554165e-05,
"loss": 1.5793,
"step": 480
},
{
"epoch": 0.42,
"learning_rate": 4.980876379211879e-05,
"loss": 1.5696,
"step": 500
},
{
"epoch": 0.43,
"learning_rate": 4.9792165151912484e-05,
"loss": 1.5586,
"step": 520
},
{
"epoch": 0.45,
"learning_rate": 4.977487882235121e-05,
"loss": 1.5664,
"step": 540
},
{
"epoch": 0.47,
"learning_rate": 4.975690528292617e-05,
"loss": 1.5594,
"step": 560
},
{
"epoch": 0.48,
"learning_rate": 4.973824503219048e-05,
"loss": 1.568,
"step": 580
},
{
"epoch": 0.5,
"learning_rate": 4.97188985877454e-05,
"loss": 1.5401,
"step": 600
},
{
"epoch": 0.52,
"learning_rate": 4.9698866486225955e-05,
"loss": 1.529,
"step": 620
},
{
"epoch": 0.53,
"learning_rate": 4.9678149283286024e-05,
"loss": 1.5383,
"step": 640
},
{
"epoch": 0.55,
"learning_rate": 4.965674755358296e-05,
"loss": 1.5577,
"step": 660
},
{
"epoch": 0.57,
"learning_rate": 4.9634661890761634e-05,
"loss": 1.5299,
"step": 680
},
{
"epoch": 0.59,
"learning_rate": 4.9611892907437974e-05,
"loss": 1.5721,
"step": 700
},
{
"epoch": 0.6,
"learning_rate": 4.958844123518197e-05,
"loss": 1.5301,
"step": 720
},
{
"epoch": 0.62,
"learning_rate": 4.956430752450014e-05,
"loss": 1.5555,
"step": 740
},
{
"epoch": 0.64,
"learning_rate": 4.953949244481754e-05,
"loss": 1.5226,
"step": 760
},
{
"epoch": 0.65,
"learning_rate": 4.9513996684459105e-05,
"loss": 1.5321,
"step": 780
},
{
"epoch": 0.67,
"learning_rate": 4.948782095063066e-05,
"loss": 1.5388,
"step": 800
},
{
"epoch": 0.69,
"learning_rate": 4.946096596939921e-05,
"loss": 1.516,
"step": 820
},
{
"epoch": 0.7,
"learning_rate": 4.9433432485672864e-05,
"loss": 1.5097,
"step": 840
},
{
"epoch": 0.72,
"learning_rate": 4.940522126318015e-05,
"loss": 1.5212,
"step": 860
},
{
"epoch": 0.74,
"learning_rate": 4.9376333084448806e-05,
"loss": 1.5462,
"step": 880
},
{
"epoch": 0.75,
"learning_rate": 4.934676875078414e-05,
"loss": 1.5401,
"step": 900
},
{
"epoch": 0.77,
"learning_rate": 4.931652908224673e-05,
"loss": 1.5323,
"step": 920
},
{
"epoch": 0.79,
"learning_rate": 4.928561491762973e-05,
"loss": 1.5097,
"step": 940
},
{
"epoch": 0.8,
"learning_rate": 4.9254027114435554e-05,
"loss": 1.5277,
"step": 960
},
{
"epoch": 0.82,
"learning_rate": 4.922176654885215e-05,
"loss": 1.5018,
"step": 980
},
{
"epoch": 0.84,
"learning_rate": 4.9188834115728653e-05,
"loss": 1.522,
"step": 1000
},
{
"epoch": 0.85,
"learning_rate": 4.9155230728550584e-05,
"loss": 1.5077,
"step": 1020
},
{
"epoch": 0.87,
"learning_rate": 4.912095731941447e-05,
"loss": 1.4862,
"step": 1040
},
{
"epoch": 0.89,
"learning_rate": 4.908601483900207e-05,
"loss": 1.5044,
"step": 1060
},
{
"epoch": 0.9,
"learning_rate": 4.9050404256553925e-05,
"loss": 1.524,
"step": 1080
},
{
"epoch": 0.92,
"learning_rate": 4.901412655984252e-05,
"loss": 1.5035,
"step": 1100
},
{
"epoch": 0.94,
"learning_rate": 4.897718275514487e-05,
"loss": 1.5023,
"step": 1120
},
{
"epoch": 0.95,
"learning_rate": 4.893957386721459e-05,
"loss": 1.5072,
"step": 1140
},
{
"epoch": 0.97,
"learning_rate": 4.8901300939253516e-05,
"loss": 1.493,
"step": 1160
},
{
"epoch": 0.99,
"learning_rate": 4.8862365032882726e-05,
"loss": 1.5029,
"step": 1180
},
{
"epoch": 1.0,
"eval_loss": 1.4619702100753784,
"eval_runtime": 236.5249,
"eval_samples_per_second": 20.006,
"eval_steps_per_second": 20.006,
"step": 1196
},
{
"epoch": 1.0,
"learning_rate": 4.882276722811311e-05,
"loss": 1.5032,
"step": 1200
},
{
"epoch": 1.02,
"learning_rate": 4.8782508623315396e-05,
"loss": 1.492,
"step": 1220
},
{
"epoch": 1.04,
"learning_rate": 4.874159033518973e-05,
"loss": 1.4519,
"step": 1240
},
{
"epoch": 1.05,
"learning_rate": 4.870001349873464e-05,
"loss": 1.489,
"step": 1260
},
{
"epoch": 1.07,
"learning_rate": 4.865777926721559e-05,
"loss": 1.4884,
"step": 1280
},
{
"epoch": 1.09,
"learning_rate": 4.8614888812132976e-05,
"loss": 1.4848,
"step": 1300
},
{
"epoch": 1.1,
"learning_rate": 4.8571343323189654e-05,
"loss": 1.4941,
"step": 1320
},
{
"epoch": 1.12,
"learning_rate": 4.85271440082579e-05,
"loss": 1.455,
"step": 1340
},
{
"epoch": 1.14,
"learning_rate": 4.8482292093345944e-05,
"loss": 1.4446,
"step": 1360
},
{
"epoch": 1.15,
"learning_rate": 4.843678882256394e-05,
"loss": 1.4752,
"step": 1380
},
{
"epoch": 1.17,
"learning_rate": 4.8390635458089465e-05,
"loss": 1.4623,
"step": 1400
},
{
"epoch": 1.19,
"learning_rate": 4.83438332801325e-05,
"loss": 1.4582,
"step": 1420
},
{
"epoch": 1.2,
"learning_rate": 4.829638358689995e-05,
"loss": 1.4725,
"step": 1440
},
{
"epoch": 1.22,
"learning_rate": 4.824828769455957e-05,
"loss": 1.4665,
"step": 1460
},
{
"epoch": 1.24,
"learning_rate": 4.8199546937203546e-05,
"loss": 1.4658,
"step": 1480
},
{
"epoch": 1.25,
"learning_rate": 4.8150162666811395e-05,
"loss": 1.4589,
"step": 1500
},
{
"epoch": 1.27,
"learning_rate": 4.810013625321253e-05,
"loss": 1.4555,
"step": 1520
},
{
"epoch": 1.29,
"learning_rate": 4.804946908404827e-05,
"loss": 1.4614,
"step": 1540
},
{
"epoch": 1.3,
"learning_rate": 4.799816256473327e-05,
"loss": 1.4733,
"step": 1560
},
{
"epoch": 1.32,
"learning_rate": 4.794621811841663e-05,
"loss": 1.4583,
"step": 1580
},
{
"epoch": 1.34,
"learning_rate": 4.789363718594235e-05,
"loss": 1.4595,
"step": 1600
},
{
"epoch": 1.35,
"learning_rate": 4.784042122580943e-05,
"loss": 1.4527,
"step": 1620
},
{
"epoch": 1.37,
"learning_rate": 4.778657171413133e-05,
"loss": 1.4461,
"step": 1640
},
{
"epoch": 1.39,
"learning_rate": 4.773209014459512e-05,
"loss": 1.4673,
"step": 1660
},
{
"epoch": 1.4,
"learning_rate": 4.767697802841996e-05,
"loss": 1.4424,
"step": 1680
},
{
"epoch": 1.42,
"learning_rate": 4.7621236894315244e-05,
"loss": 1.474,
"step": 1700
},
{
"epoch": 1.44,
"learning_rate": 4.756486828843818e-05,
"loss": 1.4505,
"step": 1720
},
{
"epoch": 1.45,
"learning_rate": 4.7507873774350865e-05,
"loss": 1.445,
"step": 1740
},
{
"epoch": 1.47,
"learning_rate": 4.7450254932976965e-05,
"loss": 1.4754,
"step": 1760
},
{
"epoch": 1.49,
"learning_rate": 4.739201336255786e-05,
"loss": 1.4568,
"step": 1780
},
{
"epoch": 1.5,
"learning_rate": 4.733315067860828e-05,
"loss": 1.4516,
"step": 1800
},
{
"epoch": 1.52,
"learning_rate": 4.727366851387149e-05,
"loss": 1.4484,
"step": 1820
},
{
"epoch": 1.54,
"learning_rate": 4.721356851827407e-05,
"loss": 1.4705,
"step": 1840
},
{
"epoch": 1.55,
"learning_rate": 4.715285235888003e-05,
"loss": 1.4334,
"step": 1860
},
{
"epoch": 1.57,
"learning_rate": 4.709152171984471e-05,
"loss": 1.4654,
"step": 1880
},
{
"epoch": 1.59,
"learning_rate": 4.702957830236794e-05,
"loss": 1.4218,
"step": 1900
},
{
"epoch": 1.6,
"learning_rate": 4.696702382464692e-05,
"loss": 1.4387,
"step": 1920
},
{
"epoch": 1.62,
"learning_rate": 4.690386002182856e-05,
"loss": 1.4525,
"step": 1940
},
{
"epoch": 1.64,
"learning_rate": 4.6840088645961325e-05,
"loss": 1.4473,
"step": 1960
},
{
"epoch": 1.66,
"learning_rate": 4.6775711465946614e-05,
"loss": 1.4581,
"step": 1980
},
{
"epoch": 1.67,
"learning_rate": 4.671073026748979e-05,
"loss": 1.4407,
"step": 2000
},
{
"epoch": 1.69,
"learning_rate": 4.6645146853050524e-05,
"loss": 1.4363,
"step": 2020
},
{
"epoch": 1.71,
"learning_rate": 4.65789630417929e-05,
"loss": 1.4279,
"step": 2040
},
{
"epoch": 1.72,
"learning_rate": 4.6512180669534886e-05,
"loss": 1.4411,
"step": 2060
},
{
"epoch": 1.74,
"learning_rate": 4.644480158869744e-05,
"loss": 1.4412,
"step": 2080
},
{
"epoch": 1.76,
"learning_rate": 4.6376827668253145e-05,
"loss": 1.4367,
"step": 2100
},
{
"epoch": 1.77,
"learning_rate": 4.630826079367433e-05,
"loss": 1.4217,
"step": 2120
},
{
"epoch": 1.79,
"learning_rate": 4.62391028668808e-05,
"loss": 1.4305,
"step": 2140
},
{
"epoch": 1.81,
"learning_rate": 4.6169355806187056e-05,
"loss": 1.4754,
"step": 2160
},
{
"epoch": 1.82,
"learning_rate": 4.609902154624909e-05,
"loss": 1.443,
"step": 2180
},
{
"epoch": 1.84,
"learning_rate": 4.6028102038010766e-05,
"loss": 1.4687,
"step": 2200
},
{
"epoch": 1.86,
"learning_rate": 4.595659924864962e-05,
"loss": 1.4716,
"step": 2220
},
{
"epoch": 1.87,
"learning_rate": 4.588451516152238e-05,
"loss": 1.432,
"step": 2240
},
{
"epoch": 1.89,
"learning_rate": 4.581185177610988e-05,
"loss": 1.4254,
"step": 2260
},
{
"epoch": 1.91,
"learning_rate": 4.573861110796165e-05,
"loss": 1.4457,
"step": 2280
},
{
"epoch": 1.92,
"learning_rate": 4.5664795188639967e-05,
"loss": 1.4314,
"step": 2300
},
{
"epoch": 1.94,
"learning_rate": 4.559040606566355e-05,
"loss": 1.4295,
"step": 2320
},
{
"epoch": 1.96,
"learning_rate": 4.5515445802450735e-05,
"loss": 1.4454,
"step": 2340
},
{
"epoch": 1.97,
"learning_rate": 4.543991647826222e-05,
"loss": 1.4457,
"step": 2360
},
{
"epoch": 1.99,
"learning_rate": 4.536382018814345e-05,
"loss": 1.4631,
"step": 2380
},
{
"epoch": 2.0,
"eval_loss": 1.3995075225830078,
"eval_runtime": 236.5375,
"eval_samples_per_second": 20.005,
"eval_steps_per_second": 20.005,
"step": 2392
},
{
"epoch": 2.01,
"learning_rate": 4.528715904286644e-05,
"loss": 1.4113,
"step": 2400
},
{
"epoch": 2.02,
"learning_rate": 4.520993516887126e-05,
"loss": 1.3871,
"step": 2420
},
{
"epoch": 2.04,
"learning_rate": 4.513215070820708e-05,
"loss": 1.4158,
"step": 2440
},
{
"epoch": 2.06,
"learning_rate": 4.505380781847266e-05,
"loss": 1.4007,
"step": 2460
},
{
"epoch": 2.07,
"learning_rate": 4.497490867275661e-05,
"loss": 1.4108,
"step": 2480
},
{
"epoch": 2.09,
"learning_rate": 4.489545545957704e-05,
"loss": 1.4074,
"step": 2500
},
{
"epoch": 2.11,
"learning_rate": 4.481545038282089e-05,
"loss": 1.3929,
"step": 2520
},
{
"epoch": 2.12,
"learning_rate": 4.4734895661682796e-05,
"loss": 1.4149,
"step": 2540
},
{
"epoch": 2.14,
"learning_rate": 4.465379353060349e-05,
"loss": 1.4191,
"step": 2560
},
{
"epoch": 2.16,
"learning_rate": 4.4572146239207904e-05,
"loss": 1.4251,
"step": 2580
},
{
"epoch": 2.17,
"learning_rate": 4.448995605224268e-05,
"loss": 1.431,
"step": 2600
},
{
"epoch": 2.19,
"learning_rate": 4.440722524951341e-05,
"loss": 1.4397,
"step": 2620
},
{
"epoch": 2.21,
"learning_rate": 4.432395612582136e-05,
"loss": 1.4091,
"step": 2640
},
{
"epoch": 2.22,
"learning_rate": 4.424015099089989e-05,
"loss": 1.4215,
"step": 2660
},
{
"epoch": 2.24,
"learning_rate": 4.415581216935025e-05,
"loss": 1.4035,
"step": 2680
},
{
"epoch": 2.26,
"learning_rate": 4.4070942000577256e-05,
"loss": 1.3935,
"step": 2700
},
{
"epoch": 2.27,
"learning_rate": 4.398554283872428e-05,
"loss": 1.4288,
"step": 2720
},
{
"epoch": 2.29,
"learning_rate": 4.389961705260801e-05,
"loss": 1.392,
"step": 2740
},
{
"epoch": 2.31,
"learning_rate": 4.381316702565274e-05,
"loss": 1.392,
"step": 2760
},
{
"epoch": 2.32,
"learning_rate": 4.3726195155824214e-05,
"loss": 1.4023,
"step": 2780
},
{
"epoch": 2.34,
"learning_rate": 4.363870385556318e-05,
"loss": 1.411,
"step": 2800
},
{
"epoch": 2.36,
"learning_rate": 4.355069555171841e-05,
"loss": 1.433,
"step": 2820
},
{
"epoch": 2.37,
"learning_rate": 4.346217268547944e-05,
"loss": 1.4109,
"step": 2840
},
{
"epoch": 2.39,
"learning_rate": 4.3373137712308794e-05,
"loss": 1.425,
"step": 2860
},
{
"epoch": 2.41,
"learning_rate": 4.328359310187393e-05,
"loss": 1.4118,
"step": 2880
},
{
"epoch": 2.42,
"learning_rate": 4.3193541337978693e-05,
"loss": 1.402,
"step": 2900
},
{
"epoch": 2.44,
"learning_rate": 4.3102984918494454e-05,
"loss": 1.4019,
"step": 2920
},
{
"epoch": 2.46,
"learning_rate": 4.301192635529081e-05,
"loss": 1.3802,
"step": 2940
},
{
"epoch": 2.47,
"learning_rate": 4.292036817416589e-05,
"loss": 1.3901,
"step": 2960
},
{
"epoch": 2.49,
"learning_rate": 4.282831291477632e-05,
"loss": 1.3874,
"step": 2980
},
{
"epoch": 2.51,
"learning_rate": 4.273576313056678e-05,
"loss": 1.3871,
"step": 3000
},
{
"epoch": 2.52,
"learning_rate": 4.2642721388699145e-05,
"loss": 1.4339,
"step": 3020
},
{
"epoch": 2.54,
"learning_rate": 4.254919026998131e-05,
"loss": 1.4115,
"step": 3040
},
{
"epoch": 2.56,
"learning_rate": 4.245517236879558e-05,
"loss": 1.3741,
"step": 3060
},
{
"epoch": 2.57,
"learning_rate": 4.2360670293026725e-05,
"loss": 1.4006,
"step": 3080
},
{
"epoch": 2.59,
"learning_rate": 4.2265686663989635e-05,
"loss": 1.4095,
"step": 3100
},
{
"epoch": 2.61,
"learning_rate": 4.217022411635658e-05,
"loss": 1.3955,
"step": 3120
},
{
"epoch": 2.62,
"learning_rate": 4.207428529808421e-05,
"loss": 1.4066,
"step": 3140
},
{
"epoch": 2.64,
"learning_rate": 4.197787287034001e-05,
"loss": 1.3982,
"step": 3160
},
{
"epoch": 2.66,
"learning_rate": 4.188098950742852e-05,
"loss": 1.4046,
"step": 3180
},
{
"epoch": 2.67,
"learning_rate": 4.1783637896717195e-05,
"loss": 1.4145,
"step": 3200
},
{
"epoch": 2.69,
"learning_rate": 4.16858207385618e-05,
"loss": 1.4142,
"step": 3220
},
{
"epoch": 2.71,
"learning_rate": 4.1587540746231565e-05,
"loss": 1.3861,
"step": 3240
},
{
"epoch": 2.72,
"learning_rate": 4.148880064583386e-05,
"loss": 1.3918,
"step": 3260
},
{
"epoch": 2.74,
"learning_rate": 4.138960317623863e-05,
"loss": 1.4161,
"step": 3280
},
{
"epoch": 2.76,
"learning_rate": 4.128995108900241e-05,
"loss": 1.4368,
"step": 3300
},
{
"epoch": 2.78,
"learning_rate": 4.118984714829199e-05,
"loss": 1.4245,
"step": 3320
},
{
"epoch": 2.79,
"learning_rate": 4.108929413080774e-05,
"loss": 1.4068,
"step": 3340
},
{
"epoch": 2.81,
"learning_rate": 4.098829482570662e-05,
"loss": 1.3963,
"step": 3360
},
{
"epoch": 2.83,
"learning_rate": 4.088685203452479e-05,
"loss": 1.3875,
"step": 3380
},
{
"epoch": 2.84,
"learning_rate": 4.078496857109987e-05,
"loss": 1.3925,
"step": 3400
},
{
"epoch": 2.86,
"learning_rate": 4.068264726149298e-05,
"loss": 1.3841,
"step": 3420
},
{
"epoch": 2.88,
"learning_rate": 4.057989094391024e-05,
"loss": 1.3875,
"step": 3440
},
{
"epoch": 2.89,
"learning_rate": 4.0476702468624126e-05,
"loss": 1.4244,
"step": 3460
},
{
"epoch": 2.91,
"learning_rate": 4.037308469789437e-05,
"loss": 1.4129,
"step": 3480
},
{
"epoch": 2.93,
"learning_rate": 4.026904050588858e-05,
"loss": 1.3921,
"step": 3500
},
{
"epoch": 2.94,
"learning_rate": 4.01645727786025e-05,
"loss": 1.4025,
"step": 3520
},
{
"epoch": 2.96,
"learning_rate": 4.005968441377998e-05,
"loss": 1.3919,
"step": 3540
},
{
"epoch": 2.98,
"learning_rate": 3.9954378320832585e-05,
"loss": 1.4004,
"step": 3560
},
{
"epoch": 2.99,
"learning_rate": 3.9848657420758886e-05,
"loss": 1.3788,
"step": 3580
},
{
"epoch": 3.0,
"eval_loss": 1.362040400505066,
"eval_runtime": 237.5985,
"eval_samples_per_second": 19.916,
"eval_steps_per_second": 19.916,
"step": 3589
},
{
"epoch": 3.01,
"learning_rate": 3.974252464606345e-05,
"loss": 1.3835,
"step": 3600
},
{
"epoch": 3.03,
"learning_rate": 3.963598294067551e-05,
"loss": 1.4022,
"step": 3620
},
{
"epoch": 3.04,
"learning_rate": 3.9529035259867265e-05,
"loss": 1.3745,
"step": 3640
},
{
"epoch": 3.06,
"learning_rate": 3.9421684570171926e-05,
"loss": 1.4041,
"step": 3660
},
{
"epoch": 3.08,
"learning_rate": 3.931393384930148e-05,
"loss": 1.3847,
"step": 3680
},
{
"epoch": 3.09,
"learning_rate": 3.920578608606398e-05,
"loss": 1.3854,
"step": 3700
},
{
"epoch": 3.11,
"learning_rate": 3.909724428028076e-05,
"loss": 1.3776,
"step": 3720
},
{
"epoch": 3.13,
"learning_rate": 3.898831144270316e-05,
"loss": 1.3884,
"step": 3740
},
{
"epoch": 3.14,
"learning_rate": 3.8878990594929024e-05,
"loss": 1.366,
"step": 3760
},
{
"epoch": 3.16,
"learning_rate": 3.876928476931889e-05,
"loss": 1.3908,
"step": 3780
},
{
"epoch": 3.18,
"learning_rate": 3.865919700891188e-05,
"loss": 1.3965,
"step": 3800
},
{
"epoch": 3.19,
"learning_rate": 3.854873036734129e-05,
"loss": 1.3454,
"step": 3820
},
{
"epoch": 3.21,
"learning_rate": 3.843788790874988e-05,
"loss": 1.3825,
"step": 3840
},
{
"epoch": 3.23,
"learning_rate": 3.8326672707704894e-05,
"loss": 1.3619,
"step": 3860
},
{
"epoch": 3.24,
"learning_rate": 3.8215087849112776e-05,
"loss": 1.3683,
"step": 3880
},
{
"epoch": 3.26,
"learning_rate": 3.810313642813358e-05,
"loss": 1.3569,
"step": 3900
},
{
"epoch": 3.28,
"learning_rate": 3.7990821550095146e-05,
"loss": 1.3777,
"step": 3920
},
{
"epoch": 3.29,
"learning_rate": 3.7878146330406924e-05,
"loss": 1.3813,
"step": 3940
},
{
"epoch": 3.31,
"learning_rate": 3.7765113894473634e-05,
"loss": 1.3592,
"step": 3960
},
{
"epoch": 3.33,
"learning_rate": 3.765172737760846e-05,
"loss": 1.3853,
"step": 3980
},
{
"epoch": 3.34,
"learning_rate": 3.753798992494617e-05,
"loss": 1.4033,
"step": 4000
},
{
"epoch": 3.36,
"learning_rate": 3.742390469135587e-05,
"loss": 1.411,
"step": 4020
},
{
"epoch": 3.38,
"learning_rate": 3.7309474841353444e-05,
"loss": 1.3916,
"step": 4040
},
{
"epoch": 3.39,
"learning_rate": 3.7194703549013823e-05,
"loss": 1.388,
"step": 4060
},
{
"epoch": 3.41,
"learning_rate": 3.707959399788291e-05,
"loss": 1.3733,
"step": 4080
},
{
"epoch": 3.43,
"learning_rate": 3.6964149380889305e-05,
"loss": 1.3736,
"step": 4100
},
{
"epoch": 3.44,
"learning_rate": 3.6848372900255715e-05,
"loss": 1.3734,
"step": 4120
},
{
"epoch": 3.46,
"learning_rate": 3.6732267767410126e-05,
"loss": 1.3917,
"step": 4140
},
{
"epoch": 3.48,
"learning_rate": 3.661583720289676e-05,
"loss": 1.3626,
"step": 4160
},
{
"epoch": 3.49,
"learning_rate": 3.64990844362867e-05,
"loss": 1.3971,
"step": 4180
},
{
"epoch": 3.51,
"learning_rate": 3.638201270608833e-05,
"loss": 1.3953,
"step": 4200
},
{
"epoch": 3.53,
"learning_rate": 3.6264625259657516e-05,
"loss": 1.4117,
"step": 4220
},
{
"epoch": 3.54,
"learning_rate": 3.614692535310748e-05,
"loss": 1.3811,
"step": 4240
},
{
"epoch": 3.56,
"learning_rate": 3.602891625121856e-05,
"loss": 1.3744,
"step": 4260
},
{
"epoch": 3.58,
"learning_rate": 3.591060122734758e-05,
"loss": 1.3807,
"step": 4280
},
{
"epoch": 3.59,
"learning_rate": 3.579198356333709e-05,
"loss": 1.3642,
"step": 4300
},
{
"epoch": 3.61,
"learning_rate": 3.567306654942432e-05,
"loss": 1.3772,
"step": 4320
},
{
"epoch": 3.63,
"learning_rate": 3.5553853484149914e-05,
"loss": 1.3865,
"step": 4340
},
{
"epoch": 3.64,
"learning_rate": 3.5434347674266465e-05,
"loss": 1.3467,
"step": 4360
},
{
"epoch": 3.66,
"learning_rate": 3.531455243464673e-05,
"loss": 1.3752,
"step": 4380
},
{
"epoch": 3.68,
"learning_rate": 3.5194471088191746e-05,
"loss": 1.3689,
"step": 4400
},
{
"epoch": 3.69,
"learning_rate": 3.507410696573863e-05,
"loss": 1.3781,
"step": 4420
},
{
"epoch": 3.71,
"learning_rate": 3.495346340596817e-05,
"loss": 1.3871,
"step": 4440
},
{
"epoch": 3.73,
"learning_rate": 3.483254375531224e-05,
"loss": 1.3645,
"step": 4460
},
{
"epoch": 3.74,
"learning_rate": 3.471135136786098e-05,
"loss": 1.3737,
"step": 4480
},
{
"epoch": 3.76,
"learning_rate": 3.458988960526974e-05,
"loss": 1.3537,
"step": 4500
},
{
"epoch": 3.78,
"learning_rate": 3.446816183666588e-05,
"loss": 1.3596,
"step": 4520
},
{
"epoch": 3.79,
"learning_rate": 3.43461714385552e-05,
"loss": 1.3672,
"step": 4540
},
{
"epoch": 3.81,
"learning_rate": 3.422392179472845e-05,
"loss": 1.3626,
"step": 4560
},
{
"epoch": 3.83,
"learning_rate": 3.410141629616733e-05,
"loss": 1.3633,
"step": 4580
},
{
"epoch": 3.85,
"learning_rate": 3.39786583409505e-05,
"loss": 1.3763,
"step": 4600
},
{
"epoch": 3.86,
"learning_rate": 3.38556513341593e-05,
"loss": 1.3767,
"step": 4620
},
{
"epoch": 3.88,
"learning_rate": 3.373239868778333e-05,
"loss": 1.3586,
"step": 4640
},
{
"epoch": 3.9,
"learning_rate": 3.360890382062574e-05,
"loss": 1.3613,
"step": 4660
},
{
"epoch": 3.91,
"learning_rate": 3.348517015820847e-05,
"loss": 1.3741,
"step": 4680
},
{
"epoch": 3.93,
"learning_rate": 3.33612011326772e-05,
"loss": 1.3534,
"step": 4700
},
{
"epoch": 3.95,
"learning_rate": 3.323700018270616e-05,
"loss": 1.3694,
"step": 4720
},
{
"epoch": 3.96,
"learning_rate": 3.3112570753402715e-05,
"loss": 1.3518,
"step": 4740
},
{
"epoch": 3.98,
"learning_rate": 3.298791629621187e-05,
"loss": 1.3599,
"step": 4760
},
{
"epoch": 4.0,
"learning_rate": 3.2863040268820444e-05,
"loss": 1.3872,
"step": 4780
},
{
"epoch": 4.0,
"eval_loss": 1.3402212858200073,
"eval_runtime": 236.9499,
"eval_samples_per_second": 19.97,
"eval_steps_per_second": 19.97,
"step": 4785
},
{
"epoch": 4.01,
"learning_rate": 3.2737946135061236e-05,
"loss": 1.3592,
"step": 4800
},
{
"epoch": 4.03,
"learning_rate": 3.2612637364816936e-05,
"loss": 1.3615,
"step": 4820
},
{
"epoch": 4.05,
"learning_rate": 3.248711743392381e-05,
"loss": 1.3519,
"step": 4840
},
{
"epoch": 4.06,
"learning_rate": 3.2361389824075374e-05,
"loss": 1.3394,
"step": 4860
},
{
"epoch": 4.08,
"learning_rate": 3.2235458022725764e-05,
"loss": 1.3626,
"step": 4880
},
{
"epoch": 4.1,
"learning_rate": 3.210932552299301e-05,
"loss": 1.3551,
"step": 4900
},
{
"epoch": 4.11,
"learning_rate": 3.198299582356215e-05,
"loss": 1.3463,
"step": 4920
},
{
"epoch": 4.13,
"learning_rate": 3.1856472428588194e-05,
"loss": 1.3399,
"step": 4940
},
{
"epoch": 4.15,
"learning_rate": 3.172975884759891e-05,
"loss": 1.3727,
"step": 4960
},
{
"epoch": 4.16,
"learning_rate": 3.160285859539745e-05,
"loss": 1.3612,
"step": 4980
},
{
"epoch": 4.18,
"learning_rate": 3.147577519196493e-05,
"loss": 1.3212,
"step": 5000
},
{
"epoch": 4.2,
"learning_rate": 3.134851216236272e-05,
"loss": 1.322,
"step": 5020
},
{
"epoch": 4.21,
"learning_rate": 3.122107303663468e-05,
"loss": 1.3565,
"step": 5040
},
{
"epoch": 4.23,
"learning_rate": 3.1093461349709285e-05,
"loss": 1.3802,
"step": 5060
},
{
"epoch": 4.25,
"learning_rate": 3.096568064130151e-05,
"loss": 1.3689,
"step": 5080
},
{
"epoch": 4.26,
"learning_rate": 3.083773445581472e-05,
"loss": 1.354,
"step": 5100
},
{
"epoch": 4.28,
"learning_rate": 3.0709626342242266e-05,
"loss": 1.3688,
"step": 5120
},
{
"epoch": 4.3,
"learning_rate": 3.05813598540691e-05,
"loss": 1.3393,
"step": 5140
},
{
"epoch": 4.31,
"learning_rate": 3.0452938549173234e-05,
"loss": 1.3387,
"step": 5160
},
{
"epoch": 4.33,
"learning_rate": 3.0324365989726948e-05,
"loss": 1.3597,
"step": 5180
},
{
"epoch": 4.35,
"learning_rate": 3.019564574209811e-05,
"loss": 1.3453,
"step": 5200
},
{
"epoch": 4.36,
"learning_rate": 3.006678137675114e-05,
"loss": 1.3612,
"step": 5220
},
{
"epoch": 4.38,
"learning_rate": 2.9937776468148053e-05,
"loss": 1.385,
"step": 5240
},
{
"epoch": 4.4,
"learning_rate": 2.9808634594649266e-05,
"loss": 1.3598,
"step": 5260
},
{
"epoch": 4.41,
"learning_rate": 2.9679359338414335e-05,
"loss": 1.3522,
"step": 5280
},
{
"epoch": 4.43,
"learning_rate": 2.9549954285302632e-05,
"loss": 1.3532,
"step": 5300
},
{
"epoch": 4.45,
"learning_rate": 2.9420423024773854e-05,
"loss": 1.3726,
"step": 5320
},
{
"epoch": 4.46,
"learning_rate": 2.929076914978845e-05,
"loss": 1.3846,
"step": 5340
},
{
"epoch": 4.48,
"learning_rate": 2.9160996256707985e-05,
"loss": 1.3517,
"step": 5360
},
{
"epoch": 4.5,
"learning_rate": 2.9031107945195345e-05,
"loss": 1.3391,
"step": 5380
},
{
"epoch": 4.51,
"learning_rate": 2.8901107818114947e-05,
"loss": 1.3516,
"step": 5400
},
{
"epoch": 4.53,
"learning_rate": 2.8770999481432738e-05,
"loss": 1.3599,
"step": 5420
},
{
"epoch": 4.55,
"learning_rate": 2.8640786544116205e-05,
"loss": 1.3634,
"step": 5440
},
{
"epoch": 4.56,
"learning_rate": 2.851047261803429e-05,
"loss": 1.3657,
"step": 5460
},
{
"epoch": 4.58,
"learning_rate": 2.8380061317857136e-05,
"loss": 1.3537,
"step": 5480
},
{
"epoch": 4.6,
"learning_rate": 2.8249556260955924e-05,
"loss": 1.3515,
"step": 5500
},
{
"epoch": 4.61,
"learning_rate": 2.8118961067302402e-05,
"loss": 1.3557,
"step": 5520
},
{
"epoch": 4.63,
"learning_rate": 2.7988279359368612e-05,
"loss": 1.3732,
"step": 5540
},
{
"epoch": 4.65,
"learning_rate": 2.7857514762026317e-05,
"loss": 1.3764,
"step": 5560
},
{
"epoch": 4.66,
"learning_rate": 2.772667090244647e-05,
"loss": 1.3528,
"step": 5580
},
{
"epoch": 4.68,
"learning_rate": 2.7595751409998638e-05,
"loss": 1.3345,
"step": 5600
},
{
"epoch": 4.7,
"learning_rate": 2.7464759916150283e-05,
"loss": 1.3708,
"step": 5620
},
{
"epoch": 4.71,
"learning_rate": 2.733370005436608e-05,
"loss": 1.3617,
"step": 5640
},
{
"epoch": 4.73,
"learning_rate": 2.7202575460007067e-05,
"loss": 1.3728,
"step": 5660
},
{
"epoch": 4.75,
"learning_rate": 2.7071389770229895e-05,
"loss": 1.3655,
"step": 5680
},
{
"epoch": 4.76,
"learning_rate": 2.6940146623885836e-05,
"loss": 1.3649,
"step": 5700
},
{
"epoch": 4.78,
"learning_rate": 2.6808849661419955e-05,
"loss": 1.355,
"step": 5720
},
{
"epoch": 4.8,
"learning_rate": 2.667750252477004e-05,
"loss": 1.3699,
"step": 5740
},
{
"epoch": 4.81,
"learning_rate": 2.654610885726563e-05,
"loss": 1.3492,
"step": 5760
},
{
"epoch": 4.83,
"learning_rate": 2.6414672303526938e-05,
"loss": 1.3645,
"step": 5780
},
{
"epoch": 4.85,
"learning_rate": 2.6283196509363807e-05,
"loss": 1.3415,
"step": 5800
},
{
"epoch": 4.86,
"learning_rate": 2.6151685121674458e-05,
"loss": 1.3781,
"step": 5820
},
{
"epoch": 4.88,
"learning_rate": 2.6020141788344495e-05,
"loss": 1.35,
"step": 5840
},
{
"epoch": 4.9,
"learning_rate": 2.588857015814556e-05,
"loss": 1.3811,
"step": 5860
},
{
"epoch": 4.91,
"learning_rate": 2.5756973880634257e-05,
"loss": 1.3659,
"step": 5880
},
{
"epoch": 4.93,
"learning_rate": 2.5625356606050837e-05,
"loss": 1.3805,
"step": 5900
},
{
"epoch": 4.95,
"learning_rate": 2.5493721985217974e-05,
"loss": 1.3721,
"step": 5920
},
{
"epoch": 4.97,
"learning_rate": 2.5362073669439485e-05,
"loss": 1.356,
"step": 5940
},
{
"epoch": 4.98,
"learning_rate": 2.5230415310399068e-05,
"loss": 1.3602,
"step": 5960
},
{
"epoch": 5.0,
"learning_rate": 2.5098750560059026e-05,
"loss": 1.3729,
"step": 5980
},
{
"epoch": 5.0,
"eval_loss": 1.329305648803711,
"eval_runtime": 236.2546,
"eval_samples_per_second": 20.029,
"eval_steps_per_second": 20.029,
"step": 5981
},
{
"epoch": 5.02,
"learning_rate": 2.4967083070558905e-05,
"loss": 1.3618,
"step": 6000
},
{
"epoch": 5.03,
"learning_rate": 2.4835416494114254e-05,
"loss": 1.369,
"step": 6020
},
{
"epoch": 5.05,
"learning_rate": 2.470375448291529e-05,
"loss": 1.3481,
"step": 6040
},
{
"epoch": 5.07,
"learning_rate": 2.45721006890256e-05,
"loss": 1.3484,
"step": 6060
},
{
"epoch": 5.08,
"learning_rate": 2.444045876428082e-05,
"loss": 1.344,
"step": 6080
},
{
"epoch": 5.1,
"learning_rate": 2.43088323601874e-05,
"loss": 1.3485,
"step": 6100
},
{
"epoch": 5.12,
"learning_rate": 2.417722512782123e-05,
"loss": 1.3411,
"step": 6120
},
{
"epoch": 5.13,
"learning_rate": 2.404564071772644e-05,
"loss": 1.3438,
"step": 6140
},
{
"epoch": 5.15,
"learning_rate": 2.3914082779814103e-05,
"loss": 1.3745,
"step": 6160
},
{
"epoch": 5.17,
"learning_rate": 2.3782554963260995e-05,
"loss": 1.3419,
"step": 6180
},
{
"epoch": 5.18,
"learning_rate": 2.3651060916408386e-05,
"loss": 1.367,
"step": 6200
},
{
"epoch": 5.2,
"learning_rate": 2.3519604286660857e-05,
"loss": 1.3439,
"step": 6220
},
{
"epoch": 5.22,
"learning_rate": 2.3388188720385063e-05,
"loss": 1.378,
"step": 6240
},
{
"epoch": 5.23,
"learning_rate": 2.3256817862808672e-05,
"loss": 1.3298,
"step": 6260
},
{
"epoch": 5.25,
"learning_rate": 2.3125495357919187e-05,
"loss": 1.351,
"step": 6280
},
{
"epoch": 5.27,
"learning_rate": 2.299422484836292e-05,
"loss": 1.34,
"step": 6300
},
{
"epoch": 5.28,
"learning_rate": 2.2863009975343926e-05,
"loss": 1.3157,
"step": 6320
},
{
"epoch": 5.3,
"learning_rate": 2.2731854378522964e-05,
"loss": 1.3282,
"step": 6340
},
{
"epoch": 5.32,
"learning_rate": 2.260076169591664e-05,
"loss": 1.3677,
"step": 6360
},
{
"epoch": 5.33,
"learning_rate": 2.246973556379639e-05,
"loss": 1.3632,
"step": 6380
},
{
"epoch": 5.35,
"learning_rate": 2.2338779616587672e-05,
"loss": 1.3629,
"step": 6400
},
{
"epoch": 5.37,
"learning_rate": 2.2207897486769164e-05,
"loss": 1.3546,
"step": 6420
},
{
"epoch": 5.38,
"learning_rate": 2.2077092804771957e-05,
"loss": 1.3497,
"step": 6440
},
{
"epoch": 5.4,
"learning_rate": 2.1946369198878886e-05,
"loss": 1.3486,
"step": 6460
},
{
"epoch": 5.42,
"learning_rate": 2.1815730295123888e-05,
"loss": 1.3636,
"step": 6480
},
{
"epoch": 5.43,
"learning_rate": 2.168517971719143e-05,
"loss": 1.3515,
"step": 6500
},
{
"epoch": 5.45,
"learning_rate": 2.1554721086315957e-05,
"loss": 1.3323,
"step": 6520
},
{
"epoch": 5.47,
"learning_rate": 2.1424358021181485e-05,
"loss": 1.3429,
"step": 6540
},
{
"epoch": 5.48,
"learning_rate": 2.1294094137821226e-05,
"loss": 1.3395,
"step": 6560
},
{
"epoch": 5.5,
"learning_rate": 2.116393304951723e-05,
"loss": 1.3846,
"step": 6580
},
{
"epoch": 5.52,
"learning_rate": 2.103387836670024e-05,
"loss": 1.3357,
"step": 6600
},
{
"epoch": 5.53,
"learning_rate": 2.0903933696849474e-05,
"loss": 1.3327,
"step": 6620
},
{
"epoch": 5.55,
"learning_rate": 2.0774102644392627e-05,
"loss": 1.3538,
"step": 6640
},
{
"epoch": 5.57,
"learning_rate": 2.0644388810605833e-05,
"loss": 1.3594,
"step": 6660
},
{
"epoch": 5.58,
"learning_rate": 2.051479579351377e-05,
"loss": 1.3474,
"step": 6680
},
{
"epoch": 5.6,
"learning_rate": 2.0385327187789942e-05,
"loss": 1.361,
"step": 6700
},
{
"epoch": 5.62,
"learning_rate": 2.0255986584656854e-05,
"loss": 1.3664,
"step": 6720
},
{
"epoch": 5.63,
"learning_rate": 2.0126777571786473e-05,
"loss": 1.3675,
"step": 6740
},
{
"epoch": 5.65,
"learning_rate": 1.9997703733200706e-05,
"loss": 1.3438,
"step": 6760
},
{
"epoch": 5.67,
"learning_rate": 1.986876864917196e-05,
"loss": 1.3312,
"step": 6780
},
{
"epoch": 5.68,
"learning_rate": 1.973997589612383e-05,
"loss": 1.3318,
"step": 6800
},
{
"epoch": 5.7,
"learning_rate": 1.961132904653193e-05,
"loss": 1.3276,
"step": 6820
},
{
"epoch": 5.72,
"learning_rate": 1.9482831668824778e-05,
"loss": 1.3509,
"step": 6840
},
{
"epoch": 5.73,
"learning_rate": 1.9354487327284803e-05,
"loss": 1.3464,
"step": 6860
},
{
"epoch": 5.75,
"learning_rate": 1.9226299581949497e-05,
"loss": 1.3453,
"step": 6880
},
{
"epoch": 5.77,
"learning_rate": 1.9098271988512664e-05,
"loss": 1.345,
"step": 6900
},
{
"epoch": 5.78,
"learning_rate": 1.897040809822579e-05,
"loss": 1.34,
"step": 6920
},
{
"epoch": 5.8,
"learning_rate": 1.8842711457799504e-05,
"loss": 1.3472,
"step": 6940
},
{
"epoch": 5.82,
"learning_rate": 1.8715185609305272e-05,
"loss": 1.3425,
"step": 6960
},
{
"epoch": 5.83,
"learning_rate": 1.8587834090077077e-05,
"loss": 1.3376,
"step": 6980
},
{
"epoch": 5.85,
"learning_rate": 1.8460660432613348e-05,
"loss": 1.3281,
"step": 7000
},
{
"epoch": 5.87,
"learning_rate": 1.8333668164478907e-05,
"loss": 1.354,
"step": 7020
},
{
"epoch": 5.88,
"learning_rate": 1.8206860808207238e-05,
"loss": 1.3327,
"step": 7040
},
{
"epoch": 5.9,
"learning_rate": 1.808024188120265e-05,
"loss": 1.3603,
"step": 7060
},
{
"epoch": 5.92,
"learning_rate": 1.7953814895642788e-05,
"loss": 1.3305,
"step": 7080
},
{
"epoch": 5.93,
"learning_rate": 1.7827583358381207e-05,
"loss": 1.3339,
"step": 7100
},
{
"epoch": 5.95,
"learning_rate": 1.770155077085006e-05,
"loss": 1.3557,
"step": 7120
},
{
"epoch": 5.97,
"learning_rate": 1.7575720628963004e-05,
"loss": 1.358,
"step": 7140
},
{
"epoch": 5.98,
"learning_rate": 1.7450096423018225e-05,
"loss": 1.3734,
"step": 7160
},
{
"epoch": 6.0,
"eval_loss": 1.3229433298110962,
"eval_runtime": 236.1592,
"eval_samples_per_second": 20.037,
"eval_steps_per_second": 20.037,
"step": 7178
},
{
"epoch": 6.0,
"learning_rate": 1.7324681637601637e-05,
"loss": 1.3616,
"step": 7180
},
{
"epoch": 6.02,
"learning_rate": 1.719947975149019e-05,
"loss": 1.3467,
"step": 7200
},
{
"epoch": 6.04,
"learning_rate": 1.7074494237555405e-05,
"loss": 1.3562,
"step": 7220
},
{
"epoch": 6.05,
"learning_rate": 1.6949728562667037e-05,
"loss": 1.3243,
"step": 7240
},
{
"epoch": 6.07,
"learning_rate": 1.6825186187596915e-05,
"loss": 1.3384,
"step": 7260
},
{
"epoch": 6.09,
"learning_rate": 1.6700870566922905e-05,
"loss": 1.3427,
"step": 7280
},
{
"epoch": 6.1,
"learning_rate": 1.6576785148933165e-05,
"loss": 1.3458,
"step": 7300
},
{
"epoch": 6.12,
"learning_rate": 1.645293337553042e-05,
"loss": 1.336,
"step": 7320
},
{
"epoch": 6.14,
"learning_rate": 1.632931868213654e-05,
"loss": 1.3399,
"step": 7340
},
{
"epoch": 6.15,
"learning_rate": 1.6205944497597203e-05,
"loss": 1.3481,
"step": 7360
},
{
"epoch": 6.17,
"learning_rate": 1.6082814244086858e-05,
"loss": 1.3255,
"step": 7380
},
{
"epoch": 6.19,
"learning_rate": 1.5959931337013696e-05,
"loss": 1.355,
"step": 7400
},
{
"epoch": 6.2,
"learning_rate": 1.5837299184925004e-05,
"loss": 1.3351,
"step": 7420
},
{
"epoch": 6.22,
"learning_rate": 1.571492118941259e-05,
"loss": 1.3562,
"step": 7440
},
{
"epoch": 6.24,
"learning_rate": 1.5592800745018397e-05,
"loss": 1.3578,
"step": 7460
},
{
"epoch": 6.25,
"learning_rate": 1.547094123914039e-05,
"loss": 1.3232,
"step": 7480
},
{
"epoch": 6.27,
"learning_rate": 1.5349346051938574e-05,
"loss": 1.3311,
"step": 7500
},
{
"epoch": 6.29,
"learning_rate": 1.5228018556241222e-05,
"loss": 1.3278,
"step": 7520
},
{
"epoch": 6.3,
"learning_rate": 1.5106962117451354e-05,
"loss": 1.3511,
"step": 7540
},
{
"epoch": 6.32,
"learning_rate": 1.4986180093453351e-05,
"loss": 1.357,
"step": 7560
},
{
"epoch": 6.34,
"learning_rate": 1.4865675834519844e-05,
"loss": 1.3411,
"step": 7580
},
{
"epoch": 6.35,
"learning_rate": 1.474545268321876e-05,
"loss": 1.3678,
"step": 7600
},
{
"epoch": 6.37,
"learning_rate": 1.4625513974320598e-05,
"loss": 1.3623,
"step": 7620
},
{
"epoch": 6.39,
"learning_rate": 1.4505863034705987e-05,
"loss": 1.3239,
"step": 7640
},
{
"epoch": 6.4,
"learning_rate": 1.438650318327333e-05,
"loss": 1.3571,
"step": 7660
},
{
"epoch": 6.42,
"learning_rate": 1.4267437730846776e-05,
"loss": 1.369,
"step": 7680
},
{
"epoch": 6.44,
"learning_rate": 1.4148669980084379e-05,
"loss": 1.3269,
"step": 7700
},
{
"epoch": 6.45,
"learning_rate": 1.4030203225386517e-05,
"loss": 1.3625,
"step": 7720
},
{
"epoch": 6.47,
"learning_rate": 1.3912040752804478e-05,
"loss": 1.3527,
"step": 7740
},
{
"epoch": 6.49,
"learning_rate": 1.3794185839949304e-05,
"loss": 1.3504,
"step": 7760
},
{
"epoch": 6.5,
"learning_rate": 1.3676641755900916e-05,
"loss": 1.3297,
"step": 7780
},
{
"epoch": 6.52,
"learning_rate": 1.3559411761117385e-05,
"loss": 1.3519,
"step": 7800
},
{
"epoch": 6.54,
"learning_rate": 1.3442499107344542e-05,
"loss": 1.3618,
"step": 7820
},
{
"epoch": 6.55,
"learning_rate": 1.3325907037525765e-05,
"loss": 1.3427,
"step": 7840
},
{
"epoch": 6.57,
"learning_rate": 1.320963878571198e-05,
"loss": 1.3427,
"step": 7860
},
{
"epoch": 6.59,
"learning_rate": 1.3093697576972042e-05,
"loss": 1.3531,
"step": 7880
},
{
"epoch": 6.6,
"learning_rate": 1.2978086627303182e-05,
"loss": 1.3334,
"step": 7900
},
{
"epoch": 6.62,
"learning_rate": 1.2862809143541896e-05,
"loss": 1.3411,
"step": 7920
},
{
"epoch": 6.64,
"learning_rate": 1.2747868323274898e-05,
"loss": 1.357,
"step": 7940
},
{
"epoch": 6.65,
"learning_rate": 1.26332673547505e-05,
"loss": 1.3502,
"step": 7960
},
{
"epoch": 6.67,
"learning_rate": 1.2519009416790156e-05,
"loss": 1.3595,
"step": 7980
},
{
"epoch": 6.69,
"learning_rate": 1.2405097678700253e-05,
"loss": 1.363,
"step": 8000
},
{
"epoch": 6.7,
"learning_rate": 1.2291535300184221e-05,
"loss": 1.3261,
"step": 8020
},
{
"epoch": 6.72,
"learning_rate": 1.2178325431254948e-05,
"loss": 1.338,
"step": 8040
},
{
"epoch": 6.74,
"learning_rate": 1.2065471212147295e-05,
"loss": 1.3243,
"step": 8060
},
{
"epoch": 6.75,
"learning_rate": 1.1952975773231052e-05,
"loss": 1.3326,
"step": 8080
},
{
"epoch": 6.77,
"learning_rate": 1.1840842234924129e-05,
"loss": 1.3452,
"step": 8100
},
{
"epoch": 6.79,
"learning_rate": 1.1729073707605966e-05,
"loss": 1.343,
"step": 8120
},
{
"epoch": 6.8,
"learning_rate": 1.1617673291531256e-05,
"loss": 1.3447,
"step": 8140
},
{
"epoch": 6.82,
"learning_rate": 1.150664407674395e-05,
"loss": 1.3335,
"step": 8160
},
{
"epoch": 6.84,
"learning_rate": 1.1395989142991573e-05,
"loss": 1.3491,
"step": 8180
},
{
"epoch": 6.85,
"learning_rate": 1.1285711559639785e-05,
"loss": 1.3639,
"step": 8200
},
{
"epoch": 6.87,
"learning_rate": 1.1175814385587203e-05,
"loss": 1.3351,
"step": 8220
},
{
"epoch": 6.89,
"learning_rate": 1.106630066918061e-05,
"loss": 1.328,
"step": 8240
},
{
"epoch": 6.9,
"learning_rate": 1.095717344813038e-05,
"loss": 1.3412,
"step": 8260
},
{
"epoch": 6.92,
"learning_rate": 1.0848435749426192e-05,
"loss": 1.3248,
"step": 8280
},
{
"epoch": 6.94,
"learning_rate": 1.0740090589253088e-05,
"loss": 1.338,
"step": 8300
},
{
"epoch": 6.95,
"learning_rate": 1.063214097290782e-05,
"loss": 1.335,
"step": 8320
},
{
"epoch": 6.97,
"learning_rate": 1.0524589894715489e-05,
"loss": 1.3472,
"step": 8340
},
{
"epoch": 6.99,
"learning_rate": 1.0417440337946438e-05,
"loss": 1.3504,
"step": 8360
},
{
"epoch": 7.0,
"eval_loss": 1.3214781284332275,
"eval_runtime": 235.9763,
"eval_samples_per_second": 20.053,
"eval_steps_per_second": 20.053,
"step": 8374
},
{
"epoch": 7.0,
"learning_rate": 1.031069527473358e-05,
"loss": 1.3426,
"step": 8380
},
{
"epoch": 7.02,
"learning_rate": 1.0204357665989883e-05,
"loss": 1.3385,
"step": 8400
},
{
"epoch": 7.04,
"learning_rate": 1.0098430461326303e-05,
"loss": 1.3513,
"step": 8420
},
{
"epoch": 7.05,
"learning_rate": 9.9929165989699e-06,
"loss": 1.3356,
"step": 8440
},
{
"epoch": 7.07,
"learning_rate": 9.887819005682411e-06,
"loss": 1.3433,
"step": 8460
},
{
"epoch": 7.09,
"learning_rate": 9.783140596679016e-06,
"loss": 1.3322,
"step": 8480
},
{
"epoch": 7.1,
"learning_rate": 9.678884275547471e-06,
"loss": 1.3379,
"step": 8500
},
{
"epoch": 7.12,
"learning_rate": 9.575052934167619e-06,
"loss": 1.3551,
"step": 8520
},
{
"epoch": 7.14,
"learning_rate": 9.47164945263111e-06,
"loss": 1.3579,
"step": 8540
},
{
"epoch": 7.16,
"learning_rate": 9.36867669916156e-06,
"loss": 1.3453,
"step": 8560
},
{
"epoch": 7.17,
"learning_rate": 9.266137530034986e-06,
"loss": 1.3371,
"step": 8580
},
{
"epoch": 7.19,
"learning_rate": 9.164034789500542e-06,
"loss": 1.3608,
"step": 8600
},
{
"epoch": 7.21,
"learning_rate": 9.062371309701658e-06,
"loss": 1.3333,
"step": 8620
},
{
"epoch": 7.22,
"learning_rate": 8.961149910597492e-06,
"loss": 1.3476,
"step": 8640
},
{
"epoch": 7.24,
"learning_rate": 8.860373399884675e-06,
"loss": 1.3341,
"step": 8660
},
{
"epoch": 7.26,
"learning_rate": 8.760044572919455e-06,
"loss": 1.3569,
"step": 8680
},
{
"epoch": 7.27,
"learning_rate": 8.66016621264012e-06,
"loss": 1.3357,
"step": 8700
},
{
"epoch": 7.29,
"learning_rate": 8.560741089489898e-06,
"loss": 1.3433,
"step": 8720
},
{
"epoch": 7.31,
"learning_rate": 8.461771961339998e-06,
"loss": 1.3341,
"step": 8740
},
{
"epoch": 7.32,
"learning_rate": 8.363261573413163e-06,
"loss": 1.3579,
"step": 8760
},
{
"epoch": 7.34,
"learning_rate": 8.265212658207541e-06,
"loss": 1.3304,
"step": 8780
},
{
"epoch": 7.36,
"learning_rate": 8.16762793542087e-06,
"loss": 1.3548,
"step": 8800
},
{
"epoch": 7.37,
"learning_rate": 8.070510111875015e-06,
"loss": 1.3316,
"step": 8820
},
{
"epoch": 7.39,
"learning_rate": 7.973861881440921e-06,
"loss": 1.3292,
"step": 8840
},
{
"epoch": 7.41,
"learning_rate": 7.87768592496389e-06,
"loss": 1.3352,
"step": 8860
},
{
"epoch": 7.42,
"learning_rate": 7.781984910189202e-06,
"loss": 1.3351,
"step": 8880
},
{
"epoch": 7.44,
"learning_rate": 7.686761491688105e-06,
"loss": 1.3313,
"step": 8900
},
{
"epoch": 7.46,
"learning_rate": 7.592018310784219e-06,
"loss": 1.3733,
"step": 8920
},
{
"epoch": 7.47,
"learning_rate": 7.4977579954802565e-06,
"loss": 1.3178,
"step": 8940
},
{
"epoch": 7.49,
"learning_rate": 7.403983160385095e-06,
"loss": 1.3387,
"step": 8960
},
{
"epoch": 7.51,
"learning_rate": 7.3106964066412844e-06,
"loss": 1.3389,
"step": 8980
},
{
"epoch": 7.52,
"learning_rate": 7.217900321852908e-06,
"loss": 1.3669,
"step": 9000
},
{
"epoch": 7.54,
"learning_rate": 7.125597480013785e-06,
"loss": 1.3473,
"step": 9020
},
{
"epoch": 7.56,
"learning_rate": 7.03379044143605e-06,
"loss": 1.3328,
"step": 9040
},
{
"epoch": 7.57,
"learning_rate": 6.942481752679203e-06,
"loss": 1.3339,
"step": 9060
},
{
"epoch": 7.59,
"learning_rate": 6.851673946479387e-06,
"loss": 1.3615,
"step": 9080
},
{
"epoch": 7.61,
"learning_rate": 6.761369541679211e-06,
"loss": 1.3405,
"step": 9100
},
{
"epoch": 7.62,
"learning_rate": 6.671571043157843e-06,
"loss": 1.3223,
"step": 9120
},
{
"epoch": 7.64,
"learning_rate": 6.582280941761518e-06,
"loss": 1.3339,
"step": 9140
},
{
"epoch": 7.66,
"learning_rate": 6.493501714234487e-06,
"loss": 1.3414,
"step": 9160
},
{
"epoch": 7.67,
"learning_rate": 6.405235823150269e-06,
"loss": 1.3361,
"step": 9180
},
{
"epoch": 7.69,
"learning_rate": 6.3174857168433995e-06,
"loss": 1.3379,
"step": 9200
},
{
"epoch": 7.71,
"learning_rate": 6.230253829341448e-06,
"loss": 1.3224,
"step": 9220
},
{
"epoch": 7.72,
"learning_rate": 6.143542580297576e-06,
"loss": 1.3541,
"step": 9240
},
{
"epoch": 7.74,
"learning_rate": 6.057354374923374e-06,
"loss": 1.3647,
"step": 9260
},
{
"epoch": 7.76,
"learning_rate": 5.97169160392215e-06,
"loss": 1.3009,
"step": 9280
},
{
"epoch": 7.77,
"learning_rate": 5.886556643422611e-06,
"loss": 1.3472,
"step": 9300
},
{
"epoch": 7.79,
"learning_rate": 5.801951854913016e-06,
"loss": 1.3575,
"step": 9320
},
{
"epoch": 7.81,
"learning_rate": 5.717879585175564e-06,
"loss": 1.3363,
"step": 9340
},
{
"epoch": 7.82,
"learning_rate": 5.634342166221382e-06,
"loss": 1.3387,
"step": 9360
},
{
"epoch": 7.84,
"learning_rate": 5.551341915225816e-06,
"loss": 1.3328,
"step": 9380
},
{
"epoch": 7.86,
"learning_rate": 5.468881134464154e-06,
"loss": 1.3708,
"step": 9400
},
{
"epoch": 7.87,
"learning_rate": 5.386962111247756e-06,
"loss": 1.324,
"step": 9420
},
{
"epoch": 7.89,
"learning_rate": 5.30558711786062e-06,
"loss": 1.3397,
"step": 9440
},
{
"epoch": 7.91,
"learning_rate": 5.224758411496356e-06,
"loss": 1.3569,
"step": 9460
},
{
"epoch": 7.92,
"learning_rate": 5.144478234195579e-06,
"loss": 1.3292,
"step": 9480
},
{
"epoch": 7.94,
"learning_rate": 5.064748812783685e-06,
"loss": 1.3404,
"step": 9500
},
{
"epoch": 7.96,
"learning_rate": 4.985572358809127e-06,
"loss": 1.351,
"step": 9520
},
{
"epoch": 7.97,
"learning_rate": 4.906951068482057e-06,
"loss": 1.3712,
"step": 9540
},
{
"epoch": 7.99,
"learning_rate": 4.8288871226133875e-06,
"loss": 1.3226,
"step": 9560
},
{
"epoch": 8.0,
"eval_loss": 1.3205684423446655,
"eval_runtime": 237.1121,
"eval_samples_per_second": 19.957,
"eval_steps_per_second": 19.957,
"step": 9570
},
{
"epoch": 8.01,
"learning_rate": 4.751382686554309e-06,
"loss": 1.3245,
"step": 9580
},
{
"epoch": 8.02,
"learning_rate": 4.674439910136253e-06,
"loss": 1.3206,
"step": 9600
},
{
"epoch": 8.04,
"learning_rate": 4.598060927611228e-06,
"loss": 1.3422,
"step": 9620
},
{
"epoch": 8.06,
"learning_rate": 4.5222478575926265e-06,
"loss": 1.3173,
"step": 9640
},
{
"epoch": 8.07,
"learning_rate": 4.447002802996475e-06,
"loss": 1.3481,
"step": 9660
},
{
"epoch": 8.09,
"learning_rate": 4.372327850983069e-06,
"loss": 1.3385,
"step": 9680
},
{
"epoch": 8.11,
"learning_rate": 4.298225072899123e-06,
"loss": 1.3304,
"step": 9700
},
{
"epoch": 8.12,
"learning_rate": 4.224696524220276e-06,
"loss": 1.3349,
"step": 9720
},
{
"epoch": 8.14,
"learning_rate": 4.151744244494102e-06,
"loss": 1.366,
"step": 9740
},
{
"epoch": 8.16,
"learning_rate": 4.079370257283529e-06,
"loss": 1.3394,
"step": 9760
},
{
"epoch": 8.17,
"learning_rate": 4.007576570110691e-06,
"loss": 1.3385,
"step": 9780
},
{
"epoch": 8.19,
"learning_rate": 3.9363651744012855e-06,
"loss": 1.3182,
"step": 9800
},
{
"epoch": 8.21,
"learning_rate": 3.865738045429279e-06,
"loss": 1.33,
"step": 9820
},
{
"epoch": 8.23,
"learning_rate": 3.7956971422621723e-06,
"loss": 1.3453,
"step": 9840
},
{
"epoch": 8.24,
"learning_rate": 3.7262444077066248e-06,
"loss": 1.3338,
"step": 9860
},
{
"epoch": 8.26,
"learning_rate": 3.6573817682545667e-06,
"loss": 1.3569,
"step": 9880
},
{
"epoch": 8.28,
"learning_rate": 3.589111134029771e-06,
"loss": 1.3422,
"step": 9900
},
{
"epoch": 8.29,
"learning_rate": 3.5214343987348743e-06,
"loss": 1.3345,
"step": 9920
},
{
"epoch": 8.31,
"learning_rate": 3.4543534395988427e-06,
"loss": 1.3087,
"step": 9940
},
{
"epoch": 8.33,
"learning_rate": 3.38787011732489e-06,
"loss": 1.3302,
"step": 9960
},
{
"epoch": 8.34,
"learning_rate": 3.3219862760388715e-06,
"loss": 1.3326,
"step": 9980
},
{
"epoch": 8.36,
"learning_rate": 3.256703743238168e-06,
"loss": 1.3403,
"step": 10000
},
{
"epoch": 8.38,
"learning_rate": 3.1920243297409253e-06,
"loss": 1.3399,
"step": 10020
},
{
"epoch": 8.39,
"learning_rate": 3.1279498296358735e-06,
"loss": 1.3566,
"step": 10040
},
{
"epoch": 8.41,
"learning_rate": 3.0644820202325593e-06,
"loss": 1.3138,
"step": 10060
},
{
"epoch": 8.43,
"learning_rate": 3.0016226620120296e-06,
"loss": 1.3172,
"step": 10080
},
{
"epoch": 8.44,
"learning_rate": 2.939373498578013e-06,
"loss": 1.3252,
"step": 10100
},
{
"epoch": 8.46,
"learning_rate": 2.877736256608535e-06,
"loss": 1.3527,
"step": 10120
},
{
"epoch": 8.48,
"learning_rate": 2.816712645808056e-06,
"loss": 1.3656,
"step": 10140
},
{
"epoch": 8.49,
"learning_rate": 2.756304358860029e-06,
"loss": 1.3394,
"step": 10160
},
{
"epoch": 8.51,
"learning_rate": 2.6965130713799273e-06,
"loss": 1.3403,
"step": 10180
},
{
"epoch": 8.53,
"learning_rate": 2.6373404418688104e-06,
"loss": 1.3814,
"step": 10200
},
{
"epoch": 8.54,
"learning_rate": 2.5787881116672853e-06,
"loss": 1.3876,
"step": 10220
},
{
"epoch": 8.56,
"learning_rate": 2.52085770490999e-06,
"loss": 1.3584,
"step": 10240
},
{
"epoch": 8.58,
"learning_rate": 2.463550828480535e-06,
"loss": 1.3301,
"step": 10260
},
{
"epoch": 8.59,
"learning_rate": 2.406869071966955e-06,
"loss": 1.3738,
"step": 10280
},
{
"epoch": 8.61,
"learning_rate": 2.350814007617597e-06,
"loss": 1.3398,
"step": 10300
},
{
"epoch": 8.63,
"learning_rate": 2.2953871902974987e-06,
"loss": 1.3418,
"step": 10320
},
{
"epoch": 8.64,
"learning_rate": 2.2405901574452907e-06,
"loss": 1.3211,
"step": 10340
},
{
"epoch": 8.66,
"learning_rate": 2.1864244290305202e-06,
"loss": 1.3445,
"step": 10360
},
{
"epoch": 8.68,
"learning_rate": 2.1328915075115085e-06,
"loss": 1.3337,
"step": 10380
},
{
"epoch": 8.69,
"learning_rate": 2.0799928777936795e-06,
"loss": 1.323,
"step": 10400
},
{
"epoch": 8.71,
"learning_rate": 2.027730007188339e-06,
"loss": 1.3152,
"step": 10420
},
{
"epoch": 8.73,
"learning_rate": 1.9761043453720207e-06,
"loss": 1.3771,
"step": 10440
},
{
"epoch": 8.74,
"learning_rate": 1.9251173243462317e-06,
"loss": 1.3734,
"step": 10460
},
{
"epoch": 8.76,
"learning_rate": 1.8747703583977678e-06,
"loss": 1.3167,
"step": 10480
},
{
"epoch": 8.78,
"learning_rate": 1.8250648440594486e-06,
"loss": 1.3256,
"step": 10500
},
{
"epoch": 8.79,
"learning_rate": 1.7760021600714106e-06,
"loss": 1.3309,
"step": 10520
},
{
"epoch": 8.81,
"learning_rate": 1.7275836673428581e-06,
"loss": 1.3264,
"step": 10540
},
{
"epoch": 8.83,
"learning_rate": 1.6798107089142868e-06,
"loss": 1.3336,
"step": 10560
},
{
"epoch": 8.84,
"learning_rate": 1.632684609920254e-06,
"loss": 1.3105,
"step": 10580
},
{
"epoch": 8.86,
"learning_rate": 1.5862066775526457e-06,
"loss": 1.3703,
"step": 10600
},
{
"epoch": 8.88,
"learning_rate": 1.5403782010243589e-06,
"loss": 1.3667,
"step": 10620
},
{
"epoch": 8.89,
"learning_rate": 1.4952004515335794e-06,
"loss": 1.3438,
"step": 10640
},
{
"epoch": 8.91,
"learning_rate": 1.45067468222852e-06,
"loss": 1.3523,
"step": 10660
},
{
"epoch": 8.93,
"learning_rate": 1.4068021281726602e-06,
"loss": 1.3406,
"step": 10680
},
{
"epoch": 8.94,
"learning_rate": 1.36358400631047e-06,
"loss": 1.3454,
"step": 10700
},
{
"epoch": 8.96,
"learning_rate": 1.3210215154336681e-06,
"loss": 1.3453,
"step": 10720
},
{
"epoch": 8.98,
"learning_rate": 1.2791158361479733e-06,
"loss": 1.3294,
"step": 10740
},
{
"epoch": 8.99,
"learning_rate": 1.2378681308403501e-06,
"loss": 1.3411,
"step": 10760
},
{
"epoch": 9.0,
"eval_loss": 1.3205618858337402,
"eval_runtime": 237.2113,
"eval_samples_per_second": 19.948,
"eval_steps_per_second": 19.948,
"step": 10767
},
{
"epoch": 9.01,
"learning_rate": 1.1972795436467676e-06,
"loss": 1.3455,
"step": 10780
},
{
"epoch": 9.03,
"learning_rate": 1.1573512004204478e-06,
"loss": 1.3268,
"step": 10800
},
{
"epoch": 9.04,
"learning_rate": 1.1180842087006843e-06,
"loss": 1.337,
"step": 10820
},
{
"epoch": 9.06,
"learning_rate": 1.0794796576820587e-06,
"loss": 1.3206,
"step": 10840
},
{
"epoch": 9.08,
"learning_rate": 1.041538618184265e-06,
"loss": 1.3067,
"step": 10860
},
{
"epoch": 9.09,
"learning_rate": 1.0042621426224114e-06,
"loss": 1.3394,
"step": 10880
},
{
"epoch": 9.11,
"learning_rate": 9.676512649778092e-07,
"loss": 1.3166,
"step": 10900
},
{
"epoch": 9.13,
"learning_rate": 9.317070007692913e-07,
"loss": 1.3518,
"step": 10920
},
{
"epoch": 9.14,
"learning_rate": 8.964303470250707e-07,
"loss": 1.3584,
"step": 10940
},
{
"epoch": 9.16,
"learning_rate": 8.618222822550482e-07,
"loss": 1.3336,
"step": 10960
},
{
"epoch": 9.18,
"learning_rate": 8.27883766423701e-07,
"loss": 1.3392,
"step": 10980
},
{
"epoch": 9.19,
"learning_rate": 7.94615740923435e-07,
"loss": 1.3356,
"step": 11000
},
{
"epoch": 9.21,
"learning_rate": 7.620191285484828e-07,
"loss": 1.3412,
"step": 11020
},
{
"epoch": 9.23,
"learning_rate": 7.300948334693053e-07,
"loss": 1.3473,
"step": 11040
},
{
"epoch": 9.24,
"learning_rate": 6.988437412075055e-07,
"loss": 1.3129,
"step": 11060
},
{
"epoch": 9.26,
"learning_rate": 6.68266718611274e-07,
"loss": 1.353,
"step": 11080
},
{
"epoch": 9.28,
"learning_rate": 6.383646138313381e-07,
"loss": 1.3592,
"step": 11100
},
{
"epoch": 9.29,
"learning_rate": 6.091382562974396e-07,
"loss": 1.3318,
"step": 11120
},
{
"epoch": 9.31,
"learning_rate": 5.805884566953329e-07,
"loss": 1.3179,
"step": 11140
},
{
"epoch": 9.33,
"learning_rate": 5.527160069442788e-07,
"loss": 1.3383,
"step": 11160
},
{
"epoch": 9.35,
"learning_rate": 5.255216801751006e-07,
"loss": 1.3502,
"step": 11180
},
{
"epoch": 9.36,
"learning_rate": 4.990062307087262e-07,
"loss": 1.3378,
"step": 11200
},
{
"epoch": 9.38,
"learning_rate": 4.731703940352716e-07,
"loss": 1.3303,
"step": 11220
},
{
"epoch": 9.4,
"learning_rate": 4.480148867936268e-07,
"loss": 1.3175,
"step": 11240
},
{
"epoch": 9.41,
"learning_rate": 4.2354040675159635e-07,
"loss": 1.3295,
"step": 11260
},
{
"epoch": 9.43,
"learning_rate": 3.997476327865318e-07,
"loss": 1.354,
"step": 11280
},
{
"epoch": 9.45,
"learning_rate": 3.7663722486649957e-07,
"loss": 1.3428,
"step": 11300
},
{
"epoch": 9.46,
"learning_rate": 3.542098240319813e-07,
"loss": 1.3224,
"step": 11320
},
{
"epoch": 9.48,
"learning_rate": 3.3246605237809426e-07,
"loss": 1.3292,
"step": 11340
},
{
"epoch": 9.5,
"learning_rate": 3.114065130373295e-07,
"loss": 1.3407,
"step": 11360
},
{
"epoch": 9.51,
"learning_rate": 2.9103179016282124e-07,
"loss": 1.3524,
"step": 11380
},
{
"epoch": 9.53,
"learning_rate": 2.7134244891214853e-07,
"loss": 1.3547,
"step": 11400
},
{
"epoch": 9.55,
"learning_rate": 2.523390354316535e-07,
"loss": 1.3528,
"step": 11420
},
{
"epoch": 9.56,
"learning_rate": 2.3402207684130596e-07,
"loss": 1.3783,
"step": 11440
},
{
"epoch": 9.58,
"learning_rate": 2.1639208122005704e-07,
"loss": 1.3276,
"step": 11460
},
{
"epoch": 9.6,
"learning_rate": 1.9944953759176987e-07,
"loss": 1.3325,
"step": 11480
},
{
"epoch": 9.61,
"learning_rate": 1.8319491591164417e-07,
"loss": 1.3383,
"step": 11500
},
{
"epoch": 9.63,
"learning_rate": 1.676286670531796e-07,
"loss": 1.3339,
"step": 11520
},
{
"epoch": 9.65,
"learning_rate": 1.5275122279567178e-07,
"loss": 1.3711,
"step": 11540
},
{
"epoch": 9.66,
"learning_rate": 1.3856299581223864e-07,
"loss": 1.3468,
"step": 11560
},
{
"epoch": 9.68,
"learning_rate": 1.2506437965836836e-07,
"loss": 1.3427,
"step": 11580
},
{
"epoch": 9.7,
"learning_rate": 1.1225574876100597e-07,
"loss": 1.3551,
"step": 11600
},
{
"epoch": 9.71,
"learning_rate": 1.0013745840816158e-07,
"loss": 1.3338,
"step": 11620
},
{
"epoch": 9.73,
"learning_rate": 8.87098447390683e-08,
"loss": 1.3204,
"step": 11640
},
{
"epoch": 9.75,
"learning_rate": 7.797322473484248e-08,
"loss": 1.3483,
"step": 11660
},
{
"epoch": 9.76,
"learning_rate": 6.79278962097074e-08,
"loss": 1.3157,
"step": 11680
},
{
"epoch": 9.78,
"learning_rate": 5.8574137802713814e-08,
"loss": 1.349,
"step": 11700
},
{
"epoch": 9.8,
"learning_rate": 4.991220897002935e-08,
"loss": 1.3398,
"step": 11720
},
{
"epoch": 9.81,
"learning_rate": 4.1942349977727724e-08,
"loss": 1.3373,
"step": 11740
},
{
"epoch": 9.83,
"learning_rate": 3.466478189513567e-08,
"loss": 1.3389,
"step": 11760
},
{
"epoch": 9.85,
"learning_rate": 2.807970658869341e-08,
"loss": 1.3598,
"step": 11780
},
{
"epoch": 9.86,
"learning_rate": 2.2187306716353608e-08,
"loss": 1.3674,
"step": 11800
},
{
"epoch": 9.88,
"learning_rate": 1.6987745722521487e-08,
"loss": 1.3514,
"step": 11820
},
{
"epoch": 9.9,
"learning_rate": 1.2481167833516827e-08,
"loss": 1.351,
"step": 11840
},
{
"epoch": 9.91,
"learning_rate": 8.667698053574369e-09,
"loss": 1.3584,
"step": 11860
},
{
"epoch": 9.93,
"learning_rate": 5.5474421613799235e-09,
"loss": 1.3529,
"step": 11880
},
{
"epoch": 9.95,
"learning_rate": 3.120486707125503e-09,
"loss": 1.3444,
"step": 11900
},
{
"epoch": 9.96,
"learning_rate": 1.3868990101223445e-09,
"loss": 1.3519,
"step": 11920
},
{
"epoch": 9.98,
"learning_rate": 3.467271569246311e-10,
"loss": 1.3642,
"step": 11940
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 1.3294,
"step": 11960
},
{
"epoch": 10.0,
"eval_loss": 1.3205376863479614,
"eval_runtime": 236.9642,
"eval_samples_per_second": 19.969,
"eval_steps_per_second": 19.969,
"step": 11960
},
{
"epoch": 10.0,
"step": 11960,
"total_flos": 1.6483225760687555e+18,
"train_loss": 1.3917951301587466,
"train_runtime": 61679.6637,
"train_samples_per_second": 6.207,
"train_steps_per_second": 0.194
}
],
"logging_steps": 20,
"max_steps": 11960,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.6483225760687555e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}