baichuan-7b-sft / trainer_state.json
liangchen1225's picture
Upload 8 files
12e9693
{
"best_metric": 1.5761640071868896,
"best_model_checkpoint": "hsb_baichuan/checkpoint-12000",
"epoch": 2.8293545534924847,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.999992378674973e-05,
"loss": 1.8463,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.999969514746361e-05,
"loss": 1.7893,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.999931408353566e-05,
"loss": 1.7552,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 4.999878059728925e-05,
"loss": 1.7823,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.999809469197708e-05,
"loss": 1.7552,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.9997256371781154e-05,
"loss": 1.7256,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 4.999626564181277e-05,
"loss": 1.7518,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 4.9995122508112445e-05,
"loss": 1.6769,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 4.9993826977649954e-05,
"loss": 1.6256,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 4.999237905832422e-05,
"loss": 1.7261,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.999077875896329e-05,
"loss": 1.678,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 4.998902608932429e-05,
"loss": 1.6767,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 4.998712106009335e-05,
"loss": 1.6371,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 4.9985063682885534e-05,
"loss": 1.7198,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 4.9982853970244816e-05,
"loss": 1.6614,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 4.998049193564394e-05,
"loss": 1.6369,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 4.9977977593484373e-05,
"loss": 1.6423,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 4.99753109590962e-05,
"loss": 1.7224,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 4.997249204873807e-05,
"loss": 1.633,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 4.9969520879597025e-05,
"loss": 1.6923,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 4.996639746978848e-05,
"loss": 1.6915,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 4.996312183835605e-05,
"loss": 1.6234,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 4.995969400527144e-05,
"loss": 1.6664,
"step": 230
},
{
"epoch": 0.06,
"learning_rate": 4.9956113991434375e-05,
"loss": 1.7018,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 4.995238181867241e-05,
"loss": 1.6782,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 4.994849750974081e-05,
"loss": 1.6428,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 4.994446108832246e-05,
"loss": 1.6444,
"step": 270
},
{
"epoch": 0.07,
"learning_rate": 4.994027257902766e-05,
"loss": 1.7016,
"step": 280
},
{
"epoch": 0.07,
"learning_rate": 4.9935932007393986e-05,
"loss": 1.6771,
"step": 290
},
{
"epoch": 0.07,
"learning_rate": 4.993143939988618e-05,
"loss": 1.6449,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 4.992679478389593e-05,
"loss": 1.6916,
"step": 310
},
{
"epoch": 0.08,
"learning_rate": 4.992199818774176e-05,
"loss": 1.6751,
"step": 320
},
{
"epoch": 0.08,
"learning_rate": 4.9917049640668776e-05,
"loss": 1.6137,
"step": 330
},
{
"epoch": 0.08,
"learning_rate": 4.9911949172848585e-05,
"loss": 1.67,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 4.990669681537903e-05,
"loss": 1.6393,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 4.9901292600284065e-05,
"loss": 1.5925,
"step": 360
},
{
"epoch": 0.09,
"learning_rate": 4.989573656051351e-05,
"loss": 1.6982,
"step": 370
},
{
"epoch": 0.09,
"learning_rate": 4.9890028729942875e-05,
"loss": 1.6566,
"step": 380
},
{
"epoch": 0.09,
"learning_rate": 4.9884169143373135e-05,
"loss": 1.6258,
"step": 390
},
{
"epoch": 0.09,
"learning_rate": 4.987815783653055e-05,
"loss": 1.6007,
"step": 400
},
{
"epoch": 0.1,
"learning_rate": 4.9871994846066405e-05,
"loss": 1.6167,
"step": 410
},
{
"epoch": 0.1,
"learning_rate": 4.986568020955685e-05,
"loss": 1.6593,
"step": 420
},
{
"epoch": 0.1,
"learning_rate": 4.9859213965502574e-05,
"loss": 1.6396,
"step": 430
},
{
"epoch": 0.1,
"learning_rate": 4.985259615332868e-05,
"loss": 1.6635,
"step": 440
},
{
"epoch": 0.11,
"learning_rate": 4.984582681338435e-05,
"loss": 1.6402,
"step": 450
},
{
"epoch": 0.11,
"learning_rate": 4.983890598694265e-05,
"loss": 1.6745,
"step": 460
},
{
"epoch": 0.11,
"learning_rate": 4.9831833716200296e-05,
"loss": 1.7139,
"step": 470
},
{
"epoch": 0.11,
"learning_rate": 4.982461004427733e-05,
"loss": 1.6294,
"step": 480
},
{
"epoch": 0.12,
"learning_rate": 4.981723501521692e-05,
"loss": 1.6174,
"step": 490
},
{
"epoch": 0.12,
"learning_rate": 4.980970867398506e-05,
"loss": 1.5851,
"step": 500
},
{
"epoch": 0.12,
"learning_rate": 4.98020310664703e-05,
"loss": 1.6651,
"step": 510
},
{
"epoch": 0.12,
"learning_rate": 4.979420223948348e-05,
"loss": 1.5993,
"step": 520
},
{
"epoch": 0.12,
"learning_rate": 4.978622224075742e-05,
"loss": 1.6467,
"step": 530
},
{
"epoch": 0.13,
"learning_rate": 4.977809111894667e-05,
"loss": 1.6865,
"step": 540
},
{
"epoch": 0.13,
"learning_rate": 4.9769808923627136e-05,
"loss": 1.6948,
"step": 550
},
{
"epoch": 0.13,
"learning_rate": 4.9761375705295894e-05,
"loss": 1.6155,
"step": 560
},
{
"epoch": 0.13,
"learning_rate": 4.975279151537076e-05,
"loss": 1.6964,
"step": 570
},
{
"epoch": 0.14,
"learning_rate": 4.9744056406190066e-05,
"loss": 1.6332,
"step": 580
},
{
"epoch": 0.14,
"learning_rate": 4.973517043101229e-05,
"loss": 1.6989,
"step": 590
},
{
"epoch": 0.14,
"learning_rate": 4.9726133644015753e-05,
"loss": 1.6102,
"step": 600
},
{
"epoch": 0.14,
"learning_rate": 4.97169461002983e-05,
"loss": 1.7337,
"step": 610
},
{
"epoch": 0.15,
"learning_rate": 4.970760785587693e-05,
"loss": 1.6642,
"step": 620
},
{
"epoch": 0.15,
"learning_rate": 4.969811896768748e-05,
"loss": 1.6126,
"step": 630
},
{
"epoch": 0.15,
"learning_rate": 4.968847949358427e-05,
"loss": 1.6332,
"step": 640
},
{
"epoch": 0.15,
"learning_rate": 4.967868949233975e-05,
"loss": 1.644,
"step": 650
},
{
"epoch": 0.16,
"learning_rate": 4.9668749023644154e-05,
"loss": 1.6937,
"step": 660
},
{
"epoch": 0.16,
"learning_rate": 4.965865814810511e-05,
"loss": 1.5925,
"step": 670
},
{
"epoch": 0.16,
"learning_rate": 4.964841692724729e-05,
"loss": 1.646,
"step": 680
},
{
"epoch": 0.16,
"learning_rate": 4.963802542351203e-05,
"loss": 1.646,
"step": 690
},
{
"epoch": 0.17,
"learning_rate": 4.962748370025696e-05,
"loss": 1.6175,
"step": 700
},
{
"epoch": 0.17,
"learning_rate": 4.961679182175559e-05,
"loss": 1.6747,
"step": 710
},
{
"epoch": 0.17,
"learning_rate": 4.960594985319696e-05,
"loss": 1.5985,
"step": 720
},
{
"epoch": 0.17,
"learning_rate": 4.959495786068519e-05,
"loss": 1.6171,
"step": 730
},
{
"epoch": 0.17,
"learning_rate": 4.958381591123912e-05,
"loss": 1.6319,
"step": 740
},
{
"epoch": 0.18,
"learning_rate": 4.957252407279189e-05,
"loss": 1.6629,
"step": 750
},
{
"epoch": 0.18,
"learning_rate": 4.956108241419052e-05,
"loss": 1.6572,
"step": 760
},
{
"epoch": 0.18,
"learning_rate": 4.954949100519547e-05,
"loss": 1.6324,
"step": 770
},
{
"epoch": 0.18,
"learning_rate": 4.953774991648027e-05,
"loss": 1.7111,
"step": 780
},
{
"epoch": 0.19,
"learning_rate": 4.952585921963104e-05,
"loss": 1.69,
"step": 790
},
{
"epoch": 0.19,
"learning_rate": 4.951381898714609e-05,
"loss": 1.6322,
"step": 800
},
{
"epoch": 0.19,
"learning_rate": 4.95016292924354e-05,
"loss": 1.6527,
"step": 810
},
{
"epoch": 0.19,
"learning_rate": 4.9489290209820313e-05,
"loss": 1.6626,
"step": 820
},
{
"epoch": 0.2,
"learning_rate": 4.947680181453293e-05,
"loss": 1.675,
"step": 830
},
{
"epoch": 0.2,
"learning_rate": 4.9464164182715755e-05,
"loss": 1.5939,
"step": 840
},
{
"epoch": 0.2,
"learning_rate": 4.945137739142119e-05,
"loss": 1.5833,
"step": 850
},
{
"epoch": 0.2,
"learning_rate": 4.943844151861106e-05,
"loss": 1.6186,
"step": 860
},
{
"epoch": 0.21,
"learning_rate": 4.9425356643156165e-05,
"loss": 1.6172,
"step": 870
},
{
"epoch": 0.21,
"learning_rate": 4.941212284483578e-05,
"loss": 1.5903,
"step": 880
},
{
"epoch": 0.21,
"learning_rate": 4.939874020433716e-05,
"loss": 1.6082,
"step": 890
},
{
"epoch": 0.21,
"learning_rate": 4.938520880325507e-05,
"loss": 1.6156,
"step": 900
},
{
"epoch": 0.21,
"learning_rate": 4.9371528724091275e-05,
"loss": 1.6322,
"step": 910
},
{
"epoch": 0.22,
"learning_rate": 4.935770005025403e-05,
"loss": 1.5638,
"step": 920
},
{
"epoch": 0.22,
"learning_rate": 4.9343722866057605e-05,
"loss": 1.6491,
"step": 930
},
{
"epoch": 0.22,
"learning_rate": 4.932959725672173e-05,
"loss": 1.6502,
"step": 940
},
{
"epoch": 0.22,
"learning_rate": 4.9315323308371074e-05,
"loss": 1.6091,
"step": 950
},
{
"epoch": 0.23,
"learning_rate": 4.930090110803478e-05,
"loss": 1.6447,
"step": 960
},
{
"epoch": 0.23,
"learning_rate": 4.9286330743645845e-05,
"loss": 1.5945,
"step": 970
},
{
"epoch": 0.23,
"learning_rate": 4.9271612304040685e-05,
"loss": 1.6577,
"step": 980
},
{
"epoch": 0.23,
"learning_rate": 4.925674587895848e-05,
"loss": 1.5934,
"step": 990
},
{
"epoch": 0.24,
"learning_rate": 4.924173155904074e-05,
"loss": 1.5242,
"step": 1000
},
{
"epoch": 0.24,
"eval_loss": 1.6374789476394653,
"eval_runtime": 120.8306,
"eval_samples_per_second": 11.346,
"eval_steps_per_second": 2.839,
"step": 1000
},
{
"epoch": 0.24,
"learning_rate": 4.922656943583066e-05,
"loss": 1.6052,
"step": 1010
},
{
"epoch": 0.24,
"learning_rate": 4.9211259601772615e-05,
"loss": 1.6244,
"step": 1020
},
{
"epoch": 0.24,
"learning_rate": 4.919580215021159e-05,
"loss": 1.5851,
"step": 1030
},
{
"epoch": 0.25,
"learning_rate": 4.91801971753926e-05,
"loss": 1.6342,
"step": 1040
},
{
"epoch": 0.25,
"learning_rate": 4.9164444772460085e-05,
"loss": 1.5859,
"step": 1050
},
{
"epoch": 0.25,
"learning_rate": 4.9148545037457425e-05,
"loss": 1.6124,
"step": 1060
},
{
"epoch": 0.25,
"learning_rate": 4.9132498067326236e-05,
"loss": 1.602,
"step": 1070
},
{
"epoch": 0.25,
"learning_rate": 4.911630395990587e-05,
"loss": 1.6564,
"step": 1080
},
{
"epoch": 0.26,
"learning_rate": 4.9099962813932774e-05,
"loss": 1.5995,
"step": 1090
},
{
"epoch": 0.26,
"learning_rate": 4.908347472903989e-05,
"loss": 1.5934,
"step": 1100
},
{
"epoch": 0.26,
"learning_rate": 4.906683980575606e-05,
"loss": 1.676,
"step": 1110
},
{
"epoch": 0.26,
"learning_rate": 4.9050058145505405e-05,
"loss": 1.6273,
"step": 1120
},
{
"epoch": 0.27,
"learning_rate": 4.9033129850606724e-05,
"loss": 1.6725,
"step": 1130
},
{
"epoch": 0.27,
"learning_rate": 4.9016055024272844e-05,
"loss": 1.5949,
"step": 1140
},
{
"epoch": 0.27,
"learning_rate": 4.899883377061001e-05,
"loss": 1.6016,
"step": 1150
},
{
"epoch": 0.27,
"learning_rate": 4.898146619461723e-05,
"loss": 1.6214,
"step": 1160
},
{
"epoch": 0.28,
"learning_rate": 4.8963952402185666e-05,
"loss": 1.6544,
"step": 1170
},
{
"epoch": 0.28,
"learning_rate": 4.8946292500097956e-05,
"loss": 1.6928,
"step": 1180
},
{
"epoch": 0.28,
"learning_rate": 4.892848659602759e-05,
"loss": 1.617,
"step": 1190
},
{
"epoch": 0.28,
"learning_rate": 4.891053479853822e-05,
"loss": 1.5759,
"step": 1200
},
{
"epoch": 0.29,
"learning_rate": 4.8892437217083046e-05,
"loss": 1.5391,
"step": 1210
},
{
"epoch": 0.29,
"learning_rate": 4.8874193962004105e-05,
"loss": 1.6787,
"step": 1220
},
{
"epoch": 0.29,
"learning_rate": 4.885580514453162e-05,
"loss": 1.623,
"step": 1230
},
{
"epoch": 0.29,
"learning_rate": 4.883727087678331e-05,
"loss": 1.5873,
"step": 1240
},
{
"epoch": 0.29,
"learning_rate": 4.8818591271763714e-05,
"loss": 1.6066,
"step": 1250
},
{
"epoch": 0.3,
"learning_rate": 4.879976644336352e-05,
"loss": 1.6801,
"step": 1260
},
{
"epoch": 0.3,
"learning_rate": 4.8780796506358825e-05,
"loss": 1.5833,
"step": 1270
},
{
"epoch": 0.3,
"learning_rate": 4.876168157641048e-05,
"loss": 1.6128,
"step": 1280
},
{
"epoch": 0.3,
"learning_rate": 4.874242177006335e-05,
"loss": 1.6176,
"step": 1290
},
{
"epoch": 0.31,
"learning_rate": 4.872301720474564e-05,
"loss": 1.575,
"step": 1300
},
{
"epoch": 0.31,
"learning_rate": 4.8703467998768134e-05,
"loss": 1.595,
"step": 1310
},
{
"epoch": 0.31,
"learning_rate": 4.8683774271323544e-05,
"loss": 1.6355,
"step": 1320
},
{
"epoch": 0.31,
"learning_rate": 4.8663936142485685e-05,
"loss": 1.5456,
"step": 1330
},
{
"epoch": 0.32,
"learning_rate": 4.8643953733208824e-05,
"loss": 1.6025,
"step": 1340
},
{
"epoch": 0.32,
"learning_rate": 4.862382716532691e-05,
"loss": 1.6777,
"step": 1350
},
{
"epoch": 0.32,
"learning_rate": 4.8603556561552835e-05,
"loss": 1.5932,
"step": 1360
},
{
"epoch": 0.32,
"learning_rate": 4.8583142045477694e-05,
"loss": 1.6043,
"step": 1370
},
{
"epoch": 0.33,
"learning_rate": 4.856258374157e-05,
"loss": 1.594,
"step": 1380
},
{
"epoch": 0.33,
"learning_rate": 4.854188177517499e-05,
"loss": 1.605,
"step": 1390
},
{
"epoch": 0.33,
"learning_rate": 4.852103627251377e-05,
"loss": 1.6241,
"step": 1400
},
{
"epoch": 0.33,
"learning_rate": 4.8500047360682636e-05,
"loss": 1.6771,
"step": 1410
},
{
"epoch": 0.33,
"learning_rate": 4.8478915167652244e-05,
"loss": 1.5895,
"step": 1420
},
{
"epoch": 0.34,
"learning_rate": 4.8457639822266844e-05,
"loss": 1.6305,
"step": 1430
},
{
"epoch": 0.34,
"learning_rate": 4.843622145424348e-05,
"loss": 1.6484,
"step": 1440
},
{
"epoch": 0.34,
"learning_rate": 4.8414660194171244e-05,
"loss": 1.5931,
"step": 1450
},
{
"epoch": 0.34,
"learning_rate": 4.839295617351042e-05,
"loss": 1.571,
"step": 1460
},
{
"epoch": 0.35,
"learning_rate": 4.837110952459173e-05,
"loss": 1.6011,
"step": 1470
},
{
"epoch": 0.35,
"learning_rate": 4.834912038061551e-05,
"loss": 1.588,
"step": 1480
},
{
"epoch": 0.35,
"learning_rate": 4.832698887565088e-05,
"loss": 1.6041,
"step": 1490
},
{
"epoch": 0.35,
"learning_rate": 4.830471514463496e-05,
"loss": 1.5719,
"step": 1500
},
{
"epoch": 0.36,
"learning_rate": 4.8282299323372027e-05,
"loss": 1.5701,
"step": 1510
},
{
"epoch": 0.36,
"learning_rate": 4.8259741548532675e-05,
"loss": 1.5389,
"step": 1520
},
{
"epoch": 0.36,
"learning_rate": 4.823704195765303e-05,
"loss": 1.5716,
"step": 1530
},
{
"epoch": 0.36,
"learning_rate": 4.8214200689133846e-05,
"loss": 1.6128,
"step": 1540
},
{
"epoch": 0.37,
"learning_rate": 4.819121788223972e-05,
"loss": 1.585,
"step": 1550
},
{
"epoch": 0.37,
"learning_rate": 4.81680936770982e-05,
"loss": 1.6296,
"step": 1560
},
{
"epoch": 0.37,
"learning_rate": 4.814482821469895e-05,
"loss": 1.6498,
"step": 1570
},
{
"epoch": 0.37,
"learning_rate": 4.8121421636892896e-05,
"loss": 1.636,
"step": 1580
},
{
"epoch": 0.37,
"learning_rate": 4.809787408639133e-05,
"loss": 1.5585,
"step": 1590
},
{
"epoch": 0.38,
"learning_rate": 4.8074185706765105e-05,
"loss": 1.56,
"step": 1600
},
{
"epoch": 0.38,
"learning_rate": 4.805035664244368e-05,
"loss": 1.5903,
"step": 1610
},
{
"epoch": 0.38,
"learning_rate": 4.8026387038714294e-05,
"loss": 1.6253,
"step": 1620
},
{
"epoch": 0.38,
"learning_rate": 4.800227704172106e-05,
"loss": 1.6222,
"step": 1630
},
{
"epoch": 0.39,
"learning_rate": 4.797802679846408e-05,
"loss": 1.5612,
"step": 1640
},
{
"epoch": 0.39,
"learning_rate": 4.795363645679853e-05,
"loss": 1.6505,
"step": 1650
},
{
"epoch": 0.39,
"learning_rate": 4.79291061654338e-05,
"loss": 1.574,
"step": 1660
},
{
"epoch": 0.39,
"learning_rate": 4.7904436073932546e-05,
"loss": 1.6852,
"step": 1670
},
{
"epoch": 0.4,
"learning_rate": 4.787962633270979e-05,
"loss": 1.6222,
"step": 1680
},
{
"epoch": 0.4,
"learning_rate": 4.785467709303203e-05,
"loss": 1.6398,
"step": 1690
},
{
"epoch": 0.4,
"learning_rate": 4.782958850701626e-05,
"loss": 1.6125,
"step": 1700
},
{
"epoch": 0.4,
"learning_rate": 4.7804360727629094e-05,
"loss": 1.5746,
"step": 1710
},
{
"epoch": 0.41,
"learning_rate": 4.777899390868583e-05,
"loss": 1.6119,
"step": 1720
},
{
"epoch": 0.41,
"learning_rate": 4.7753488204849474e-05,
"loss": 1.6468,
"step": 1730
},
{
"epoch": 0.41,
"learning_rate": 4.772784377162984e-05,
"loss": 1.6201,
"step": 1740
},
{
"epoch": 0.41,
"learning_rate": 4.7702060765382585e-05,
"loss": 1.6671,
"step": 1750
},
{
"epoch": 0.41,
"learning_rate": 4.7676139343308236e-05,
"loss": 1.6204,
"step": 1760
},
{
"epoch": 0.42,
"learning_rate": 4.765007966345125e-05,
"loss": 1.6959,
"step": 1770
},
{
"epoch": 0.42,
"learning_rate": 4.762388188469907e-05,
"loss": 1.6466,
"step": 1780
},
{
"epoch": 0.42,
"learning_rate": 4.7597546166781125e-05,
"loss": 1.5926,
"step": 1790
},
{
"epoch": 0.42,
"learning_rate": 4.757107267026787e-05,
"loss": 1.6089,
"step": 1800
},
{
"epoch": 0.43,
"learning_rate": 4.75444615565698e-05,
"loss": 1.5725,
"step": 1810
},
{
"epoch": 0.43,
"learning_rate": 4.751771298793647e-05,
"loss": 1.6761,
"step": 1820
},
{
"epoch": 0.43,
"learning_rate": 4.7490827127455504e-05,
"loss": 1.5831,
"step": 1830
},
{
"epoch": 0.43,
"learning_rate": 4.746380413905162e-05,
"loss": 1.5859,
"step": 1840
},
{
"epoch": 0.44,
"learning_rate": 4.743664418748559e-05,
"loss": 1.5971,
"step": 1850
},
{
"epoch": 0.44,
"learning_rate": 4.740934743835328e-05,
"loss": 1.5726,
"step": 1860
},
{
"epoch": 0.44,
"learning_rate": 4.7381914058084586e-05,
"loss": 1.5331,
"step": 1870
},
{
"epoch": 0.44,
"learning_rate": 4.7354344213942506e-05,
"loss": 1.5822,
"step": 1880
},
{
"epoch": 0.45,
"learning_rate": 4.7326638074022e-05,
"loss": 1.5794,
"step": 1890
},
{
"epoch": 0.45,
"learning_rate": 4.7298795807249085e-05,
"loss": 1.5759,
"step": 1900
},
{
"epoch": 0.45,
"learning_rate": 4.727081758337974e-05,
"loss": 1.6224,
"step": 1910
},
{
"epoch": 0.45,
"learning_rate": 4.724270357299886e-05,
"loss": 1.5642,
"step": 1920
},
{
"epoch": 0.46,
"learning_rate": 4.7214453947519256e-05,
"loss": 1.556,
"step": 1930
},
{
"epoch": 0.46,
"learning_rate": 4.71860688791806e-05,
"loss": 1.585,
"step": 1940
},
{
"epoch": 0.46,
"learning_rate": 4.715754854104835e-05,
"loss": 1.6414,
"step": 1950
},
{
"epoch": 0.46,
"learning_rate": 4.7128893107012716e-05,
"loss": 1.5784,
"step": 1960
},
{
"epoch": 0.46,
"learning_rate": 4.71001027517876e-05,
"loss": 1.6047,
"step": 1970
},
{
"epoch": 0.47,
"learning_rate": 4.707117765090954e-05,
"loss": 1.648,
"step": 1980
},
{
"epoch": 0.47,
"learning_rate": 4.704211798073659e-05,
"loss": 1.6223,
"step": 1990
},
{
"epoch": 0.47,
"learning_rate": 4.7012923918447326e-05,
"loss": 1.6081,
"step": 2000
},
{
"epoch": 0.47,
"eval_loss": 1.6162242889404297,
"eval_runtime": 120.2644,
"eval_samples_per_second": 11.4,
"eval_steps_per_second": 2.852,
"step": 2000
},
{
"epoch": 0.47,
"learning_rate": 4.698359564203968e-05,
"loss": 1.6442,
"step": 2010
},
{
"epoch": 0.48,
"learning_rate": 4.695413333032992e-05,
"loss": 1.5722,
"step": 2020
},
{
"epoch": 0.48,
"learning_rate": 4.692453716295153e-05,
"loss": 1.6816,
"step": 2030
},
{
"epoch": 0.48,
"learning_rate": 4.6894807320354125e-05,
"loss": 1.5378,
"step": 2040
},
{
"epoch": 0.48,
"learning_rate": 4.6864943983802324e-05,
"loss": 1.6699,
"step": 2050
},
{
"epoch": 0.49,
"learning_rate": 4.6834947335374696e-05,
"loss": 1.5855,
"step": 2060
},
{
"epoch": 0.49,
"learning_rate": 4.68048175579626e-05,
"loss": 1.6015,
"step": 2070
},
{
"epoch": 0.49,
"learning_rate": 4.67745548352691e-05,
"loss": 1.5387,
"step": 2080
},
{
"epoch": 0.49,
"learning_rate": 4.6744159351807837e-05,
"loss": 1.6405,
"step": 2090
},
{
"epoch": 0.5,
"learning_rate": 4.671363129290188e-05,
"loss": 1.6547,
"step": 2100
},
{
"epoch": 0.5,
"learning_rate": 4.668297084468266e-05,
"loss": 1.6246,
"step": 2110
},
{
"epoch": 0.5,
"learning_rate": 4.665217819408876e-05,
"loss": 1.5588,
"step": 2120
},
{
"epoch": 0.5,
"learning_rate": 4.662125352886482e-05,
"loss": 1.6331,
"step": 2130
},
{
"epoch": 0.5,
"learning_rate": 4.6590197037560367e-05,
"loss": 1.6697,
"step": 2140
},
{
"epoch": 0.51,
"learning_rate": 4.655900890952872e-05,
"loss": 1.6041,
"step": 2150
},
{
"epoch": 0.51,
"learning_rate": 4.652768933492574e-05,
"loss": 1.6565,
"step": 2160
},
{
"epoch": 0.51,
"learning_rate": 4.6496238504708764e-05,
"loss": 1.6182,
"step": 2170
},
{
"epoch": 0.51,
"learning_rate": 4.6464656610635405e-05,
"loss": 1.6574,
"step": 2180
},
{
"epoch": 0.52,
"learning_rate": 4.643294384526234e-05,
"loss": 1.5828,
"step": 2190
},
{
"epoch": 0.52,
"learning_rate": 4.640110040194423e-05,
"loss": 1.5964,
"step": 2200
},
{
"epoch": 0.52,
"learning_rate": 4.6369126474832434e-05,
"loss": 1.6485,
"step": 2210
},
{
"epoch": 0.52,
"learning_rate": 4.633702225887393e-05,
"loss": 1.647,
"step": 2220
},
{
"epoch": 0.53,
"learning_rate": 4.6304787949810037e-05,
"loss": 1.6325,
"step": 2230
},
{
"epoch": 0.53,
"learning_rate": 4.627242374417527e-05,
"loss": 1.5784,
"step": 2240
},
{
"epoch": 0.53,
"learning_rate": 4.6239929839296125e-05,
"loss": 1.6343,
"step": 2250
},
{
"epoch": 0.53,
"learning_rate": 4.6207306433289916e-05,
"loss": 1.6395,
"step": 2260
},
{
"epoch": 0.54,
"learning_rate": 4.6174553725063484e-05,
"loss": 1.6122,
"step": 2270
},
{
"epoch": 0.54,
"learning_rate": 4.6141671914312076e-05,
"loss": 1.5881,
"step": 2280
},
{
"epoch": 0.54,
"learning_rate": 4.610866120151805e-05,
"loss": 1.6092,
"step": 2290
},
{
"epoch": 0.54,
"learning_rate": 4.60755217879497e-05,
"loss": 1.6278,
"step": 2300
},
{
"epoch": 0.54,
"learning_rate": 4.604225387566005e-05,
"loss": 1.5755,
"step": 2310
},
{
"epoch": 0.55,
"learning_rate": 4.600885766748552e-05,
"loss": 1.6634,
"step": 2320
},
{
"epoch": 0.55,
"learning_rate": 4.597533336704482e-05,
"loss": 1.5873,
"step": 2330
},
{
"epoch": 0.55,
"learning_rate": 4.594168117873761e-05,
"loss": 1.616,
"step": 2340
},
{
"epoch": 0.55,
"learning_rate": 4.59079013077433e-05,
"loss": 1.6475,
"step": 2350
},
{
"epoch": 0.56,
"learning_rate": 4.58739939600198e-05,
"loss": 1.6932,
"step": 2360
},
{
"epoch": 0.56,
"learning_rate": 4.583995934230225e-05,
"loss": 1.5927,
"step": 2370
},
{
"epoch": 0.56,
"learning_rate": 4.580579766210175e-05,
"loss": 1.5839,
"step": 2380
},
{
"epoch": 0.56,
"learning_rate": 4.57715091277041e-05,
"loss": 1.704,
"step": 2390
},
{
"epoch": 0.57,
"learning_rate": 4.5737093948168566e-05,
"loss": 1.6202,
"step": 2400
},
{
"epoch": 0.57,
"learning_rate": 4.5702552333326574e-05,
"loss": 1.5782,
"step": 2410
},
{
"epoch": 0.57,
"learning_rate": 4.56678844937804e-05,
"loss": 1.6271,
"step": 2420
},
{
"epoch": 0.57,
"learning_rate": 4.5633090640901965e-05,
"loss": 1.6253,
"step": 2430
},
{
"epoch": 0.58,
"learning_rate": 4.559817098683146e-05,
"loss": 1.6356,
"step": 2440
},
{
"epoch": 0.58,
"learning_rate": 4.556312574447612e-05,
"loss": 1.5635,
"step": 2450
},
{
"epoch": 0.58,
"learning_rate": 4.552795512750889e-05,
"loss": 1.6473,
"step": 2460
},
{
"epoch": 0.58,
"learning_rate": 4.549265935036714e-05,
"loss": 1.6278,
"step": 2470
},
{
"epoch": 0.58,
"learning_rate": 4.545723862825133e-05,
"loss": 1.6253,
"step": 2480
},
{
"epoch": 0.59,
"learning_rate": 4.5421693177123724e-05,
"loss": 1.6483,
"step": 2490
},
{
"epoch": 0.59,
"learning_rate": 4.5386023213707095e-05,
"loss": 1.6938,
"step": 2500
},
{
"epoch": 0.59,
"learning_rate": 4.5350228955483334e-05,
"loss": 1.6065,
"step": 2510
},
{
"epoch": 0.59,
"learning_rate": 4.531431062069217e-05,
"loss": 1.6266,
"step": 2520
},
{
"epoch": 0.6,
"learning_rate": 4.527826842832987e-05,
"loss": 1.5903,
"step": 2530
},
{
"epoch": 0.6,
"learning_rate": 4.524210259814784e-05,
"loss": 1.6314,
"step": 2540
},
{
"epoch": 0.6,
"learning_rate": 4.520581335065131e-05,
"loss": 1.5874,
"step": 2550
},
{
"epoch": 0.6,
"learning_rate": 4.516940090709799e-05,
"loss": 1.6006,
"step": 2560
},
{
"epoch": 0.61,
"learning_rate": 4.5132865489496756e-05,
"loss": 1.5898,
"step": 2570
},
{
"epoch": 0.61,
"learning_rate": 4.509620732060623e-05,
"loss": 1.6183,
"step": 2580
},
{
"epoch": 0.61,
"learning_rate": 4.505942662393346e-05,
"loss": 1.5927,
"step": 2590
},
{
"epoch": 0.61,
"learning_rate": 4.5022523623732586e-05,
"loss": 1.62,
"step": 2600
},
{
"epoch": 0.62,
"learning_rate": 4.498549854500339e-05,
"loss": 1.6127,
"step": 2610
},
{
"epoch": 0.62,
"learning_rate": 4.4948351613490017e-05,
"loss": 1.6201,
"step": 2620
},
{
"epoch": 0.62,
"learning_rate": 4.4911083055679526e-05,
"loss": 1.5915,
"step": 2630
},
{
"epoch": 0.62,
"learning_rate": 4.4873693098800564e-05,
"loss": 1.5617,
"step": 2640
},
{
"epoch": 0.62,
"learning_rate": 4.4836181970821924e-05,
"loss": 1.6041,
"step": 2650
},
{
"epoch": 0.63,
"learning_rate": 4.479854990045121e-05,
"loss": 1.5829,
"step": 2660
},
{
"epoch": 0.63,
"learning_rate": 4.476079711713343e-05,
"loss": 1.5854,
"step": 2670
},
{
"epoch": 0.63,
"learning_rate": 4.4722923851049545e-05,
"loss": 1.6142,
"step": 2680
},
{
"epoch": 0.63,
"learning_rate": 4.468493033311515e-05,
"loss": 1.603,
"step": 2690
},
{
"epoch": 0.64,
"learning_rate": 4.464681679497901e-05,
"loss": 1.5794,
"step": 2700
},
{
"epoch": 0.64,
"learning_rate": 4.460858346902162e-05,
"loss": 1.5865,
"step": 2710
},
{
"epoch": 0.64,
"learning_rate": 4.4570230588353914e-05,
"loss": 1.616,
"step": 2720
},
{
"epoch": 0.64,
"learning_rate": 4.4531758386815665e-05,
"loss": 1.63,
"step": 2730
},
{
"epoch": 0.65,
"learning_rate": 4.449316709897421e-05,
"loss": 1.576,
"step": 2740
},
{
"epoch": 0.65,
"learning_rate": 4.445445696012295e-05,
"loss": 1.6069,
"step": 2750
},
{
"epoch": 0.65,
"learning_rate": 4.441562820627991e-05,
"loss": 1.6056,
"step": 2760
},
{
"epoch": 0.65,
"learning_rate": 4.4376681074186364e-05,
"loss": 1.5949,
"step": 2770
},
{
"epoch": 0.66,
"learning_rate": 4.4337615801305286e-05,
"loss": 1.6223,
"step": 2780
},
{
"epoch": 0.66,
"learning_rate": 4.429843262582e-05,
"loss": 1.6109,
"step": 2790
},
{
"epoch": 0.66,
"learning_rate": 4.425913178663268e-05,
"loss": 1.5932,
"step": 2800
},
{
"epoch": 0.66,
"learning_rate": 4.421971352336289e-05,
"loss": 1.6713,
"step": 2810
},
{
"epoch": 0.66,
"learning_rate": 4.418017807634616e-05,
"loss": 1.64,
"step": 2820
},
{
"epoch": 0.67,
"learning_rate": 4.414052568663248e-05,
"loss": 1.662,
"step": 2830
},
{
"epoch": 0.67,
"learning_rate": 4.4100756595984846e-05,
"loss": 1.5613,
"step": 2840
},
{
"epoch": 0.67,
"learning_rate": 4.40608710468778e-05,
"loss": 1.6049,
"step": 2850
},
{
"epoch": 0.67,
"learning_rate": 4.4020869282495916e-05,
"loss": 1.5997,
"step": 2860
},
{
"epoch": 0.68,
"learning_rate": 4.398075154673237e-05,
"loss": 1.6365,
"step": 2870
},
{
"epoch": 0.68,
"learning_rate": 4.3940518084187384e-05,
"loss": 1.6177,
"step": 2880
},
{
"epoch": 0.68,
"learning_rate": 4.39001691401668e-05,
"loss": 1.6507,
"step": 2890
},
{
"epoch": 0.68,
"learning_rate": 4.385970496068057e-05,
"loss": 1.5873,
"step": 2900
},
{
"epoch": 0.69,
"learning_rate": 4.38191257924412e-05,
"loss": 1.5996,
"step": 2910
},
{
"epoch": 0.69,
"learning_rate": 4.377843188286233e-05,
"loss": 1.665,
"step": 2920
},
{
"epoch": 0.69,
"learning_rate": 4.3737623480057165e-05,
"loss": 1.6183,
"step": 2930
},
{
"epoch": 0.69,
"learning_rate": 4.369670083283698e-05,
"loss": 1.6413,
"step": 2940
},
{
"epoch": 0.7,
"learning_rate": 4.365566419070962e-05,
"loss": 1.5564,
"step": 2950
},
{
"epoch": 0.7,
"learning_rate": 4.3614513803877956e-05,
"loss": 1.6456,
"step": 2960
},
{
"epoch": 0.7,
"learning_rate": 4.357324992323836e-05,
"loss": 1.589,
"step": 2970
},
{
"epoch": 0.7,
"learning_rate": 4.353187280037918e-05,
"loss": 1.5581,
"step": 2980
},
{
"epoch": 0.7,
"learning_rate": 4.349038268757924e-05,
"loss": 1.6063,
"step": 2990
},
{
"epoch": 0.71,
"learning_rate": 4.344877983780624e-05,
"loss": 1.611,
"step": 3000
},
{
"epoch": 0.71,
"eval_loss": 1.6054201126098633,
"eval_runtime": 120.533,
"eval_samples_per_second": 11.374,
"eval_steps_per_second": 2.846,
"step": 3000
},
{
"epoch": 0.71,
"learning_rate": 4.340706450471524e-05,
"loss": 1.6163,
"step": 3010
},
{
"epoch": 0.71,
"learning_rate": 4.3365236942647146e-05,
"loss": 1.6384,
"step": 3020
},
{
"epoch": 0.71,
"learning_rate": 4.33232974066271e-05,
"loss": 1.5703,
"step": 3030
},
{
"epoch": 0.72,
"learning_rate": 4.3281246152362986e-05,
"loss": 1.5774,
"step": 3040
},
{
"epoch": 0.72,
"learning_rate": 4.323908343624381e-05,
"loss": 1.6061,
"step": 3050
},
{
"epoch": 0.72,
"learning_rate": 4.319680951533819e-05,
"loss": 1.5721,
"step": 3060
},
{
"epoch": 0.72,
"learning_rate": 4.315442464739276e-05,
"loss": 1.5836,
"step": 3070
},
{
"epoch": 0.73,
"learning_rate": 4.3111929090830605e-05,
"loss": 1.6175,
"step": 3080
},
{
"epoch": 0.73,
"learning_rate": 4.306932310474968e-05,
"loss": 1.595,
"step": 3090
},
{
"epoch": 0.73,
"learning_rate": 4.302660694892124e-05,
"loss": 1.5982,
"step": 3100
},
{
"epoch": 0.73,
"learning_rate": 4.2983780883788247e-05,
"loss": 1.5907,
"step": 3110
},
{
"epoch": 0.74,
"learning_rate": 4.2940845170463806e-05,
"loss": 1.6695,
"step": 3120
},
{
"epoch": 0.74,
"learning_rate": 4.289780007072952e-05,
"loss": 1.5913,
"step": 3130
},
{
"epoch": 0.74,
"learning_rate": 4.285464584703396e-05,
"loss": 1.553,
"step": 3140
},
{
"epoch": 0.74,
"learning_rate": 4.2811382762491e-05,
"loss": 1.618,
"step": 3150
},
{
"epoch": 0.75,
"learning_rate": 4.276801108087829e-05,
"loss": 1.4985,
"step": 3160
},
{
"epoch": 0.75,
"learning_rate": 4.272453106663555e-05,
"loss": 1.6046,
"step": 3170
},
{
"epoch": 0.75,
"learning_rate": 4.268094298486305e-05,
"loss": 1.6321,
"step": 3180
},
{
"epoch": 0.75,
"learning_rate": 4.263724710131994e-05,
"loss": 1.5503,
"step": 3190
},
{
"epoch": 0.75,
"learning_rate": 4.259344368242264e-05,
"loss": 1.573,
"step": 3200
},
{
"epoch": 0.76,
"learning_rate": 4.254953299524323e-05,
"loss": 1.528,
"step": 3210
},
{
"epoch": 0.76,
"learning_rate": 4.250551530750779e-05,
"loss": 1.5518,
"step": 3220
},
{
"epoch": 0.76,
"learning_rate": 4.246139088759483e-05,
"loss": 1.6624,
"step": 3230
},
{
"epoch": 0.76,
"learning_rate": 4.241716000453357e-05,
"loss": 1.5525,
"step": 3240
},
{
"epoch": 0.77,
"learning_rate": 4.237282292800237e-05,
"loss": 1.559,
"step": 3250
},
{
"epoch": 0.77,
"learning_rate": 4.2328379928327025e-05,
"loss": 1.5948,
"step": 3260
},
{
"epoch": 0.77,
"learning_rate": 4.2283831276479185e-05,
"loss": 1.6198,
"step": 3270
},
{
"epoch": 0.77,
"learning_rate": 4.2239177244074655e-05,
"loss": 1.5895,
"step": 3280
},
{
"epoch": 0.78,
"learning_rate": 4.219441810337176e-05,
"loss": 1.6092,
"step": 3290
},
{
"epoch": 0.78,
"learning_rate": 4.214955412726965e-05,
"loss": 1.5791,
"step": 3300
},
{
"epoch": 0.78,
"learning_rate": 4.210458558930668e-05,
"loss": 1.6058,
"step": 3310
},
{
"epoch": 0.78,
"learning_rate": 4.205951276365875e-05,
"loss": 1.5717,
"step": 3320
},
{
"epoch": 0.79,
"learning_rate": 4.201433592513755e-05,
"loss": 1.5903,
"step": 3330
},
{
"epoch": 0.79,
"learning_rate": 4.1969055349189e-05,
"loss": 1.6179,
"step": 3340
},
{
"epoch": 0.79,
"learning_rate": 4.192367131189148e-05,
"loss": 1.6235,
"step": 3350
},
{
"epoch": 0.79,
"learning_rate": 4.1878184089954185e-05,
"loss": 1.5712,
"step": 3360
},
{
"epoch": 0.79,
"learning_rate": 4.183259396071545e-05,
"loss": 1.6418,
"step": 3370
},
{
"epoch": 0.8,
"learning_rate": 4.178690120214102e-05,
"loss": 1.5748,
"step": 3380
},
{
"epoch": 0.8,
"learning_rate": 4.1741106092822386e-05,
"loss": 1.5349,
"step": 3390
},
{
"epoch": 0.8,
"learning_rate": 4.169520891197508e-05,
"loss": 1.6124,
"step": 3400
},
{
"epoch": 0.8,
"learning_rate": 4.164920993943697e-05,
"loss": 1.5648,
"step": 3410
},
{
"epoch": 0.81,
"learning_rate": 4.1603109455666564e-05,
"loss": 1.6162,
"step": 3420
},
{
"epoch": 0.81,
"learning_rate": 4.1556907741741244e-05,
"loss": 1.5948,
"step": 3430
},
{
"epoch": 0.81,
"learning_rate": 4.151060507935568e-05,
"loss": 1.6071,
"step": 3440
},
{
"epoch": 0.81,
"learning_rate": 4.146420175081995e-05,
"loss": 1.612,
"step": 3450
},
{
"epoch": 0.82,
"learning_rate": 4.141769803905793e-05,
"loss": 1.6257,
"step": 3460
},
{
"epoch": 0.82,
"learning_rate": 4.1371094227605564e-05,
"loss": 1.5877,
"step": 3470
},
{
"epoch": 0.82,
"learning_rate": 4.132439060060908e-05,
"loss": 1.5676,
"step": 3480
},
{
"epoch": 0.82,
"learning_rate": 4.127758744282329e-05,
"loss": 1.5605,
"step": 3490
},
{
"epoch": 0.83,
"learning_rate": 4.123068503960986e-05,
"loss": 1.6394,
"step": 3500
},
{
"epoch": 0.83,
"learning_rate": 4.1183683676935555e-05,
"loss": 1.6232,
"step": 3510
},
{
"epoch": 0.83,
"learning_rate": 4.113658364137051e-05,
"loss": 1.5559,
"step": 3520
},
{
"epoch": 0.83,
"learning_rate": 4.108938522008646e-05,
"loss": 1.5552,
"step": 3530
},
{
"epoch": 0.83,
"learning_rate": 4.104208870085502e-05,
"loss": 1.5411,
"step": 3540
},
{
"epoch": 0.84,
"learning_rate": 4.0994694372045906e-05,
"loss": 1.6087,
"step": 3550
},
{
"epoch": 0.84,
"learning_rate": 4.0947202522625175e-05,
"loss": 1.5833,
"step": 3560
},
{
"epoch": 0.84,
"learning_rate": 4.08996134421535e-05,
"loss": 1.5919,
"step": 3570
},
{
"epoch": 0.84,
"learning_rate": 4.0851927420784353e-05,
"loss": 1.5449,
"step": 3580
},
{
"epoch": 0.85,
"learning_rate": 4.080414474926226e-05,
"loss": 1.6524,
"step": 3590
},
{
"epoch": 0.85,
"learning_rate": 4.075626571892105e-05,
"loss": 1.5543,
"step": 3600
},
{
"epoch": 0.85,
"learning_rate": 4.0708290621682045e-05,
"loss": 1.565,
"step": 3610
},
{
"epoch": 0.85,
"learning_rate": 4.066021975005228e-05,
"loss": 1.6035,
"step": 3620
},
{
"epoch": 0.86,
"learning_rate": 4.061205339712275e-05,
"loss": 1.537,
"step": 3630
},
{
"epoch": 0.86,
"learning_rate": 4.0563791856566616e-05,
"loss": 1.5907,
"step": 3640
},
{
"epoch": 0.86,
"learning_rate": 4.051543542263736e-05,
"loss": 1.558,
"step": 3650
},
{
"epoch": 0.86,
"learning_rate": 4.046698439016708e-05,
"loss": 1.6066,
"step": 3660
},
{
"epoch": 0.87,
"learning_rate": 4.0418439054564615e-05,
"loss": 1.6129,
"step": 3670
},
{
"epoch": 0.87,
"learning_rate": 4.036979971181382e-05,
"loss": 1.6739,
"step": 3680
},
{
"epoch": 0.87,
"learning_rate": 4.0321066658471646e-05,
"loss": 1.5924,
"step": 3690
},
{
"epoch": 0.87,
"learning_rate": 4.027224019166648e-05,
"loss": 1.5308,
"step": 3700
},
{
"epoch": 0.87,
"learning_rate": 4.0223320609096195e-05,
"loss": 1.6009,
"step": 3710
},
{
"epoch": 0.88,
"learning_rate": 4.0174308209026435e-05,
"loss": 1.6631,
"step": 3720
},
{
"epoch": 0.88,
"learning_rate": 4.012520329028874e-05,
"loss": 1.5813,
"step": 3730
},
{
"epoch": 0.88,
"learning_rate": 4.007600615227876e-05,
"loss": 1.619,
"step": 3740
},
{
"epoch": 0.88,
"learning_rate": 4.002671709495438e-05,
"loss": 1.5654,
"step": 3750
},
{
"epoch": 0.89,
"learning_rate": 3.997733641883395e-05,
"loss": 1.5944,
"step": 3760
},
{
"epoch": 0.89,
"learning_rate": 3.992786442499442e-05,
"loss": 1.654,
"step": 3770
},
{
"epoch": 0.89,
"learning_rate": 3.98783014150695e-05,
"loss": 1.5784,
"step": 3780
},
{
"epoch": 0.89,
"learning_rate": 3.9828647691247836e-05,
"loss": 1.5812,
"step": 3790
},
{
"epoch": 0.9,
"learning_rate": 3.977890355627116e-05,
"loss": 1.5983,
"step": 3800
},
{
"epoch": 0.9,
"learning_rate": 3.9729069313432454e-05,
"loss": 1.633,
"step": 3810
},
{
"epoch": 0.9,
"learning_rate": 3.967914526657408e-05,
"loss": 1.639,
"step": 3820
},
{
"epoch": 0.9,
"learning_rate": 3.9629131720085966e-05,
"loss": 1.5822,
"step": 3830
},
{
"epoch": 0.91,
"learning_rate": 3.957902897890369e-05,
"loss": 1.6079,
"step": 3840
},
{
"epoch": 0.91,
"learning_rate": 3.952883734850667e-05,
"loss": 1.6206,
"step": 3850
},
{
"epoch": 0.91,
"learning_rate": 3.947855713491631e-05,
"loss": 1.5733,
"step": 3860
},
{
"epoch": 0.91,
"learning_rate": 3.942818864469407e-05,
"loss": 1.6351,
"step": 3870
},
{
"epoch": 0.91,
"learning_rate": 3.9377732184939664e-05,
"loss": 1.6283,
"step": 3880
},
{
"epoch": 0.92,
"learning_rate": 3.9327188063289156e-05,
"loss": 1.6428,
"step": 3890
},
{
"epoch": 0.92,
"learning_rate": 3.9276556587913096e-05,
"loss": 1.6037,
"step": 3900
},
{
"epoch": 0.92,
"learning_rate": 3.922583806751461e-05,
"loss": 1.5311,
"step": 3910
},
{
"epoch": 0.92,
"learning_rate": 3.917503281132758e-05,
"loss": 1.5731,
"step": 3920
},
{
"epoch": 0.93,
"learning_rate": 3.9124141129114695e-05,
"loss": 1.6292,
"step": 3930
},
{
"epoch": 0.93,
"learning_rate": 3.90731633311656e-05,
"loss": 1.5781,
"step": 3940
},
{
"epoch": 0.93,
"learning_rate": 3.902209972829498e-05,
"loss": 1.5801,
"step": 3950
},
{
"epoch": 0.93,
"learning_rate": 3.897095063184069e-05,
"loss": 1.642,
"step": 3960
},
{
"epoch": 0.94,
"learning_rate": 3.8919716353661846e-05,
"loss": 1.5843,
"step": 3970
},
{
"epoch": 0.94,
"learning_rate": 3.886839720613691e-05,
"loss": 1.6273,
"step": 3980
},
{
"epoch": 0.94,
"learning_rate": 3.8816993502161815e-05,
"loss": 1.675,
"step": 3990
},
{
"epoch": 0.94,
"learning_rate": 3.876550555514802e-05,
"loss": 1.6156,
"step": 4000
},
{
"epoch": 0.94,
"eval_loss": 1.5981309413909912,
"eval_runtime": 120.2627,
"eval_samples_per_second": 11.4,
"eval_steps_per_second": 2.852,
"step": 4000
},
{
"epoch": 0.95,
"learning_rate": 3.8713933679020634e-05,
"loss": 1.6232,
"step": 4010
},
{
"epoch": 0.95,
"learning_rate": 3.8662278188216485e-05,
"loss": 1.5513,
"step": 4020
},
{
"epoch": 0.95,
"learning_rate": 3.861053939768218e-05,
"loss": 1.5522,
"step": 4030
},
{
"epoch": 0.95,
"learning_rate": 3.855871762287225e-05,
"loss": 1.6226,
"step": 4040
},
{
"epoch": 0.95,
"learning_rate": 3.8506813179747165e-05,
"loss": 1.6186,
"step": 4050
},
{
"epoch": 0.96,
"learning_rate": 3.8454826384771426e-05,
"loss": 1.5325,
"step": 4060
},
{
"epoch": 0.96,
"learning_rate": 3.840275755491164e-05,
"loss": 1.5972,
"step": 4070
},
{
"epoch": 0.96,
"learning_rate": 3.83506070076346e-05,
"loss": 1.603,
"step": 4080
},
{
"epoch": 0.96,
"learning_rate": 3.82983750609053e-05,
"loss": 1.517,
"step": 4090
},
{
"epoch": 0.97,
"learning_rate": 3.824606203318507e-05,
"loss": 1.6564,
"step": 4100
},
{
"epoch": 0.97,
"learning_rate": 3.819366824342959e-05,
"loss": 1.656,
"step": 4110
},
{
"epoch": 0.97,
"learning_rate": 3.814119401108692e-05,
"loss": 1.6301,
"step": 4120
},
{
"epoch": 0.97,
"learning_rate": 3.8088639656095614e-05,
"loss": 1.5871,
"step": 4130
},
{
"epoch": 0.98,
"learning_rate": 3.803600549888273e-05,
"loss": 1.6384,
"step": 4140
},
{
"epoch": 0.98,
"learning_rate": 3.7983291860361866e-05,
"loss": 1.559,
"step": 4150
},
{
"epoch": 0.98,
"learning_rate": 3.793049906193127e-05,
"loss": 1.5624,
"step": 4160
},
{
"epoch": 0.98,
"learning_rate": 3.78776274254718e-05,
"loss": 1.5147,
"step": 4170
},
{
"epoch": 0.99,
"learning_rate": 3.782467727334496e-05,
"loss": 1.6521,
"step": 4180
},
{
"epoch": 0.99,
"learning_rate": 3.7771648928391045e-05,
"loss": 1.6049,
"step": 4190
},
{
"epoch": 0.99,
"learning_rate": 3.771854271392703e-05,
"loss": 1.6414,
"step": 4200
},
{
"epoch": 0.99,
"learning_rate": 3.766535895374472e-05,
"loss": 1.5619,
"step": 4210
},
{
"epoch": 0.99,
"learning_rate": 3.761209797210866e-05,
"loss": 1.5992,
"step": 4220
},
{
"epoch": 1.0,
"learning_rate": 3.755876009375428e-05,
"loss": 1.5478,
"step": 4230
},
{
"epoch": 1.0,
"learning_rate": 3.750534564388582e-05,
"loss": 1.6274,
"step": 4240
},
{
"epoch": 1.0,
"learning_rate": 3.745185494817438e-05,
"loss": 1.5732,
"step": 4250
},
{
"epoch": 1.0,
"learning_rate": 3.7398288332755936e-05,
"loss": 1.5372,
"step": 4260
},
{
"epoch": 1.01,
"learning_rate": 3.7344646124229376e-05,
"loss": 1.5875,
"step": 4270
},
{
"epoch": 1.01,
"learning_rate": 3.7290928649654446e-05,
"loss": 1.642,
"step": 4280
},
{
"epoch": 1.01,
"learning_rate": 3.723713623654983e-05,
"loss": 1.5793,
"step": 4290
},
{
"epoch": 1.01,
"learning_rate": 3.718326921289108e-05,
"loss": 1.644,
"step": 4300
},
{
"epoch": 1.02,
"learning_rate": 3.712932790710869e-05,
"loss": 1.5677,
"step": 4310
},
{
"epoch": 1.02,
"learning_rate": 3.7075312648086036e-05,
"loss": 1.5733,
"step": 4320
},
{
"epoch": 1.02,
"learning_rate": 3.702122376515739e-05,
"loss": 1.5445,
"step": 4330
},
{
"epoch": 1.02,
"learning_rate": 3.696706158810591e-05,
"loss": 1.6533,
"step": 4340
},
{
"epoch": 1.03,
"learning_rate": 3.691282644716165e-05,
"loss": 1.5779,
"step": 4350
},
{
"epoch": 1.03,
"learning_rate": 3.685851867299953e-05,
"loss": 1.5356,
"step": 4360
},
{
"epoch": 1.03,
"learning_rate": 3.680413859673728e-05,
"loss": 1.6036,
"step": 4370
},
{
"epoch": 1.03,
"learning_rate": 3.674968654993352e-05,
"loss": 1.5826,
"step": 4380
},
{
"epoch": 1.04,
"learning_rate": 3.669516286458562e-05,
"loss": 1.5973,
"step": 4390
},
{
"epoch": 1.04,
"learning_rate": 3.664056787312779e-05,
"loss": 1.5661,
"step": 4400
},
{
"epoch": 1.04,
"learning_rate": 3.6585901908428946e-05,
"loss": 1.5681,
"step": 4410
},
{
"epoch": 1.04,
"learning_rate": 3.653116530379077e-05,
"loss": 1.565,
"step": 4420
},
{
"epoch": 1.04,
"learning_rate": 3.647635839294561e-05,
"loss": 1.5517,
"step": 4430
},
{
"epoch": 1.05,
"learning_rate": 3.642148151005452e-05,
"loss": 1.588,
"step": 4440
},
{
"epoch": 1.05,
"learning_rate": 3.636653498970512e-05,
"loss": 1.592,
"step": 4450
},
{
"epoch": 1.05,
"learning_rate": 3.6311519166909656e-05,
"loss": 1.5633,
"step": 4460
},
{
"epoch": 1.05,
"learning_rate": 3.62564343771029e-05,
"loss": 1.6122,
"step": 4470
},
{
"epoch": 1.06,
"learning_rate": 3.620128095614012e-05,
"loss": 1.6046,
"step": 4480
},
{
"epoch": 1.06,
"learning_rate": 3.614605924029504e-05,
"loss": 1.5932,
"step": 4490
},
{
"epoch": 1.06,
"learning_rate": 3.6090769566257767e-05,
"loss": 1.6165,
"step": 4500
},
{
"epoch": 1.06,
"learning_rate": 3.603541227113276e-05,
"loss": 1.5227,
"step": 4510
},
{
"epoch": 1.07,
"learning_rate": 3.597998769243678e-05,
"loss": 1.5661,
"step": 4520
},
{
"epoch": 1.07,
"learning_rate": 3.592449616809681e-05,
"loss": 1.5499,
"step": 4530
},
{
"epoch": 1.07,
"learning_rate": 3.5868938036448e-05,
"loss": 1.5931,
"step": 4540
},
{
"epoch": 1.07,
"learning_rate": 3.581331363623161e-05,
"loss": 1.5451,
"step": 4550
},
{
"epoch": 1.08,
"learning_rate": 3.5757623306592955e-05,
"loss": 1.5564,
"step": 4560
},
{
"epoch": 1.08,
"learning_rate": 3.570186738707931e-05,
"loss": 1.5816,
"step": 4570
},
{
"epoch": 1.08,
"learning_rate": 3.564604621763786e-05,
"loss": 1.6446,
"step": 4580
},
{
"epoch": 1.08,
"learning_rate": 3.559016013861364e-05,
"loss": 1.594,
"step": 4590
},
{
"epoch": 1.08,
"learning_rate": 3.553420949074742e-05,
"loss": 1.6004,
"step": 4600
},
{
"epoch": 1.09,
"learning_rate": 3.5478194615173655e-05,
"loss": 1.5862,
"step": 4610
},
{
"epoch": 1.09,
"learning_rate": 3.5422115853418405e-05,
"loss": 1.6648,
"step": 4620
},
{
"epoch": 1.09,
"learning_rate": 3.536597354739725e-05,
"loss": 1.6372,
"step": 4630
},
{
"epoch": 1.09,
"learning_rate": 3.530976803941319e-05,
"loss": 1.5812,
"step": 4640
},
{
"epoch": 1.1,
"learning_rate": 3.52534996721546e-05,
"loss": 1.5302,
"step": 4650
},
{
"epoch": 1.1,
"learning_rate": 3.519716878869308e-05,
"loss": 1.5731,
"step": 4660
},
{
"epoch": 1.1,
"learning_rate": 3.51407757324814e-05,
"loss": 1.6117,
"step": 4670
},
{
"epoch": 1.1,
"learning_rate": 3.508432084735142e-05,
"loss": 1.5817,
"step": 4680
},
{
"epoch": 1.11,
"learning_rate": 3.502780447751196e-05,
"loss": 1.5911,
"step": 4690
},
{
"epoch": 1.11,
"learning_rate": 3.4971226967546714e-05,
"loss": 1.5234,
"step": 4700
},
{
"epoch": 1.11,
"learning_rate": 3.491458866241217e-05,
"loss": 1.5837,
"step": 4710
},
{
"epoch": 1.11,
"learning_rate": 3.485788990743546e-05,
"loss": 1.5487,
"step": 4720
},
{
"epoch": 1.12,
"learning_rate": 3.48011310483123e-05,
"loss": 1.5276,
"step": 4730
},
{
"epoch": 1.12,
"learning_rate": 3.474431243110486e-05,
"loss": 1.5954,
"step": 4740
},
{
"epoch": 1.12,
"learning_rate": 3.468743440223966e-05,
"loss": 1.5444,
"step": 4750
},
{
"epoch": 1.12,
"learning_rate": 3.463049730850546e-05,
"loss": 1.5836,
"step": 4760
},
{
"epoch": 1.12,
"learning_rate": 3.457350149705113e-05,
"loss": 1.547,
"step": 4770
},
{
"epoch": 1.13,
"learning_rate": 3.451644731538357e-05,
"loss": 1.6454,
"step": 4780
},
{
"epoch": 1.13,
"learning_rate": 3.4459335111365533e-05,
"loss": 1.5334,
"step": 4790
},
{
"epoch": 1.13,
"learning_rate": 3.440216523321356e-05,
"loss": 1.5022,
"step": 4800
},
{
"epoch": 1.13,
"learning_rate": 3.434493802949582e-05,
"loss": 1.5177,
"step": 4810
},
{
"epoch": 1.14,
"learning_rate": 3.428765384913004e-05,
"loss": 1.5837,
"step": 4820
},
{
"epoch": 1.14,
"learning_rate": 3.4230313041381265e-05,
"loss": 1.5765,
"step": 4830
},
{
"epoch": 1.14,
"learning_rate": 3.417291595585987e-05,
"loss": 1.5551,
"step": 4840
},
{
"epoch": 1.14,
"learning_rate": 3.411546294251932e-05,
"loss": 1.6088,
"step": 4850
},
{
"epoch": 1.15,
"learning_rate": 3.405795435165409e-05,
"loss": 1.5787,
"step": 4860
},
{
"epoch": 1.15,
"learning_rate": 3.400039053389751e-05,
"loss": 1.6321,
"step": 4870
},
{
"epoch": 1.15,
"learning_rate": 3.394277184021962e-05,
"loss": 1.5318,
"step": 4880
},
{
"epoch": 1.15,
"learning_rate": 3.388509862192507e-05,
"loss": 1.4413,
"step": 4890
},
{
"epoch": 1.16,
"learning_rate": 3.382737123065092e-05,
"loss": 1.5207,
"step": 4900
},
{
"epoch": 1.16,
"learning_rate": 3.3769590018364564e-05,
"loss": 1.6031,
"step": 4910
},
{
"epoch": 1.16,
"learning_rate": 3.371175533736148e-05,
"loss": 1.6052,
"step": 4920
},
{
"epoch": 1.16,
"learning_rate": 3.365386754026323e-05,
"loss": 1.5122,
"step": 4930
},
{
"epoch": 1.16,
"learning_rate": 3.359592698001516e-05,
"loss": 1.6031,
"step": 4940
},
{
"epoch": 1.17,
"learning_rate": 3.353793400988436e-05,
"loss": 1.5539,
"step": 4950
},
{
"epoch": 1.17,
"learning_rate": 3.3479888983457454e-05,
"loss": 1.5709,
"step": 4960
},
{
"epoch": 1.17,
"learning_rate": 3.342179225463843e-05,
"loss": 1.5619,
"step": 4970
},
{
"epoch": 1.17,
"learning_rate": 3.336364417764654e-05,
"loss": 1.5903,
"step": 4980
},
{
"epoch": 1.18,
"learning_rate": 3.330544510701411e-05,
"loss": 1.5481,
"step": 4990
},
{
"epoch": 1.18,
"learning_rate": 3.324719539758435e-05,
"loss": 1.5959,
"step": 5000
},
{
"epoch": 1.18,
"eval_loss": 1.592111349105835,
"eval_runtime": 120.2035,
"eval_samples_per_second": 11.406,
"eval_steps_per_second": 2.853,
"step": 5000
},
{
"epoch": 1.18,
"learning_rate": 3.3188895404509254e-05,
"loss": 1.5371,
"step": 5010
},
{
"epoch": 1.18,
"learning_rate": 3.313054548324737e-05,
"loss": 1.6071,
"step": 5020
},
{
"epoch": 1.19,
"learning_rate": 3.307214598956165e-05,
"loss": 1.5601,
"step": 5030
},
{
"epoch": 1.19,
"learning_rate": 3.3013697279517346e-05,
"loss": 1.4761,
"step": 5040
},
{
"epoch": 1.19,
"learning_rate": 3.295519970947973e-05,
"loss": 1.5097,
"step": 5050
},
{
"epoch": 1.19,
"learning_rate": 3.289665363611201e-05,
"loss": 1.6271,
"step": 5060
},
{
"epoch": 1.2,
"learning_rate": 3.2838059416373094e-05,
"loss": 1.6295,
"step": 5070
},
{
"epoch": 1.2,
"learning_rate": 3.277941740751548e-05,
"loss": 1.5371,
"step": 5080
},
{
"epoch": 1.2,
"learning_rate": 3.272072796708299e-05,
"loss": 1.641,
"step": 5090
},
{
"epoch": 1.2,
"learning_rate": 3.266199145290868e-05,
"loss": 1.5497,
"step": 5100
},
{
"epoch": 1.2,
"learning_rate": 3.260320822311259e-05,
"loss": 1.5779,
"step": 5110
},
{
"epoch": 1.21,
"learning_rate": 3.2544378636099625e-05,
"loss": 1.6502,
"step": 5120
},
{
"epoch": 1.21,
"learning_rate": 3.248550305055728e-05,
"loss": 1.636,
"step": 5130
},
{
"epoch": 1.21,
"learning_rate": 3.242658182545356e-05,
"loss": 1.5893,
"step": 5140
},
{
"epoch": 1.21,
"learning_rate": 3.2367615320034675e-05,
"loss": 1.5649,
"step": 5150
},
{
"epoch": 1.22,
"learning_rate": 3.2308603893822985e-05,
"loss": 1.6139,
"step": 5160
},
{
"epoch": 1.22,
"learning_rate": 3.224954790661469e-05,
"loss": 1.4683,
"step": 5170
},
{
"epoch": 1.22,
"learning_rate": 3.219044771847767e-05,
"loss": 1.5228,
"step": 5180
},
{
"epoch": 1.22,
"learning_rate": 3.2131303689749334e-05,
"loss": 1.5809,
"step": 5190
},
{
"epoch": 1.23,
"learning_rate": 3.2072116181034364e-05,
"loss": 1.5908,
"step": 5200
},
{
"epoch": 1.23,
"learning_rate": 3.201288555320256e-05,
"loss": 1.5981,
"step": 5210
},
{
"epoch": 1.23,
"learning_rate": 3.1953612167386624e-05,
"loss": 1.566,
"step": 5220
},
{
"epoch": 1.23,
"learning_rate": 3.189429638497994e-05,
"loss": 1.6265,
"step": 5230
},
{
"epoch": 1.24,
"learning_rate": 3.183493856763438e-05,
"loss": 1.5968,
"step": 5240
},
{
"epoch": 1.24,
"learning_rate": 3.177553907725814e-05,
"loss": 1.5487,
"step": 5250
},
{
"epoch": 1.24,
"learning_rate": 3.171609827601347e-05,
"loss": 1.5437,
"step": 5260
},
{
"epoch": 1.24,
"learning_rate": 3.16566165263145e-05,
"loss": 1.6033,
"step": 5270
},
{
"epoch": 1.24,
"learning_rate": 3.159709419082503e-05,
"loss": 1.5619,
"step": 5280
},
{
"epoch": 1.25,
"learning_rate": 3.153753163245632e-05,
"loss": 1.595,
"step": 5290
},
{
"epoch": 1.25,
"learning_rate": 3.147792921436484e-05,
"loss": 1.5967,
"step": 5300
},
{
"epoch": 1.25,
"learning_rate": 3.1418287299950136e-05,
"loss": 1.6131,
"step": 5310
},
{
"epoch": 1.25,
"learning_rate": 3.1358606252852526e-05,
"loss": 1.564,
"step": 5320
},
{
"epoch": 1.26,
"learning_rate": 3.1298886436950946e-05,
"loss": 1.5854,
"step": 5330
},
{
"epoch": 1.26,
"learning_rate": 3.1239128216360696e-05,
"loss": 1.5676,
"step": 5340
},
{
"epoch": 1.26,
"learning_rate": 3.117933195543122e-05,
"loss": 1.5799,
"step": 5350
},
{
"epoch": 1.26,
"learning_rate": 3.111949801874393e-05,
"loss": 1.5614,
"step": 5360
},
{
"epoch": 1.27,
"learning_rate": 3.105962677110991e-05,
"loss": 1.5621,
"step": 5370
},
{
"epoch": 1.27,
"learning_rate": 3.099971857756777e-05,
"loss": 1.64,
"step": 5380
},
{
"epoch": 1.27,
"learning_rate": 3.093977380338134e-05,
"loss": 1.5687,
"step": 5390
},
{
"epoch": 1.27,
"learning_rate": 3.0879792814037524e-05,
"loss": 1.6168,
"step": 5400
},
{
"epoch": 1.28,
"learning_rate": 3.0819775975244005e-05,
"loss": 1.5049,
"step": 5410
},
{
"epoch": 1.28,
"learning_rate": 3.075972365292706e-05,
"loss": 1.6166,
"step": 5420
},
{
"epoch": 1.28,
"learning_rate": 3.0699636213229294e-05,
"loss": 1.5757,
"step": 5430
},
{
"epoch": 1.28,
"learning_rate": 3.0639514022507436e-05,
"loss": 1.5442,
"step": 5440
},
{
"epoch": 1.28,
"learning_rate": 3.057935744733009e-05,
"loss": 1.5417,
"step": 5450
},
{
"epoch": 1.29,
"learning_rate": 3.051916685447551e-05,
"loss": 1.556,
"step": 5460
},
{
"epoch": 1.29,
"learning_rate": 3.0458942610929353e-05,
"loss": 1.5974,
"step": 5470
},
{
"epoch": 1.29,
"learning_rate": 3.0398685083882438e-05,
"loss": 1.5775,
"step": 5480
},
{
"epoch": 1.29,
"learning_rate": 3.0338394640728533e-05,
"loss": 1.5323,
"step": 5490
},
{
"epoch": 1.3,
"learning_rate": 3.027807164906209e-05,
"loss": 1.6194,
"step": 5500
},
{
"epoch": 1.3,
"learning_rate": 3.0217716476676005e-05,
"loss": 1.5799,
"step": 5510
},
{
"epoch": 1.3,
"learning_rate": 3.0157329491559382e-05,
"loss": 1.5676,
"step": 5520
},
{
"epoch": 1.3,
"learning_rate": 3.0096911061895306e-05,
"loss": 1.5538,
"step": 5530
},
{
"epoch": 1.31,
"learning_rate": 3.0036461556058552e-05,
"loss": 1.5787,
"step": 5540
},
{
"epoch": 1.31,
"learning_rate": 2.9975981342613406e-05,
"loss": 1.5615,
"step": 5550
},
{
"epoch": 1.31,
"learning_rate": 2.9915470790311338e-05,
"loss": 1.5831,
"step": 5560
},
{
"epoch": 1.31,
"learning_rate": 2.9854930268088845e-05,
"loss": 1.6376,
"step": 5570
},
{
"epoch": 1.32,
"learning_rate": 2.9794360145065093e-05,
"loss": 1.5673,
"step": 5580
},
{
"epoch": 1.32,
"learning_rate": 2.9733760790539784e-05,
"loss": 1.5465,
"step": 5590
},
{
"epoch": 1.32,
"learning_rate": 2.9673132573990796e-05,
"loss": 1.5375,
"step": 5600
},
{
"epoch": 1.32,
"learning_rate": 2.961247586507203e-05,
"loss": 1.6384,
"step": 5610
},
{
"epoch": 1.33,
"learning_rate": 2.955179103361106e-05,
"loss": 1.608,
"step": 5620
},
{
"epoch": 1.33,
"learning_rate": 2.9491078449606958e-05,
"loss": 1.6231,
"step": 5630
},
{
"epoch": 1.33,
"learning_rate": 2.9430338483227982e-05,
"loss": 1.5672,
"step": 5640
},
{
"epoch": 1.33,
"learning_rate": 2.9369571504809368e-05,
"loss": 1.5708,
"step": 5650
},
{
"epoch": 1.33,
"learning_rate": 2.9308777884851013e-05,
"loss": 1.5704,
"step": 5660
},
{
"epoch": 1.34,
"learning_rate": 2.924795799401528e-05,
"loss": 1.5602,
"step": 5670
},
{
"epoch": 1.34,
"learning_rate": 2.9187112203124687e-05,
"loss": 1.5754,
"step": 5680
},
{
"epoch": 1.34,
"learning_rate": 2.9126240883159684e-05,
"loss": 1.5619,
"step": 5690
},
{
"epoch": 1.34,
"learning_rate": 2.9065344405256345e-05,
"loss": 1.5708,
"step": 5700
},
{
"epoch": 1.35,
"learning_rate": 2.9004423140704162e-05,
"loss": 1.6248,
"step": 5710
},
{
"epoch": 1.35,
"learning_rate": 2.894347746094374e-05,
"loss": 1.5462,
"step": 5720
},
{
"epoch": 1.35,
"learning_rate": 2.8882507737564546e-05,
"loss": 1.523,
"step": 5730
},
{
"epoch": 1.35,
"learning_rate": 2.8821514342302646e-05,
"loss": 1.5787,
"step": 5740
},
{
"epoch": 1.36,
"learning_rate": 2.876049764703842e-05,
"loss": 1.5542,
"step": 5750
},
{
"epoch": 1.36,
"learning_rate": 2.8699458023794342e-05,
"loss": 1.503,
"step": 5760
},
{
"epoch": 1.36,
"learning_rate": 2.8638395844732636e-05,
"loss": 1.554,
"step": 5770
},
{
"epoch": 1.36,
"learning_rate": 2.857731148215309e-05,
"loss": 1.5576,
"step": 5780
},
{
"epoch": 1.37,
"learning_rate": 2.8516205308490718e-05,
"loss": 1.5663,
"step": 5790
},
{
"epoch": 1.37,
"learning_rate": 2.8455077696313536e-05,
"loss": 1.5718,
"step": 5800
},
{
"epoch": 1.37,
"learning_rate": 2.8393929018320264e-05,
"loss": 1.5952,
"step": 5810
},
{
"epoch": 1.37,
"learning_rate": 2.8332759647338047e-05,
"loss": 1.6014,
"step": 5820
},
{
"epoch": 1.37,
"learning_rate": 2.827156995632024e-05,
"loss": 1.5607,
"step": 5830
},
{
"epoch": 1.38,
"learning_rate": 2.8210360318344032e-05,
"loss": 1.6279,
"step": 5840
},
{
"epoch": 1.38,
"learning_rate": 2.8149131106608284e-05,
"loss": 1.5182,
"step": 5850
},
{
"epoch": 1.38,
"learning_rate": 2.8087882694431156e-05,
"loss": 1.6032,
"step": 5860
},
{
"epoch": 1.38,
"learning_rate": 2.80266154552479e-05,
"loss": 1.5395,
"step": 5870
},
{
"epoch": 1.39,
"learning_rate": 2.796532976260856e-05,
"loss": 1.5687,
"step": 5880
},
{
"epoch": 1.39,
"learning_rate": 2.7904025990175675e-05,
"loss": 1.5735,
"step": 5890
},
{
"epoch": 1.39,
"learning_rate": 2.7842704511722017e-05,
"loss": 1.5653,
"step": 5900
},
{
"epoch": 1.39,
"learning_rate": 2.7781365701128333e-05,
"loss": 1.5791,
"step": 5910
},
{
"epoch": 1.4,
"learning_rate": 2.7720009932381024e-05,
"loss": 1.5829,
"step": 5920
},
{
"epoch": 1.4,
"learning_rate": 2.76586375795699e-05,
"loss": 1.5581,
"step": 5930
},
{
"epoch": 1.4,
"learning_rate": 2.7597249016885878e-05,
"loss": 1.6085,
"step": 5940
},
{
"epoch": 1.4,
"learning_rate": 2.753584461861871e-05,
"loss": 1.5713,
"step": 5950
},
{
"epoch": 1.41,
"learning_rate": 2.74744247591547e-05,
"loss": 1.5618,
"step": 5960
},
{
"epoch": 1.41,
"learning_rate": 2.7412989812974416e-05,
"loss": 1.6267,
"step": 5970
},
{
"epoch": 1.41,
"learning_rate": 2.7351540154650408e-05,
"loss": 1.6045,
"step": 5980
},
{
"epoch": 1.41,
"learning_rate": 2.7290076158844935e-05,
"loss": 1.5682,
"step": 5990
},
{
"epoch": 1.41,
"learning_rate": 2.7228598200307666e-05,
"loss": 1.5716,
"step": 6000
},
{
"epoch": 1.41,
"eval_loss": 1.5865955352783203,
"eval_runtime": 120.5077,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 2.846,
"step": 6000
},
{
"epoch": 1.42,
"learning_rate": 2.716710665387341e-05,
"loss": 1.5948,
"step": 6010
},
{
"epoch": 1.42,
"learning_rate": 2.710560189445981e-05,
"loss": 1.5397,
"step": 6020
},
{
"epoch": 1.42,
"learning_rate": 2.704408429706508e-05,
"loss": 1.5307,
"step": 6030
},
{
"epoch": 1.42,
"learning_rate": 2.6982554236765704e-05,
"loss": 1.5215,
"step": 6040
},
{
"epoch": 1.43,
"learning_rate": 2.692101208871415e-05,
"loss": 1.6056,
"step": 6050
},
{
"epoch": 1.43,
"learning_rate": 2.6859458228136592e-05,
"loss": 1.5652,
"step": 6060
},
{
"epoch": 1.43,
"learning_rate": 2.6797893030330607e-05,
"loss": 1.5155,
"step": 6070
},
{
"epoch": 1.43,
"learning_rate": 2.6736316870662904e-05,
"loss": 1.6073,
"step": 6080
},
{
"epoch": 1.44,
"learning_rate": 2.6674730124567023e-05,
"loss": 1.5765,
"step": 6090
},
{
"epoch": 1.44,
"learning_rate": 2.6613133167541055e-05,
"loss": 1.601,
"step": 6100
},
{
"epoch": 1.44,
"learning_rate": 2.6551526375145342e-05,
"loss": 1.6192,
"step": 6110
},
{
"epoch": 1.44,
"learning_rate": 2.6489910123000195e-05,
"loss": 1.5599,
"step": 6120
},
{
"epoch": 1.45,
"learning_rate": 2.6428284786783597e-05,
"loss": 1.5574,
"step": 6130
},
{
"epoch": 1.45,
"learning_rate": 2.6366650742228937e-05,
"loss": 1.609,
"step": 6140
},
{
"epoch": 1.45,
"learning_rate": 2.6305008365122664e-05,
"loss": 1.5537,
"step": 6150
},
{
"epoch": 1.45,
"learning_rate": 2.6243358031302067e-05,
"loss": 1.4887,
"step": 6160
},
{
"epoch": 1.45,
"learning_rate": 2.6181700116652917e-05,
"loss": 1.604,
"step": 6170
},
{
"epoch": 1.46,
"learning_rate": 2.612003499710724e-05,
"loss": 1.5129,
"step": 6180
},
{
"epoch": 1.46,
"learning_rate": 2.6058363048640948e-05,
"loss": 1.5484,
"step": 6190
},
{
"epoch": 1.46,
"learning_rate": 2.5996684647271635e-05,
"loss": 1.5315,
"step": 6200
},
{
"epoch": 1.46,
"learning_rate": 2.59350001690562e-05,
"loss": 1.5443,
"step": 6210
},
{
"epoch": 1.47,
"learning_rate": 2.5873309990088612e-05,
"loss": 1.5461,
"step": 6220
},
{
"epoch": 1.47,
"learning_rate": 2.5811614486497605e-05,
"loss": 1.5906,
"step": 6230
},
{
"epoch": 1.47,
"learning_rate": 2.574991403444435e-05,
"loss": 1.5412,
"step": 6240
},
{
"epoch": 1.47,
"learning_rate": 2.5688209010120225e-05,
"loss": 1.5833,
"step": 6250
},
{
"epoch": 1.48,
"learning_rate": 2.562649978974445e-05,
"loss": 1.5442,
"step": 6260
},
{
"epoch": 1.48,
"learning_rate": 2.556478674956186e-05,
"loss": 1.5377,
"step": 6270
},
{
"epoch": 1.48,
"learning_rate": 2.5503070265840556e-05,
"loss": 1.5521,
"step": 6280
},
{
"epoch": 1.48,
"learning_rate": 2.5441350714869644e-05,
"loss": 1.5607,
"step": 6290
},
{
"epoch": 1.49,
"learning_rate": 2.5379628472956933e-05,
"loss": 1.5642,
"step": 6300
},
{
"epoch": 1.49,
"learning_rate": 2.5317903916426645e-05,
"loss": 1.5422,
"step": 6310
},
{
"epoch": 1.49,
"learning_rate": 2.5256177421617088e-05,
"loss": 1.5729,
"step": 6320
},
{
"epoch": 1.49,
"learning_rate": 2.519444936487842e-05,
"loss": 1.5666,
"step": 6330
},
{
"epoch": 1.49,
"learning_rate": 2.5132720122570298e-05,
"loss": 1.5788,
"step": 6340
},
{
"epoch": 1.5,
"learning_rate": 2.507099007105963e-05,
"loss": 1.4995,
"step": 6350
},
{
"epoch": 1.5,
"learning_rate": 2.500925958671823e-05,
"loss": 1.6433,
"step": 6360
},
{
"epoch": 1.5,
"learning_rate": 2.494752904592058e-05,
"loss": 1.5099,
"step": 6370
},
{
"epoch": 1.5,
"learning_rate": 2.4885798825041488e-05,
"loss": 1.6121,
"step": 6380
},
{
"epoch": 1.51,
"learning_rate": 2.4824069300453815e-05,
"loss": 1.551,
"step": 6390
},
{
"epoch": 1.51,
"learning_rate": 2.4762340848526162e-05,
"loss": 1.5144,
"step": 6400
},
{
"epoch": 1.51,
"learning_rate": 2.4700613845620632e-05,
"loss": 1.5084,
"step": 6410
},
{
"epoch": 1.51,
"learning_rate": 2.4638888668090457e-05,
"loss": 1.5896,
"step": 6420
},
{
"epoch": 1.52,
"learning_rate": 2.4577165692277744e-05,
"loss": 1.5006,
"step": 6430
},
{
"epoch": 1.52,
"learning_rate": 2.4515445294511176e-05,
"loss": 1.5435,
"step": 6440
},
{
"epoch": 1.52,
"learning_rate": 2.445372785110374e-05,
"loss": 1.5812,
"step": 6450
},
{
"epoch": 1.52,
"learning_rate": 2.439201373835039e-05,
"loss": 1.5867,
"step": 6460
},
{
"epoch": 1.53,
"learning_rate": 2.433030333252576e-05,
"loss": 1.5748,
"step": 6470
},
{
"epoch": 1.53,
"learning_rate": 2.42685970098819e-05,
"loss": 1.5986,
"step": 6480
},
{
"epoch": 1.53,
"learning_rate": 2.42130651215743e-05,
"loss": 1.5599,
"step": 6490
},
{
"epoch": 1.53,
"learning_rate": 2.4151367593457314e-05,
"loss": 1.549,
"step": 6500
},
{
"epoch": 1.53,
"learning_rate": 2.4089675239503044e-05,
"loss": 1.5109,
"step": 6510
},
{
"epoch": 1.54,
"learning_rate": 2.4027988435853466e-05,
"loss": 1.5712,
"step": 6520
},
{
"epoch": 1.54,
"learning_rate": 2.3966307558616745e-05,
"loss": 1.5581,
"step": 6530
},
{
"epoch": 1.54,
"learning_rate": 2.3904632983864885e-05,
"loss": 1.5607,
"step": 6540
},
{
"epoch": 1.54,
"learning_rate": 2.384296508763147e-05,
"loss": 1.598,
"step": 6550
},
{
"epoch": 1.55,
"learning_rate": 2.378130424590935e-05,
"loss": 1.5464,
"step": 6560
},
{
"epoch": 1.55,
"learning_rate": 2.37196508346484e-05,
"loss": 1.5242,
"step": 6570
},
{
"epoch": 1.55,
"learning_rate": 2.365800522975316e-05,
"loss": 1.4973,
"step": 6580
},
{
"epoch": 1.55,
"learning_rate": 2.359636780708058e-05,
"loss": 1.6024,
"step": 6590
},
{
"epoch": 1.56,
"learning_rate": 2.353473894243772e-05,
"loss": 1.5798,
"step": 6600
},
{
"epoch": 1.56,
"learning_rate": 2.3473119011579485e-05,
"loss": 1.5767,
"step": 6610
},
{
"epoch": 1.56,
"learning_rate": 2.3411508390206286e-05,
"loss": 1.5836,
"step": 6620
},
{
"epoch": 1.56,
"learning_rate": 2.334990745396177e-05,
"loss": 1.5451,
"step": 6630
},
{
"epoch": 1.57,
"learning_rate": 2.328831657843054e-05,
"loss": 1.5379,
"step": 6640
},
{
"epoch": 1.57,
"learning_rate": 2.3226736139135876e-05,
"loss": 1.591,
"step": 6650
},
{
"epoch": 1.57,
"learning_rate": 2.316516651153741e-05,
"loss": 1.5998,
"step": 6660
},
{
"epoch": 1.57,
"learning_rate": 2.3103608071028848e-05,
"loss": 1.6216,
"step": 6670
},
{
"epoch": 1.58,
"learning_rate": 2.3042061192935705e-05,
"loss": 1.5791,
"step": 6680
},
{
"epoch": 1.58,
"learning_rate": 2.2980526252512972e-05,
"loss": 1.6064,
"step": 6690
},
{
"epoch": 1.58,
"learning_rate": 2.29190036249429e-05,
"loss": 1.6265,
"step": 6700
},
{
"epoch": 1.58,
"learning_rate": 2.2857493685332633e-05,
"loss": 1.5985,
"step": 6710
},
{
"epoch": 1.58,
"learning_rate": 2.2795996808711963e-05,
"loss": 1.591,
"step": 6720
},
{
"epoch": 1.59,
"learning_rate": 2.2734513370031025e-05,
"loss": 1.5104,
"step": 6730
},
{
"epoch": 1.59,
"learning_rate": 2.2673043744158057e-05,
"loss": 1.5631,
"step": 6740
},
{
"epoch": 1.59,
"learning_rate": 2.261158830587705e-05,
"loss": 1.5249,
"step": 6750
},
{
"epoch": 1.59,
"learning_rate": 2.25501474298855e-05,
"loss": 1.6217,
"step": 6760
},
{
"epoch": 1.6,
"learning_rate": 2.2488721490792104e-05,
"loss": 1.5736,
"step": 6770
},
{
"epoch": 1.6,
"learning_rate": 2.2427310863114513e-05,
"loss": 1.5733,
"step": 6780
},
{
"epoch": 1.6,
"learning_rate": 2.2365915921277004e-05,
"loss": 1.56,
"step": 6790
},
{
"epoch": 1.6,
"learning_rate": 2.2304537039608224e-05,
"loss": 1.5668,
"step": 6800
},
{
"epoch": 1.61,
"learning_rate": 2.22431745923389e-05,
"loss": 1.5134,
"step": 6810
},
{
"epoch": 1.61,
"learning_rate": 2.2181828953599556e-05,
"loss": 1.5995,
"step": 6820
},
{
"epoch": 1.61,
"learning_rate": 2.2120500497418238e-05,
"loss": 1.644,
"step": 6830
},
{
"epoch": 1.61,
"learning_rate": 2.2059189597718205e-05,
"loss": 1.5736,
"step": 6840
},
{
"epoch": 1.62,
"learning_rate": 2.200402510774106e-05,
"loss": 1.449,
"step": 6850
},
{
"epoch": 1.62,
"learning_rate": 2.194274859512892e-05,
"loss": 1.5933,
"step": 6860
},
{
"epoch": 1.62,
"learning_rate": 2.18814907227621e-05,
"loss": 1.5343,
"step": 6870
},
{
"epoch": 1.62,
"learning_rate": 2.182025186413352e-05,
"loss": 1.4848,
"step": 6880
},
{
"epoch": 1.62,
"learning_rate": 2.175903239262017e-05,
"loss": 1.5866,
"step": 6890
},
{
"epoch": 1.63,
"learning_rate": 2.1697832681480858e-05,
"loss": 1.5759,
"step": 6900
},
{
"epoch": 1.63,
"learning_rate": 2.1636653103853887e-05,
"loss": 1.5649,
"step": 6910
},
{
"epoch": 1.63,
"learning_rate": 2.157549403275481e-05,
"loss": 1.5768,
"step": 6920
},
{
"epoch": 1.63,
"learning_rate": 2.1514355841074157e-05,
"loss": 1.5945,
"step": 6930
},
{
"epoch": 1.64,
"learning_rate": 2.1453238901575158e-05,
"loss": 1.5329,
"step": 6940
},
{
"epoch": 1.64,
"learning_rate": 2.139214358689146e-05,
"loss": 1.5272,
"step": 6950
},
{
"epoch": 1.64,
"learning_rate": 2.1331070269524858e-05,
"loss": 1.6365,
"step": 6960
},
{
"epoch": 1.64,
"learning_rate": 2.1270019321843033e-05,
"loss": 1.5395,
"step": 6970
},
{
"epoch": 1.65,
"learning_rate": 2.120899111607728e-05,
"loss": 1.5281,
"step": 6980
},
{
"epoch": 1.65,
"learning_rate": 2.114798602432024e-05,
"loss": 1.6174,
"step": 6990
},
{
"epoch": 1.65,
"learning_rate": 2.108700441852361e-05,
"loss": 1.6281,
"step": 7000
},
{
"epoch": 1.65,
"eval_loss": 1.5833255052566528,
"eval_runtime": 120.1395,
"eval_samples_per_second": 11.412,
"eval_steps_per_second": 2.855,
"step": 7000
},
{
"epoch": 1.65,
"learning_rate": 2.1026046670495906e-05,
"loss": 1.6199,
"step": 7010
},
{
"epoch": 1.66,
"learning_rate": 2.0965113151900166e-05,
"loss": 1.6042,
"step": 7020
},
{
"epoch": 1.66,
"learning_rate": 2.090420423425172e-05,
"loss": 1.5406,
"step": 7030
},
{
"epoch": 1.66,
"learning_rate": 2.0843320288915903e-05,
"loss": 1.5542,
"step": 7040
},
{
"epoch": 1.66,
"learning_rate": 2.078246168710577e-05,
"loss": 1.5691,
"step": 7050
},
{
"epoch": 1.66,
"learning_rate": 2.072162879987986e-05,
"loss": 1.5629,
"step": 7060
},
{
"epoch": 1.67,
"learning_rate": 2.066082199813996e-05,
"loss": 1.6191,
"step": 7070
},
{
"epoch": 1.67,
"learning_rate": 2.0600041652628787e-05,
"loss": 1.6127,
"step": 7080
},
{
"epoch": 1.67,
"learning_rate": 2.0539288133927746e-05,
"loss": 1.5847,
"step": 7090
},
{
"epoch": 1.67,
"learning_rate": 2.0478561812454678e-05,
"loss": 1.591,
"step": 7100
},
{
"epoch": 1.68,
"learning_rate": 2.0417863058461633e-05,
"loss": 1.5478,
"step": 7110
},
{
"epoch": 1.68,
"learning_rate": 2.0357192242032547e-05,
"loss": 1.5991,
"step": 7120
},
{
"epoch": 1.68,
"learning_rate": 2.0296549733081027e-05,
"loss": 1.5795,
"step": 7130
},
{
"epoch": 1.68,
"learning_rate": 2.0235935901348098e-05,
"loss": 1.5478,
"step": 7140
},
{
"epoch": 1.69,
"learning_rate": 2.0175351116399904e-05,
"loss": 1.641,
"step": 7150
},
{
"epoch": 1.69,
"learning_rate": 2.011479574762555e-05,
"loss": 1.5214,
"step": 7160
},
{
"epoch": 1.69,
"learning_rate": 2.005427016423474e-05,
"loss": 1.5606,
"step": 7170
},
{
"epoch": 1.69,
"learning_rate": 1.9993774735255587e-05,
"loss": 1.5445,
"step": 7180
},
{
"epoch": 1.7,
"learning_rate": 1.9933309829532344e-05,
"loss": 1.6013,
"step": 7190
},
{
"epoch": 1.7,
"learning_rate": 1.9872875815723187e-05,
"loss": 1.61,
"step": 7200
},
{
"epoch": 1.7,
"learning_rate": 1.981247306229792e-05,
"loss": 1.5263,
"step": 7210
},
{
"epoch": 1.7,
"learning_rate": 1.9752101937535754e-05,
"loss": 1.5198,
"step": 7220
},
{
"epoch": 1.7,
"learning_rate": 1.9691762809523055e-05,
"loss": 1.5959,
"step": 7230
},
{
"epoch": 1.71,
"learning_rate": 1.963145604615112e-05,
"loss": 1.6017,
"step": 7240
},
{
"epoch": 1.71,
"learning_rate": 1.9571182015113894e-05,
"loss": 1.5784,
"step": 7250
},
{
"epoch": 1.71,
"learning_rate": 1.9510941083905775e-05,
"loss": 1.5273,
"step": 7260
},
{
"epoch": 1.71,
"learning_rate": 1.9450733619819317e-05,
"loss": 1.5608,
"step": 7270
},
{
"epoch": 1.72,
"learning_rate": 1.939055998994306e-05,
"loss": 1.5707,
"step": 7280
},
{
"epoch": 1.72,
"learning_rate": 1.9330420561159224e-05,
"loss": 1.5873,
"step": 7290
},
{
"epoch": 1.72,
"learning_rate": 1.9270315700141532e-05,
"loss": 1.5831,
"step": 7300
},
{
"epoch": 1.72,
"learning_rate": 1.9210245773352913e-05,
"loss": 1.5502,
"step": 7310
},
{
"epoch": 1.73,
"learning_rate": 1.915021114704332e-05,
"loss": 1.5395,
"step": 7320
},
{
"epoch": 1.73,
"learning_rate": 1.909021218724748e-05,
"loss": 1.6205,
"step": 7330
},
{
"epoch": 1.73,
"learning_rate": 1.9030249259782647e-05,
"loss": 1.6269,
"step": 7340
},
{
"epoch": 1.73,
"learning_rate": 1.8970322730246386e-05,
"loss": 1.6254,
"step": 7350
},
{
"epoch": 1.74,
"learning_rate": 1.891043296401435e-05,
"loss": 1.6078,
"step": 7360
},
{
"epoch": 1.74,
"learning_rate": 1.8850580326238037e-05,
"loss": 1.5952,
"step": 7370
},
{
"epoch": 1.74,
"learning_rate": 1.8790765181842572e-05,
"loss": 1.6439,
"step": 7380
},
{
"epoch": 1.74,
"learning_rate": 1.873098789552448e-05,
"loss": 1.5527,
"step": 7390
},
{
"epoch": 1.74,
"learning_rate": 1.8671248831749454e-05,
"loss": 1.5974,
"step": 7400
},
{
"epoch": 1.75,
"learning_rate": 1.8611548354750176e-05,
"loss": 1.5194,
"step": 7410
},
{
"epoch": 1.75,
"learning_rate": 1.8551886828524013e-05,
"loss": 1.5647,
"step": 7420
},
{
"epoch": 1.75,
"learning_rate": 1.8492264616830884e-05,
"loss": 1.6324,
"step": 7430
},
{
"epoch": 1.75,
"learning_rate": 1.843268208319098e-05,
"loss": 1.5751,
"step": 7440
},
{
"epoch": 1.76,
"learning_rate": 1.8373139590882603e-05,
"loss": 1.5693,
"step": 7450
},
{
"epoch": 1.76,
"learning_rate": 1.8313637502939895e-05,
"loss": 1.562,
"step": 7460
},
{
"epoch": 1.76,
"learning_rate": 1.8254176182150654e-05,
"loss": 1.5584,
"step": 7470
},
{
"epoch": 1.76,
"learning_rate": 1.8194755991054123e-05,
"loss": 1.5866,
"step": 7480
},
{
"epoch": 1.77,
"learning_rate": 1.8135377291938765e-05,
"loss": 1.6487,
"step": 7490
},
{
"epoch": 1.77,
"learning_rate": 1.8076040446840092e-05,
"loss": 1.5458,
"step": 7500
},
{
"epoch": 1.77,
"learning_rate": 1.80167458175384e-05,
"loss": 1.5688,
"step": 7510
},
{
"epoch": 1.77,
"learning_rate": 1.79574937655566e-05,
"loss": 1.5377,
"step": 7520
},
{
"epoch": 1.78,
"learning_rate": 1.7898284652158006e-05,
"loss": 1.6038,
"step": 7530
},
{
"epoch": 1.78,
"learning_rate": 1.783911883834415e-05,
"loss": 1.5159,
"step": 7540
},
{
"epoch": 1.78,
"learning_rate": 1.777999668485254e-05,
"loss": 1.575,
"step": 7550
},
{
"epoch": 1.78,
"learning_rate": 1.7720918552154498e-05,
"loss": 1.6133,
"step": 7560
},
{
"epoch": 1.78,
"learning_rate": 1.7661884800452932e-05,
"loss": 1.5649,
"step": 7570
},
{
"epoch": 1.79,
"learning_rate": 1.7602895789680194e-05,
"loss": 1.5856,
"step": 7580
},
{
"epoch": 1.79,
"learning_rate": 1.7543951879495806e-05,
"loss": 1.5763,
"step": 7590
},
{
"epoch": 1.79,
"learning_rate": 1.7485053429284335e-05,
"loss": 1.5841,
"step": 7600
},
{
"epoch": 1.79,
"learning_rate": 1.7426200798153152e-05,
"loss": 1.6031,
"step": 7610
},
{
"epoch": 1.8,
"learning_rate": 1.7367394344930298e-05,
"loss": 1.5723,
"step": 7620
},
{
"epoch": 1.8,
"learning_rate": 1.7308634428162245e-05,
"loss": 1.5619,
"step": 7630
},
{
"epoch": 1.8,
"learning_rate": 1.724992140611173e-05,
"loss": 1.5642,
"step": 7640
},
{
"epoch": 1.8,
"learning_rate": 1.719125563675557e-05,
"loss": 1.5634,
"step": 7650
},
{
"epoch": 1.81,
"learning_rate": 1.7132637477782477e-05,
"loss": 1.5896,
"step": 7660
},
{
"epoch": 1.81,
"learning_rate": 1.7074067286590897e-05,
"loss": 1.5564,
"step": 7670
},
{
"epoch": 1.81,
"learning_rate": 1.7015545420286798e-05,
"loss": 1.533,
"step": 7680
},
{
"epoch": 1.81,
"learning_rate": 1.695707223568151e-05,
"loss": 1.5789,
"step": 7690
},
{
"epoch": 1.82,
"learning_rate": 1.689864808928954e-05,
"loss": 1.573,
"step": 7700
},
{
"epoch": 1.82,
"learning_rate": 1.6840273337326424e-05,
"loss": 1.6167,
"step": 7710
},
{
"epoch": 1.82,
"learning_rate": 1.6781948335706534e-05,
"loss": 1.5644,
"step": 7720
},
{
"epoch": 1.82,
"learning_rate": 1.67236734400409e-05,
"loss": 1.5827,
"step": 7730
},
{
"epoch": 1.82,
"learning_rate": 1.666544900563505e-05,
"loss": 1.5427,
"step": 7740
},
{
"epoch": 1.83,
"learning_rate": 1.660727538748687e-05,
"loss": 1.5782,
"step": 7750
},
{
"epoch": 1.83,
"learning_rate": 1.654915294028439e-05,
"loss": 1.5257,
"step": 7760
},
{
"epoch": 1.83,
"learning_rate": 1.649108201840367e-05,
"loss": 1.5747,
"step": 7770
},
{
"epoch": 1.83,
"learning_rate": 1.6433062975906594e-05,
"loss": 1.5598,
"step": 7780
},
{
"epoch": 1.84,
"learning_rate": 1.6375096166538757e-05,
"loss": 1.5349,
"step": 7790
},
{
"epoch": 1.84,
"learning_rate": 1.6317181943727272e-05,
"loss": 1.5958,
"step": 7800
},
{
"epoch": 1.84,
"learning_rate": 1.6259320660578627e-05,
"loss": 1.5406,
"step": 7810
},
{
"epoch": 1.84,
"learning_rate": 1.620151266987654e-05,
"loss": 1.4676,
"step": 7820
},
{
"epoch": 1.85,
"learning_rate": 1.61437583240798e-05,
"loss": 1.5561,
"step": 7830
},
{
"epoch": 1.85,
"learning_rate": 1.608605797532013e-05,
"loss": 1.5527,
"step": 7840
},
{
"epoch": 1.85,
"learning_rate": 1.6028411975400005e-05,
"loss": 1.6027,
"step": 7850
},
{
"epoch": 1.85,
"learning_rate": 1.5970820675790554e-05,
"loss": 1.5452,
"step": 7860
},
{
"epoch": 1.86,
"learning_rate": 1.5913284427629376e-05,
"loss": 1.5342,
"step": 7870
},
{
"epoch": 1.86,
"learning_rate": 1.585580358171845e-05,
"loss": 1.6369,
"step": 7880
},
{
"epoch": 1.86,
"learning_rate": 1.5798378488521937e-05,
"loss": 1.6002,
"step": 7890
},
{
"epoch": 1.86,
"learning_rate": 1.5741009498164066e-05,
"loss": 1.5132,
"step": 7900
},
{
"epoch": 1.87,
"learning_rate": 1.5683696960427012e-05,
"loss": 1.6326,
"step": 7910
},
{
"epoch": 1.87,
"learning_rate": 1.5626441224748784e-05,
"loss": 1.5737,
"step": 7920
},
{
"epoch": 1.87,
"learning_rate": 1.5569242640221015e-05,
"loss": 1.6005,
"step": 7930
},
{
"epoch": 1.87,
"learning_rate": 1.5512101555586918e-05,
"loss": 1.5976,
"step": 7940
},
{
"epoch": 1.87,
"learning_rate": 1.54550183192391e-05,
"loss": 1.6039,
"step": 7950
},
{
"epoch": 1.88,
"learning_rate": 1.5397993279217504e-05,
"loss": 1.5774,
"step": 7960
},
{
"epoch": 1.88,
"learning_rate": 1.5341026783207208e-05,
"loss": 1.5339,
"step": 7970
},
{
"epoch": 1.88,
"learning_rate": 1.528411917853636e-05,
"loss": 1.5806,
"step": 7980
},
{
"epoch": 1.88,
"learning_rate": 1.5227270812174033e-05,
"loss": 1.5673,
"step": 7990
},
{
"epoch": 1.89,
"learning_rate": 1.5170482030728142e-05,
"loss": 1.6091,
"step": 8000
},
{
"epoch": 1.89,
"eval_loss": 1.5800806283950806,
"eval_runtime": 121.3887,
"eval_samples_per_second": 11.294,
"eval_steps_per_second": 2.826,
"step": 8000
},
{
"epoch": 1.89,
"learning_rate": 1.511375318044329e-05,
"loss": 1.5585,
"step": 8010
},
{
"epoch": 1.89,
"learning_rate": 1.5057084607198685e-05,
"loss": 1.6185,
"step": 8020
},
{
"epoch": 1.89,
"learning_rate": 1.5000476656506019e-05,
"loss": 1.5859,
"step": 8030
},
{
"epoch": 1.9,
"learning_rate": 1.4943929673507345e-05,
"loss": 1.5645,
"step": 8040
},
{
"epoch": 1.9,
"learning_rate": 1.4887444002973048e-05,
"loss": 1.5036,
"step": 8050
},
{
"epoch": 1.9,
"learning_rate": 1.483101998929963e-05,
"loss": 1.6188,
"step": 8060
},
{
"epoch": 1.9,
"learning_rate": 1.4774657976507695e-05,
"loss": 1.5842,
"step": 8070
},
{
"epoch": 1.91,
"learning_rate": 1.4718358308239799e-05,
"loss": 1.5984,
"step": 8080
},
{
"epoch": 1.91,
"learning_rate": 1.4662121327758432e-05,
"loss": 1.5114,
"step": 8090
},
{
"epoch": 1.91,
"learning_rate": 1.4605947377943818e-05,
"loss": 1.5658,
"step": 8100
},
{
"epoch": 1.91,
"learning_rate": 1.454983680129191e-05,
"loss": 1.5323,
"step": 8110
},
{
"epoch": 1.91,
"learning_rate": 1.4493789939912244e-05,
"loss": 1.6191,
"step": 8120
},
{
"epoch": 1.92,
"learning_rate": 1.4437807135525922e-05,
"loss": 1.5712,
"step": 8130
},
{
"epoch": 1.92,
"learning_rate": 1.438188872946345e-05,
"loss": 1.5304,
"step": 8140
},
{
"epoch": 1.92,
"learning_rate": 1.4326035062662707e-05,
"loss": 1.5967,
"step": 8150
},
{
"epoch": 1.92,
"learning_rate": 1.4270246475666846e-05,
"loss": 1.5486,
"step": 8160
},
{
"epoch": 1.93,
"learning_rate": 1.4214523308622243e-05,
"loss": 1.6059,
"step": 8170
},
{
"epoch": 1.93,
"learning_rate": 1.4158865901276385e-05,
"loss": 1.593,
"step": 8180
},
{
"epoch": 1.93,
"learning_rate": 1.410327459297583e-05,
"loss": 1.5811,
"step": 8190
},
{
"epoch": 1.93,
"learning_rate": 1.4047749722664116e-05,
"loss": 1.5334,
"step": 8200
},
{
"epoch": 1.94,
"learning_rate": 1.39922916288797e-05,
"loss": 1.5781,
"step": 8210
},
{
"epoch": 1.94,
"learning_rate": 1.3936900649753931e-05,
"loss": 1.6089,
"step": 8220
},
{
"epoch": 1.94,
"learning_rate": 1.3881577123008921e-05,
"loss": 1.5119,
"step": 8230
},
{
"epoch": 1.94,
"learning_rate": 1.3826321385955535e-05,
"loss": 1.5515,
"step": 8240
},
{
"epoch": 1.95,
"learning_rate": 1.3771133775491307e-05,
"loss": 1.586,
"step": 8250
},
{
"epoch": 1.95,
"learning_rate": 1.3716014628098431e-05,
"loss": 1.6166,
"step": 8260
},
{
"epoch": 1.95,
"learning_rate": 1.3660964279841647e-05,
"loss": 1.5123,
"step": 8270
},
{
"epoch": 1.95,
"learning_rate": 1.3605983066366234e-05,
"loss": 1.5726,
"step": 8280
},
{
"epoch": 1.95,
"learning_rate": 1.3551071322895936e-05,
"loss": 1.5723,
"step": 8290
},
{
"epoch": 1.96,
"learning_rate": 1.3496229384230974e-05,
"loss": 1.5756,
"step": 8300
},
{
"epoch": 1.96,
"learning_rate": 1.3441457584745928e-05,
"loss": 1.5795,
"step": 8310
},
{
"epoch": 1.96,
"learning_rate": 1.3386756258387744e-05,
"loss": 1.5917,
"step": 8320
},
{
"epoch": 1.96,
"learning_rate": 1.33321257386737e-05,
"loss": 1.5951,
"step": 8330
},
{
"epoch": 1.97,
"learning_rate": 1.3277566358689336e-05,
"loss": 1.5424,
"step": 8340
},
{
"epoch": 1.97,
"learning_rate": 1.3223078451086487e-05,
"loss": 1.548,
"step": 8350
},
{
"epoch": 1.97,
"learning_rate": 1.316866234808119e-05,
"loss": 1.5404,
"step": 8360
},
{
"epoch": 1.97,
"learning_rate": 1.3114318381451688e-05,
"loss": 1.5472,
"step": 8370
},
{
"epoch": 1.98,
"learning_rate": 1.3060046882536409e-05,
"loss": 1.5692,
"step": 8380
},
{
"epoch": 1.98,
"learning_rate": 1.3005848182231939e-05,
"loss": 1.4966,
"step": 8390
},
{
"epoch": 1.98,
"learning_rate": 1.2951722610990993e-05,
"loss": 1.564,
"step": 8400
},
{
"epoch": 1.98,
"learning_rate": 1.2897670498820455e-05,
"loss": 1.5788,
"step": 8410
},
{
"epoch": 1.99,
"learning_rate": 1.284369217527928e-05,
"loss": 1.5353,
"step": 8420
},
{
"epoch": 1.99,
"learning_rate": 1.2789787969476554e-05,
"loss": 1.5966,
"step": 8430
},
{
"epoch": 1.99,
"learning_rate": 1.2735958210069448e-05,
"loss": 1.5634,
"step": 8440
},
{
"epoch": 1.99,
"learning_rate": 1.268220322526123e-05,
"loss": 1.5649,
"step": 8450
},
{
"epoch": 1.99,
"learning_rate": 1.262852334279929e-05,
"loss": 1.4958,
"step": 8460
},
{
"epoch": 2.0,
"learning_rate": 1.257491888997308e-05,
"loss": 1.5192,
"step": 8470
},
{
"epoch": 2.0,
"learning_rate": 1.2521390193612165e-05,
"loss": 1.5598,
"step": 8480
},
{
"epoch": 2.0,
"learning_rate": 1.2467937580084225e-05,
"loss": 1.5079,
"step": 8490
},
{
"epoch": 2.0,
"learning_rate": 1.2414561375293038e-05,
"loss": 1.514,
"step": 8500
},
{
"epoch": 2.01,
"learning_rate": 1.236126190467655e-05,
"loss": 1.5451,
"step": 8510
},
{
"epoch": 2.01,
"learning_rate": 1.2308039493204823e-05,
"loss": 1.5526,
"step": 8520
},
{
"epoch": 2.01,
"learning_rate": 1.2254894465378094e-05,
"loss": 1.4948,
"step": 8530
},
{
"epoch": 2.01,
"learning_rate": 1.220182714522479e-05,
"loss": 1.6119,
"step": 8540
},
{
"epoch": 2.02,
"learning_rate": 1.2148837856299533e-05,
"loss": 1.5818,
"step": 8550
},
{
"epoch": 2.02,
"learning_rate": 1.2095926921681219e-05,
"loss": 1.5446,
"step": 8560
},
{
"epoch": 2.02,
"learning_rate": 1.2043094663970982e-05,
"loss": 1.5348,
"step": 8570
},
{
"epoch": 2.02,
"learning_rate": 1.1990341405290271e-05,
"loss": 1.5595,
"step": 8580
},
{
"epoch": 2.03,
"learning_rate": 1.193766746727886e-05,
"loss": 1.5402,
"step": 8590
},
{
"epoch": 2.03,
"learning_rate": 1.1885073171092926e-05,
"loss": 1.5021,
"step": 8600
},
{
"epoch": 2.03,
"learning_rate": 1.1832558837403043e-05,
"loss": 1.5309,
"step": 8610
},
{
"epoch": 2.03,
"learning_rate": 1.1780124786392258e-05,
"loss": 1.6031,
"step": 8620
},
{
"epoch": 2.03,
"learning_rate": 1.1727771337754112e-05,
"loss": 1.6009,
"step": 8630
},
{
"epoch": 2.04,
"learning_rate": 1.167549881069075e-05,
"loss": 1.5555,
"step": 8640
},
{
"epoch": 2.04,
"learning_rate": 1.162330752391089e-05,
"loss": 1.5342,
"step": 8650
},
{
"epoch": 2.04,
"learning_rate": 1.1571197795627941e-05,
"loss": 1.5715,
"step": 8660
},
{
"epoch": 2.04,
"learning_rate": 1.1519169943558042e-05,
"loss": 1.5763,
"step": 8670
},
{
"epoch": 2.05,
"learning_rate": 1.1467224284918141e-05,
"loss": 1.5585,
"step": 8680
},
{
"epoch": 2.05,
"learning_rate": 1.141536113642403e-05,
"loss": 1.5248,
"step": 8690
},
{
"epoch": 2.05,
"learning_rate": 1.1363580814288435e-05,
"loss": 1.5985,
"step": 8700
},
{
"epoch": 2.05,
"learning_rate": 1.1311883634219095e-05,
"loss": 1.5718,
"step": 8710
},
{
"epoch": 2.06,
"learning_rate": 1.1260269911416807e-05,
"loss": 1.5899,
"step": 8720
},
{
"epoch": 2.06,
"learning_rate": 1.1208739960573553e-05,
"loss": 1.5258,
"step": 8730
},
{
"epoch": 2.06,
"learning_rate": 1.1157294095870527e-05,
"loss": 1.517,
"step": 8740
},
{
"epoch": 2.06,
"learning_rate": 1.110593263097626e-05,
"loss": 1.4968,
"step": 8750
},
{
"epoch": 2.07,
"learning_rate": 1.105465587904467e-05,
"loss": 1.538,
"step": 8760
},
{
"epoch": 2.07,
"learning_rate": 1.100346415271321e-05,
"loss": 1.5363,
"step": 8770
},
{
"epoch": 2.07,
"learning_rate": 1.0952357764100906e-05,
"loss": 1.5474,
"step": 8780
},
{
"epoch": 2.07,
"learning_rate": 1.090133702480647e-05,
"loss": 1.5999,
"step": 8790
},
{
"epoch": 2.07,
"learning_rate": 1.0850402245906408e-05,
"loss": 1.538,
"step": 8800
},
{
"epoch": 2.08,
"learning_rate": 1.0799553737953136e-05,
"loss": 1.5791,
"step": 8810
},
{
"epoch": 2.08,
"learning_rate": 1.0748791810973052e-05,
"loss": 1.6128,
"step": 8820
},
{
"epoch": 2.08,
"learning_rate": 1.0698116774464676e-05,
"loss": 1.5819,
"step": 8830
},
{
"epoch": 2.08,
"learning_rate": 1.064752893739673e-05,
"loss": 1.4816,
"step": 8840
},
{
"epoch": 2.09,
"learning_rate": 1.059702860820632e-05,
"loss": 1.5091,
"step": 8850
},
{
"epoch": 2.09,
"learning_rate": 1.0546616094796968e-05,
"loss": 1.5383,
"step": 8860
},
{
"epoch": 2.09,
"learning_rate": 1.0496291704536798e-05,
"loss": 1.5577,
"step": 8870
},
{
"epoch": 2.09,
"learning_rate": 1.044605574425664e-05,
"loss": 1.5483,
"step": 8880
},
{
"epoch": 2.1,
"learning_rate": 1.0395908520248143e-05,
"loss": 1.5387,
"step": 8890
},
{
"epoch": 2.1,
"learning_rate": 1.0345850338261964e-05,
"loss": 1.5891,
"step": 8900
},
{
"epoch": 2.1,
"learning_rate": 1.0295881503505836e-05,
"loss": 1.565,
"step": 8910
},
{
"epoch": 2.1,
"learning_rate": 1.0246002320642742e-05,
"loss": 1.5359,
"step": 8920
},
{
"epoch": 2.11,
"learning_rate": 1.0196213093789042e-05,
"loss": 1.5579,
"step": 8930
},
{
"epoch": 2.11,
"learning_rate": 1.0146514126512663e-05,
"loss": 1.563,
"step": 8940
},
{
"epoch": 2.11,
"learning_rate": 1.0096905721831176e-05,
"loss": 1.5762,
"step": 8950
},
{
"epoch": 2.11,
"learning_rate": 1.004738818221001e-05,
"loss": 1.4976,
"step": 8960
},
{
"epoch": 2.11,
"learning_rate": 9.997961809560564e-06,
"loss": 1.5758,
"step": 8970
},
{
"epoch": 2.12,
"learning_rate": 9.948626905238415e-06,
"loss": 1.5827,
"step": 8980
},
{
"epoch": 2.12,
"learning_rate": 9.899383770041426e-06,
"loss": 1.5686,
"step": 8990
},
{
"epoch": 2.12,
"learning_rate": 9.850232704207951e-06,
"loss": 1.5696,
"step": 9000
},
{
"epoch": 2.12,
"eval_loss": 1.5784997940063477,
"eval_runtime": 121.4622,
"eval_samples_per_second": 11.287,
"eval_steps_per_second": 2.824,
"step": 9000
},
{
"epoch": 2.12,
"learning_rate": 9.801174007414978e-06,
"loss": 1.5198,
"step": 9010
},
{
"epoch": 2.13,
"learning_rate": 9.752207978776346e-06,
"loss": 1.4989,
"step": 9020
},
{
"epoch": 2.13,
"learning_rate": 9.703334916840856e-06,
"loss": 1.5645,
"step": 9030
},
{
"epoch": 2.13,
"learning_rate": 9.654555119590506e-06,
"loss": 1.5655,
"step": 9040
},
{
"epoch": 2.13,
"learning_rate": 9.605868884438645e-06,
"loss": 1.5699,
"step": 9050
},
{
"epoch": 2.14,
"learning_rate": 9.557276508228164e-06,
"loss": 1.532,
"step": 9060
},
{
"epoch": 2.14,
"learning_rate": 9.508778287229714e-06,
"loss": 1.5158,
"step": 9070
},
{
"epoch": 2.14,
"learning_rate": 9.460374517139848e-06,
"loss": 1.5939,
"step": 9080
},
{
"epoch": 2.14,
"learning_rate": 9.412065493079261e-06,
"loss": 1.4778,
"step": 9090
},
{
"epoch": 2.15,
"learning_rate": 9.363851509590962e-06,
"loss": 1.5716,
"step": 9100
},
{
"epoch": 2.15,
"learning_rate": 9.315732860638518e-06,
"loss": 1.5349,
"step": 9110
},
{
"epoch": 2.15,
"learning_rate": 9.267709839604217e-06,
"loss": 1.5646,
"step": 9120
},
{
"epoch": 2.15,
"learning_rate": 9.219782739287292e-06,
"loss": 1.5573,
"step": 9130
},
{
"epoch": 2.16,
"learning_rate": 9.171951851902149e-06,
"loss": 1.5657,
"step": 9140
},
{
"epoch": 2.16,
"learning_rate": 9.124217469076593e-06,
"loss": 1.5415,
"step": 9150
},
{
"epoch": 2.16,
"learning_rate": 9.076579881850011e-06,
"loss": 1.5735,
"step": 9160
},
{
"epoch": 2.16,
"learning_rate": 9.029039380671636e-06,
"loss": 1.6238,
"step": 9170
},
{
"epoch": 2.16,
"learning_rate": 8.981596255398756e-06,
"loss": 1.612,
"step": 9180
},
{
"epoch": 2.17,
"learning_rate": 8.934250795294943e-06,
"loss": 1.4949,
"step": 9190
},
{
"epoch": 2.17,
"learning_rate": 8.887003289028326e-06,
"loss": 1.5107,
"step": 9200
},
{
"epoch": 2.17,
"learning_rate": 8.839854024669781e-06,
"loss": 1.534,
"step": 9210
},
{
"epoch": 2.17,
"learning_rate": 8.792803289691199e-06,
"loss": 1.538,
"step": 9220
},
{
"epoch": 2.18,
"learning_rate": 8.745851370963737e-06,
"loss": 1.5493,
"step": 9230
},
{
"epoch": 2.18,
"learning_rate": 8.698998554756052e-06,
"loss": 1.5706,
"step": 9240
},
{
"epoch": 2.18,
"learning_rate": 8.652245126732595e-06,
"loss": 1.5403,
"step": 9250
},
{
"epoch": 2.18,
"learning_rate": 8.605591371951815e-06,
"loss": 1.5141,
"step": 9260
},
{
"epoch": 2.19,
"learning_rate": 8.559037574864453e-06,
"loss": 1.59,
"step": 9270
},
{
"epoch": 2.19,
"learning_rate": 8.512584019311806e-06,
"loss": 1.5207,
"step": 9280
},
{
"epoch": 2.19,
"learning_rate": 8.466230988523988e-06,
"loss": 1.5303,
"step": 9290
},
{
"epoch": 2.19,
"learning_rate": 8.419978765118206e-06,
"loss": 1.5287,
"step": 9300
},
{
"epoch": 2.2,
"learning_rate": 8.373827631097052e-06,
"loss": 1.5204,
"step": 9310
},
{
"epoch": 2.2,
"learning_rate": 8.327777867846758e-06,
"loss": 1.5644,
"step": 9320
},
{
"epoch": 2.2,
"learning_rate": 8.281829756135492e-06,
"loss": 1.5745,
"step": 9330
},
{
"epoch": 2.2,
"learning_rate": 8.23598357611165e-06,
"loss": 1.5933,
"step": 9340
},
{
"epoch": 2.2,
"learning_rate": 8.190239607302133e-06,
"loss": 1.5505,
"step": 9350
},
{
"epoch": 2.21,
"learning_rate": 8.144598128610684e-06,
"loss": 1.5541,
"step": 9360
},
{
"epoch": 2.21,
"learning_rate": 8.099059418316126e-06,
"loss": 1.6338,
"step": 9370
},
{
"epoch": 2.21,
"learning_rate": 8.053623754070714e-06,
"loss": 1.5897,
"step": 9380
},
{
"epoch": 2.21,
"learning_rate": 8.008291412898414e-06,
"loss": 1.5704,
"step": 9390
},
{
"epoch": 2.22,
"learning_rate": 7.963062671193225e-06,
"loss": 1.5133,
"step": 9400
},
{
"epoch": 2.22,
"learning_rate": 7.917937804717521e-06,
"loss": 1.6135,
"step": 9410
},
{
"epoch": 2.22,
"learning_rate": 7.872917088600307e-06,
"loss": 1.4678,
"step": 9420
},
{
"epoch": 2.22,
"learning_rate": 7.828000797335593e-06,
"loss": 1.5418,
"step": 9430
},
{
"epoch": 2.23,
"learning_rate": 7.783189204780696e-06,
"loss": 1.6363,
"step": 9440
},
{
"epoch": 2.23,
"learning_rate": 7.738482584154601e-06,
"loss": 1.5124,
"step": 9450
},
{
"epoch": 2.23,
"learning_rate": 7.693881208036253e-06,
"loss": 1.5569,
"step": 9460
},
{
"epoch": 2.23,
"learning_rate": 7.649385348362912e-06,
"loss": 1.567,
"step": 9470
},
{
"epoch": 2.24,
"learning_rate": 7.604995276428501e-06,
"loss": 1.5967,
"step": 9480
},
{
"epoch": 2.24,
"learning_rate": 7.560711262881967e-06,
"loss": 1.5462,
"step": 9490
},
{
"epoch": 2.24,
"learning_rate": 7.516533577725593e-06,
"loss": 1.5963,
"step": 9500
},
{
"epoch": 2.24,
"learning_rate": 7.472462490313379e-06,
"loss": 1.5272,
"step": 9510
},
{
"epoch": 2.24,
"learning_rate": 7.428498269349376e-06,
"loss": 1.6033,
"step": 9520
},
{
"epoch": 2.25,
"learning_rate": 7.384641182886098e-06,
"loss": 1.5305,
"step": 9530
},
{
"epoch": 2.25,
"learning_rate": 7.340891498322824e-06,
"loss": 1.5018,
"step": 9540
},
{
"epoch": 2.25,
"learning_rate": 7.297249482404009e-06,
"loss": 1.5483,
"step": 9550
},
{
"epoch": 2.25,
"learning_rate": 7.2537154012176425e-06,
"loss": 1.5751,
"step": 9560
},
{
"epoch": 2.26,
"learning_rate": 7.210289520193619e-06,
"loss": 1.5893,
"step": 9570
},
{
"epoch": 2.26,
"learning_rate": 7.166972104102163e-06,
"loss": 1.5296,
"step": 9580
},
{
"epoch": 2.26,
"learning_rate": 7.123763417052151e-06,
"loss": 1.4995,
"step": 9590
},
{
"epoch": 2.26,
"learning_rate": 7.080663722489536e-06,
"loss": 1.5799,
"step": 9600
},
{
"epoch": 2.27,
"learning_rate": 7.037673283195742e-06,
"loss": 1.596,
"step": 9610
},
{
"epoch": 2.27,
"learning_rate": 6.99479236128607e-06,
"loss": 1.4874,
"step": 9620
},
{
"epoch": 2.27,
"learning_rate": 6.952021218208069e-06,
"loss": 1.6123,
"step": 9630
},
{
"epoch": 2.27,
"learning_rate": 6.909360114739963e-06,
"loss": 1.5308,
"step": 9640
},
{
"epoch": 2.28,
"learning_rate": 6.866809310989053e-06,
"loss": 1.5896,
"step": 9650
},
{
"epoch": 2.28,
"learning_rate": 6.824369066390157e-06,
"loss": 1.5295,
"step": 9660
},
{
"epoch": 2.28,
"learning_rate": 6.782039639703991e-06,
"loss": 1.5922,
"step": 9670
},
{
"epoch": 2.28,
"learning_rate": 6.739821289015607e-06,
"loss": 1.5989,
"step": 9680
},
{
"epoch": 2.28,
"learning_rate": 6.6977142717328165e-06,
"loss": 1.5635,
"step": 9690
},
{
"epoch": 2.29,
"learning_rate": 6.6557188445846465e-06,
"loss": 1.5945,
"step": 9700
},
{
"epoch": 2.29,
"learning_rate": 6.613835263619727e-06,
"loss": 1.5264,
"step": 9710
},
{
"epoch": 2.29,
"learning_rate": 6.572063784204769e-06,
"loss": 1.5457,
"step": 9720
},
{
"epoch": 2.29,
"learning_rate": 6.530404661022984e-06,
"loss": 1.549,
"step": 9730
},
{
"epoch": 2.3,
"learning_rate": 6.488858148072547e-06,
"loss": 1.6442,
"step": 9740
},
{
"epoch": 2.3,
"learning_rate": 6.44742449866505e-06,
"loss": 1.5169,
"step": 9750
},
{
"epoch": 2.3,
"learning_rate": 6.406103965423932e-06,
"loss": 1.6114,
"step": 9760
},
{
"epoch": 2.3,
"learning_rate": 6.364896800282968e-06,
"loss": 1.519,
"step": 9770
},
{
"epoch": 2.31,
"learning_rate": 6.323803254484712e-06,
"loss": 1.5455,
"step": 9780
},
{
"epoch": 2.31,
"learning_rate": 6.282823578578986e-06,
"loss": 1.587,
"step": 9790
},
{
"epoch": 2.31,
"learning_rate": 6.241958022421332e-06,
"loss": 1.5676,
"step": 9800
},
{
"epoch": 2.31,
"learning_rate": 6.201206835171497e-06,
"loss": 1.5286,
"step": 9810
},
{
"epoch": 2.32,
"learning_rate": 6.1605702652919095e-06,
"loss": 1.5097,
"step": 9820
},
{
"epoch": 2.32,
"learning_rate": 6.12004856054619e-06,
"loss": 1.5699,
"step": 9830
},
{
"epoch": 2.32,
"learning_rate": 6.079641967997596e-06,
"loss": 1.5225,
"step": 9840
},
{
"epoch": 2.32,
"learning_rate": 6.039350734007546e-06,
"loss": 1.5637,
"step": 9850
},
{
"epoch": 2.32,
"learning_rate": 5.9991751042341085e-06,
"loss": 1.5933,
"step": 9860
},
{
"epoch": 2.33,
"learning_rate": 5.959115323630521e-06,
"loss": 1.6083,
"step": 9870
},
{
"epoch": 2.33,
"learning_rate": 5.919171636443663e-06,
"loss": 1.5347,
"step": 9880
},
{
"epoch": 2.33,
"learning_rate": 5.879344286212596e-06,
"loss": 1.5161,
"step": 9890
},
{
"epoch": 2.33,
"learning_rate": 5.8396335157670625e-06,
"loss": 1.5147,
"step": 9900
},
{
"epoch": 2.34,
"learning_rate": 5.800039567226004e-06,
"loss": 1.4942,
"step": 9910
},
{
"epoch": 2.34,
"learning_rate": 5.760562681996121e-06,
"loss": 1.5687,
"step": 9920
},
{
"epoch": 2.34,
"learning_rate": 5.721203100770339e-06,
"loss": 1.5276,
"step": 9930
},
{
"epoch": 2.34,
"learning_rate": 5.681961063526392e-06,
"loss": 1.5139,
"step": 9940
},
{
"epoch": 2.35,
"learning_rate": 5.6428368095253286e-06,
"loss": 1.6345,
"step": 9950
},
{
"epoch": 2.35,
"learning_rate": 5.603830577310084e-06,
"loss": 1.5481,
"step": 9960
},
{
"epoch": 2.35,
"learning_rate": 5.564942604703996e-06,
"loss": 1.5523,
"step": 9970
},
{
"epoch": 2.35,
"learning_rate": 5.526173128809362e-06,
"loss": 1.5385,
"step": 9980
},
{
"epoch": 2.36,
"learning_rate": 5.487522386006e-06,
"loss": 1.5857,
"step": 9990
},
{
"epoch": 2.36,
"learning_rate": 5.448990611949823e-06,
"loss": 1.5659,
"step": 10000
},
{
"epoch": 2.36,
"eval_loss": 1.5774571895599365,
"eval_runtime": 120.2987,
"eval_samples_per_second": 11.397,
"eval_steps_per_second": 2.851,
"step": 10000
},
{
"epoch": 2.36,
"learning_rate": 5.41057804157136e-06,
"loss": 1.5826,
"step": 10010
},
{
"epoch": 2.36,
"learning_rate": 5.372284909074362e-06,
"loss": 1.5019,
"step": 10020
},
{
"epoch": 2.36,
"learning_rate": 5.334111447934348e-06,
"loss": 1.5691,
"step": 10030
},
{
"epoch": 2.37,
"learning_rate": 5.296057890897213e-06,
"loss": 1.524,
"step": 10040
},
{
"epoch": 2.37,
"learning_rate": 5.258124469977776e-06,
"loss": 1.5359,
"step": 10050
},
{
"epoch": 2.37,
"learning_rate": 5.220311416458376e-06,
"loss": 1.631,
"step": 10060
},
{
"epoch": 2.37,
"learning_rate": 5.182618960887476e-06,
"loss": 1.5473,
"step": 10070
},
{
"epoch": 2.38,
"learning_rate": 5.145047333078235e-06,
"loss": 1.5423,
"step": 10080
},
{
"epoch": 2.38,
"learning_rate": 5.1075967621071166e-06,
"loss": 1.5166,
"step": 10090
},
{
"epoch": 2.38,
"learning_rate": 5.070267476312515e-06,
"loss": 1.6095,
"step": 10100
},
{
"epoch": 2.38,
"learning_rate": 5.033059703293319e-06,
"loss": 1.552,
"step": 10110
},
{
"epoch": 2.39,
"learning_rate": 4.995973669907553e-06,
"loss": 1.5961,
"step": 10120
},
{
"epoch": 2.39,
"learning_rate": 4.959009602270989e-06,
"loss": 1.548,
"step": 10130
},
{
"epoch": 2.39,
"learning_rate": 4.922167725755761e-06,
"loss": 1.6018,
"step": 10140
},
{
"epoch": 2.39,
"learning_rate": 4.885448264989015e-06,
"loss": 1.4976,
"step": 10150
},
{
"epoch": 2.4,
"learning_rate": 4.8488514438514955e-06,
"loss": 1.5935,
"step": 10160
},
{
"epoch": 2.4,
"learning_rate": 4.812377485476224e-06,
"loss": 1.5797,
"step": 10170
},
{
"epoch": 2.4,
"learning_rate": 4.776026612247108e-06,
"loss": 1.5587,
"step": 10180
},
{
"epoch": 2.4,
"learning_rate": 4.739799045797611e-06,
"loss": 1.5267,
"step": 10190
},
{
"epoch": 2.4,
"learning_rate": 4.7036950070093645e-06,
"loss": 1.5398,
"step": 10200
},
{
"epoch": 2.41,
"learning_rate": 4.667714716010882e-06,
"loss": 1.6229,
"step": 10210
},
{
"epoch": 2.41,
"learning_rate": 4.631858392176142e-06,
"loss": 1.5529,
"step": 10220
},
{
"epoch": 2.41,
"learning_rate": 4.596126254123309e-06,
"loss": 1.6194,
"step": 10230
},
{
"epoch": 2.41,
"learning_rate": 4.560518519713372e-06,
"loss": 1.5792,
"step": 10240
},
{
"epoch": 2.42,
"learning_rate": 4.525035406048819e-06,
"loss": 1.5083,
"step": 10250
},
{
"epoch": 2.42,
"learning_rate": 4.4896771294723334e-06,
"loss": 1.5629,
"step": 10260
},
{
"epoch": 2.42,
"learning_rate": 4.4544439055654474e-06,
"loss": 1.464,
"step": 10270
},
{
"epoch": 2.42,
"learning_rate": 4.419335949147241e-06,
"loss": 1.5578,
"step": 10280
},
{
"epoch": 2.43,
"learning_rate": 4.384353474273023e-06,
"loss": 1.5438,
"step": 10290
},
{
"epoch": 2.43,
"learning_rate": 4.349496694233057e-06,
"loss": 1.5336,
"step": 10300
},
{
"epoch": 2.43,
"learning_rate": 4.3147658215512196e-06,
"loss": 1.5478,
"step": 10310
},
{
"epoch": 2.43,
"learning_rate": 4.280161067983721e-06,
"loss": 1.5084,
"step": 10320
},
{
"epoch": 2.44,
"learning_rate": 4.245682644517815e-06,
"loss": 1.6072,
"step": 10330
},
{
"epoch": 2.44,
"learning_rate": 4.211330761370533e-06,
"loss": 1.5532,
"step": 10340
},
{
"epoch": 2.44,
"learning_rate": 4.177105627987363e-06,
"loss": 1.5815,
"step": 10350
},
{
"epoch": 2.44,
"learning_rate": 4.143007453040995e-06,
"loss": 1.5864,
"step": 10360
},
{
"epoch": 2.45,
"learning_rate": 4.109036444430045e-06,
"loss": 1.4932,
"step": 10370
},
{
"epoch": 2.45,
"learning_rate": 4.075192809277803e-06,
"loss": 1.4614,
"step": 10380
},
{
"epoch": 2.45,
"learning_rate": 4.041476753930937e-06,
"loss": 1.5828,
"step": 10390
},
{
"epoch": 2.45,
"learning_rate": 4.007888483958258e-06,
"loss": 1.5167,
"step": 10400
},
{
"epoch": 2.45,
"learning_rate": 3.974428204149469e-06,
"loss": 1.5448,
"step": 10410
},
{
"epoch": 2.46,
"learning_rate": 3.941096118513893e-06,
"loss": 1.5393,
"step": 10420
},
{
"epoch": 2.46,
"learning_rate": 3.907892430279272e-06,
"loss": 1.5387,
"step": 10430
},
{
"epoch": 2.46,
"learning_rate": 3.87481734189048e-06,
"loss": 1.5227,
"step": 10440
},
{
"epoch": 2.46,
"learning_rate": 3.841871055008317e-06,
"loss": 1.6027,
"step": 10450
},
{
"epoch": 2.47,
"learning_rate": 3.809053770508261e-06,
"loss": 1.6111,
"step": 10460
},
{
"epoch": 2.47,
"learning_rate": 3.776365688479283e-06,
"loss": 1.5452,
"step": 10470
},
{
"epoch": 2.47,
"learning_rate": 3.743807008222572e-06,
"loss": 1.5782,
"step": 10480
},
{
"epoch": 2.47,
"learning_rate": 3.7113779282503564e-06,
"loss": 1.5405,
"step": 10490
},
{
"epoch": 2.48,
"learning_rate": 3.6790786462846783e-06,
"loss": 1.6353,
"step": 10500
},
{
"epoch": 2.48,
"learning_rate": 3.6469093592562066e-06,
"loss": 1.4493,
"step": 10510
},
{
"epoch": 2.48,
"learning_rate": 3.6148702633030135e-06,
"loss": 1.6325,
"step": 10520
},
{
"epoch": 2.48,
"learning_rate": 3.582961553769387e-06,
"loss": 1.5489,
"step": 10530
},
{
"epoch": 2.49,
"learning_rate": 3.5511834252046435e-06,
"loss": 1.6267,
"step": 10540
},
{
"epoch": 2.49,
"learning_rate": 3.5195360713619452e-06,
"loss": 1.5385,
"step": 10550
},
{
"epoch": 2.49,
"learning_rate": 3.4880196851971055e-06,
"loss": 1.5823,
"step": 10560
},
{
"epoch": 2.49,
"learning_rate": 3.4566344588674248e-06,
"loss": 1.5976,
"step": 10570
},
{
"epoch": 2.49,
"learning_rate": 3.425380583730506e-06,
"loss": 1.5544,
"step": 10580
},
{
"epoch": 2.5,
"learning_rate": 3.394258250343102e-06,
"loss": 1.5691,
"step": 10590
},
{
"epoch": 2.5,
"learning_rate": 3.363267648459956e-06,
"loss": 1.4777,
"step": 10600
},
{
"epoch": 2.5,
"learning_rate": 3.3324089670326185e-06,
"loss": 1.5837,
"step": 10610
},
{
"epoch": 2.5,
"learning_rate": 3.3016823942083303e-06,
"loss": 1.5463,
"step": 10620
},
{
"epoch": 2.51,
"learning_rate": 3.2710881173288384e-06,
"loss": 1.5895,
"step": 10630
},
{
"epoch": 2.51,
"learning_rate": 3.2406263229292992e-06,
"loss": 1.5495,
"step": 10640
},
{
"epoch": 2.51,
"learning_rate": 3.2102971967370944e-06,
"loss": 1.6608,
"step": 10650
},
{
"epoch": 2.51,
"learning_rate": 3.1801009236707285e-06,
"loss": 1.5773,
"step": 10660
},
{
"epoch": 2.52,
"learning_rate": 3.1500376878386832e-06,
"loss": 1.5229,
"step": 10670
},
{
"epoch": 2.52,
"learning_rate": 3.120107672538325e-06,
"loss": 1.5475,
"step": 10680
},
{
"epoch": 2.52,
"learning_rate": 3.090311060254747e-06,
"loss": 1.5161,
"step": 10690
},
{
"epoch": 2.52,
"learning_rate": 3.0606480326596825e-06,
"loss": 1.5614,
"step": 10700
},
{
"epoch": 2.53,
"learning_rate": 3.031118770610386e-06,
"loss": 1.5324,
"step": 10710
},
{
"epoch": 2.53,
"learning_rate": 3.0017234541485503e-06,
"loss": 1.6035,
"step": 10720
},
{
"epoch": 2.53,
"learning_rate": 2.9724622624991815e-06,
"loss": 1.5388,
"step": 10730
},
{
"epoch": 2.53,
"learning_rate": 2.94333537406952e-06,
"loss": 1.5093,
"step": 10740
},
{
"epoch": 2.53,
"learning_rate": 2.9143429664479525e-06,
"loss": 1.5894,
"step": 10750
},
{
"epoch": 2.54,
"learning_rate": 2.8854852164029225e-06,
"loss": 1.5607,
"step": 10760
},
{
"epoch": 2.54,
"learning_rate": 2.8567622998818765e-06,
"loss": 1.5648,
"step": 10770
},
{
"epoch": 2.54,
"learning_rate": 2.8281743920101523e-06,
"loss": 1.565,
"step": 10780
},
{
"epoch": 2.54,
"learning_rate": 2.799721667089944e-06,
"loss": 1.5372,
"step": 10790
},
{
"epoch": 2.55,
"learning_rate": 2.7714042985992144e-06,
"loss": 1.5587,
"step": 10800
},
{
"epoch": 2.55,
"learning_rate": 2.7432224591906698e-06,
"loss": 1.5361,
"step": 10810
},
{
"epoch": 2.55,
"learning_rate": 2.715176320690674e-06,
"loss": 1.5489,
"step": 10820
},
{
"epoch": 2.55,
"learning_rate": 2.687266054098217e-06,
"loss": 1.5527,
"step": 10830
},
{
"epoch": 2.56,
"learning_rate": 2.65949182958386e-06,
"loss": 1.5471,
"step": 10840
},
{
"epoch": 2.56,
"learning_rate": 2.6318538164887303e-06,
"loss": 1.528,
"step": 10850
},
{
"epoch": 2.56,
"learning_rate": 2.604352183323447e-06,
"loss": 1.5848,
"step": 10860
},
{
"epoch": 2.56,
"learning_rate": 2.576987097767117e-06,
"loss": 1.5875,
"step": 10870
},
{
"epoch": 2.57,
"learning_rate": 2.549758726666307e-06,
"loss": 1.5908,
"step": 10880
},
{
"epoch": 2.57,
"learning_rate": 2.5226672360340373e-06,
"loss": 1.5164,
"step": 10890
},
{
"epoch": 2.57,
"learning_rate": 2.4984020638028378e-06,
"loss": 1.54,
"step": 10900
},
{
"epoch": 2.57,
"learning_rate": 2.4715711004368204e-06,
"loss": 1.4219,
"step": 10910
},
{
"epoch": 2.57,
"learning_rate": 2.4448774942539832e-06,
"loss": 1.527,
"step": 10920
},
{
"epoch": 2.58,
"learning_rate": 2.418321408006857e-06,
"loss": 1.5304,
"step": 10930
},
{
"epoch": 2.58,
"learning_rate": 2.391903003609486e-06,
"loss": 1.5367,
"step": 10940
},
{
"epoch": 2.58,
"learning_rate": 2.3656224421364724e-06,
"loss": 1.5717,
"step": 10950
},
{
"epoch": 2.58,
"learning_rate": 2.339479883821968e-06,
"loss": 1.5522,
"step": 10960
},
{
"epoch": 2.59,
"learning_rate": 2.3134754880587307e-06,
"loss": 1.5375,
"step": 10970
},
{
"epoch": 2.59,
"learning_rate": 2.2876094133971154e-06,
"loss": 1.5232,
"step": 10980
},
{
"epoch": 2.59,
"learning_rate": 2.261881817544137e-06,
"loss": 1.5888,
"step": 10990
},
{
"epoch": 2.59,
"learning_rate": 2.2362928573624877e-06,
"loss": 1.5925,
"step": 11000
},
{
"epoch": 2.59,
"eval_loss": 1.576684594154358,
"eval_runtime": 121.7819,
"eval_samples_per_second": 11.258,
"eval_steps_per_second": 2.817,
"step": 11000
},
{
"epoch": 2.6,
"learning_rate": 2.210842688869591e-06,
"loss": 1.6087,
"step": 11010
},
{
"epoch": 2.6,
"learning_rate": 2.1855314672366568e-06,
"loss": 1.5736,
"step": 11020
},
{
"epoch": 2.6,
"learning_rate": 2.1603593467877243e-06,
"loss": 1.5425,
"step": 11030
},
{
"epoch": 2.6,
"learning_rate": 2.135326480998717e-06,
"loss": 1.5811,
"step": 11040
},
{
"epoch": 2.61,
"learning_rate": 2.1104330224965247e-06,
"loss": 1.5116,
"step": 11050
},
{
"epoch": 2.61,
"learning_rate": 2.0856791230580484e-06,
"loss": 1.5301,
"step": 11060
},
{
"epoch": 2.61,
"learning_rate": 2.0610649336093134e-06,
"loss": 1.5275,
"step": 11070
},
{
"epoch": 2.61,
"learning_rate": 2.036590604224503e-06,
"loss": 1.5363,
"step": 11080
},
{
"epoch": 2.61,
"learning_rate": 2.012256284125072e-06,
"loss": 1.4919,
"step": 11090
},
{
"epoch": 2.62,
"learning_rate": 1.9880621216788298e-06,
"loss": 1.627,
"step": 11100
},
{
"epoch": 2.62,
"learning_rate": 1.9640082643990394e-06,
"loss": 1.5766,
"step": 11110
},
{
"epoch": 2.62,
"learning_rate": 1.9400948589435088e-06,
"loss": 1.5139,
"step": 11120
},
{
"epoch": 2.62,
"learning_rate": 1.9163220511137114e-06,
"loss": 1.5513,
"step": 11130
},
{
"epoch": 2.63,
"learning_rate": 1.8926899858538794e-06,
"loss": 1.5805,
"step": 11140
},
{
"epoch": 2.63,
"learning_rate": 1.8691988072501359e-06,
"loss": 1.5263,
"step": 11150
},
{
"epoch": 2.63,
"learning_rate": 1.845848658529606e-06,
"loss": 1.528,
"step": 11160
},
{
"epoch": 2.63,
"learning_rate": 1.8226396820595431e-06,
"loss": 1.5566,
"step": 11170
},
{
"epoch": 2.64,
"learning_rate": 1.7995720193464766e-06,
"loss": 1.5577,
"step": 11180
},
{
"epoch": 2.64,
"learning_rate": 1.7766458110353297e-06,
"loss": 1.5529,
"step": 11190
},
{
"epoch": 2.64,
"learning_rate": 1.753861196908571e-06,
"loss": 1.5468,
"step": 11200
},
{
"epoch": 2.64,
"learning_rate": 1.7312183158853524e-06,
"loss": 1.568,
"step": 11210
},
{
"epoch": 2.65,
"learning_rate": 1.7087173060206879e-06,
"loss": 1.5648,
"step": 11220
},
{
"epoch": 2.65,
"learning_rate": 1.6863583045045816e-06,
"loss": 1.5755,
"step": 11230
},
{
"epoch": 2.65,
"learning_rate": 1.6641414476612077e-06,
"loss": 1.5526,
"step": 11240
},
{
"epoch": 2.65,
"learning_rate": 1.642066870948078e-06,
"loss": 1.5661,
"step": 11250
},
{
"epoch": 2.65,
"learning_rate": 1.6201347089552038e-06,
"loss": 1.5303,
"step": 11260
},
{
"epoch": 2.66,
"learning_rate": 1.598345095404305e-06,
"loss": 1.5469,
"step": 11270
},
{
"epoch": 2.66,
"learning_rate": 1.576698163147955e-06,
"loss": 1.5015,
"step": 11280
},
{
"epoch": 2.66,
"learning_rate": 1.5551940441688034e-06,
"loss": 1.6044,
"step": 11290
},
{
"epoch": 2.66,
"learning_rate": 1.5338328695787496e-06,
"loss": 1.4745,
"step": 11300
},
{
"epoch": 2.67,
"learning_rate": 1.512614769618162e-06,
"loss": 1.5845,
"step": 11310
},
{
"epoch": 2.67,
"learning_rate": 1.491539873655068e-06,
"loss": 1.5262,
"step": 11320
},
{
"epoch": 2.67,
"learning_rate": 1.4706083101843737e-06,
"loss": 1.5452,
"step": 11330
},
{
"epoch": 2.67,
"learning_rate": 1.449820206827071e-06,
"loss": 1.4844,
"step": 11340
},
{
"epoch": 2.68,
"learning_rate": 1.4291756903294845e-06,
"loss": 1.5662,
"step": 11350
},
{
"epoch": 2.68,
"learning_rate": 1.4086748865624666e-06,
"loss": 1.5769,
"step": 11360
},
{
"epoch": 2.68,
"learning_rate": 1.3883179205206459e-06,
"loss": 1.5101,
"step": 11370
},
{
"epoch": 2.68,
"learning_rate": 1.3681049163216664e-06,
"loss": 1.6027,
"step": 11380
},
{
"epoch": 2.69,
"learning_rate": 1.3480359972054325e-06,
"loss": 1.5928,
"step": 11390
},
{
"epoch": 2.69,
"learning_rate": 1.3281112855333428e-06,
"loss": 1.4951,
"step": 11400
},
{
"epoch": 2.69,
"learning_rate": 1.3083309027875663e-06,
"loss": 1.5245,
"step": 11410
},
{
"epoch": 2.69,
"learning_rate": 1.2886949695702782e-06,
"loss": 1.5047,
"step": 11420
},
{
"epoch": 2.69,
"learning_rate": 1.269203605602942e-06,
"loss": 1.5346,
"step": 11430
},
{
"epoch": 2.7,
"learning_rate": 1.249856929725579e-06,
"loss": 1.4552,
"step": 11440
},
{
"epoch": 2.7,
"learning_rate": 1.2306550598960298e-06,
"loss": 1.6088,
"step": 11450
},
{
"epoch": 2.7,
"learning_rate": 1.2115981131892469e-06,
"loss": 1.5466,
"step": 11460
},
{
"epoch": 2.7,
"learning_rate": 1.1926862057965755e-06,
"loss": 1.5386,
"step": 11470
},
{
"epoch": 2.71,
"learning_rate": 1.1739194530250574e-06,
"loss": 1.6025,
"step": 11480
},
{
"epoch": 2.71,
"learning_rate": 1.1552979692967064e-06,
"loss": 1.5199,
"step": 11490
},
{
"epoch": 2.71,
"learning_rate": 1.1368218681478276e-06,
"loss": 1.5507,
"step": 11500
},
{
"epoch": 2.71,
"learning_rate": 1.1184912622283133e-06,
"loss": 1.5485,
"step": 11510
},
{
"epoch": 2.72,
"learning_rate": 1.1003062633009765e-06,
"loss": 1.5408,
"step": 11520
},
{
"epoch": 2.72,
"learning_rate": 1.0822669822408427e-06,
"loss": 1.5907,
"step": 11530
},
{
"epoch": 2.72,
"learning_rate": 1.064373529034493e-06,
"loss": 1.581,
"step": 11540
},
{
"epoch": 2.72,
"learning_rate": 1.0466260127793808e-06,
"loss": 1.5346,
"step": 11550
},
{
"epoch": 2.73,
"learning_rate": 1.0290245416831823e-06,
"loss": 1.5924,
"step": 11560
},
{
"epoch": 2.73,
"learning_rate": 1.0115692230631245e-06,
"loss": 1.4831,
"step": 11570
},
{
"epoch": 2.73,
"learning_rate": 9.942601633453313e-07,
"loss": 1.5182,
"step": 11580
},
{
"epoch": 2.73,
"learning_rate": 9.77097468064178e-07,
"loss": 1.5762,
"step": 11590
},
{
"epoch": 2.74,
"learning_rate": 9.600812418616434e-07,
"loss": 1.5347,
"step": 11600
},
{
"epoch": 2.74,
"learning_rate": 9.432115884866865e-07,
"loss": 1.5561,
"step": 11610
},
{
"epoch": 2.74,
"learning_rate": 9.264886107945986e-07,
"loss": 1.5222,
"step": 11620
},
{
"epoch": 2.74,
"learning_rate": 9.099124107463718e-07,
"loss": 1.5198,
"step": 11630
},
{
"epoch": 2.74,
"learning_rate": 8.934830894080897e-07,
"loss": 1.5235,
"step": 11640
},
{
"epoch": 2.75,
"learning_rate": 8.772007469503241e-07,
"loss": 1.547,
"step": 11650
},
{
"epoch": 2.75,
"learning_rate": 8.610654826474828e-07,
"loss": 1.5714,
"step": 11660
},
{
"epoch": 2.75,
"learning_rate": 8.450773948772445e-07,
"loss": 1.5183,
"step": 11670
},
{
"epoch": 2.75,
"learning_rate": 8.292365811199381e-07,
"loss": 1.5648,
"step": 11680
},
{
"epoch": 2.76,
"learning_rate": 8.135431379579589e-07,
"loss": 1.6074,
"step": 11690
},
{
"epoch": 2.76,
"learning_rate": 7.979971610751701e-07,
"loss": 1.6357,
"step": 11700
},
{
"epoch": 2.76,
"learning_rate": 7.825987452563271e-07,
"loss": 1.584,
"step": 11710
},
{
"epoch": 2.76,
"learning_rate": 7.673479843864933e-07,
"loss": 1.5438,
"step": 11720
},
{
"epoch": 2.77,
"learning_rate": 7.522449714504748e-07,
"loss": 1.5387,
"step": 11730
},
{
"epoch": 2.77,
"learning_rate": 7.37289798532248e-07,
"loss": 1.5856,
"step": 11740
},
{
"epoch": 2.77,
"learning_rate": 7.224825568143967e-07,
"loss": 1.5303,
"step": 11750
},
{
"epoch": 2.77,
"learning_rate": 7.078233365775677e-07,
"loss": 1.5315,
"step": 11760
},
{
"epoch": 2.78,
"learning_rate": 6.933122271998993e-07,
"loss": 1.5769,
"step": 11770
},
{
"epoch": 2.78,
"learning_rate": 6.789493171565048e-07,
"loss": 1.5182,
"step": 11780
},
{
"epoch": 2.78,
"learning_rate": 6.647346940189037e-07,
"loss": 1.5813,
"step": 11790
},
{
"epoch": 2.78,
"learning_rate": 6.50668444454508e-07,
"loss": 1.5716,
"step": 11800
},
{
"epoch": 2.78,
"learning_rate": 6.367506542260842e-07,
"loss": 1.5617,
"step": 11810
},
{
"epoch": 2.79,
"learning_rate": 6.229814081912366e-07,
"loss": 1.6197,
"step": 11820
},
{
"epoch": 2.79,
"learning_rate": 6.093607903018828e-07,
"loss": 1.5667,
"step": 11830
},
{
"epoch": 2.79,
"learning_rate": 5.958888836037513e-07,
"loss": 1.5824,
"step": 11840
},
{
"epoch": 2.79,
"learning_rate": 5.825657702358572e-07,
"loss": 1.5595,
"step": 11850
},
{
"epoch": 2.8,
"learning_rate": 5.69391531430033e-07,
"loss": 1.5121,
"step": 11860
},
{
"epoch": 2.8,
"learning_rate": 5.563662475103982e-07,
"loss": 1.4909,
"step": 11870
},
{
"epoch": 2.8,
"learning_rate": 5.434899978928904e-07,
"loss": 1.5018,
"step": 11880
},
{
"epoch": 2.8,
"learning_rate": 5.307628610847798e-07,
"loss": 1.5611,
"step": 11890
},
{
"epoch": 2.81,
"learning_rate": 5.18184914684175e-07,
"loss": 1.5067,
"step": 11900
},
{
"epoch": 2.81,
"learning_rate": 5.057562353795813e-07,
"loss": 1.5029,
"step": 11910
},
{
"epoch": 2.81,
"learning_rate": 4.934768989493938e-07,
"loss": 1.6069,
"step": 11920
},
{
"epoch": 2.81,
"learning_rate": 4.813469802614684e-07,
"loss": 1.5995,
"step": 11930
},
{
"epoch": 2.82,
"learning_rate": 4.69366553272646e-07,
"loss": 1.5461,
"step": 11940
},
{
"epoch": 2.82,
"learning_rate": 4.5753569102831016e-07,
"loss": 1.5528,
"step": 11950
},
{
"epoch": 2.82,
"learning_rate": 4.4585446566193236e-07,
"loss": 1.6161,
"step": 11960
},
{
"epoch": 2.82,
"learning_rate": 4.343229483946526e-07,
"loss": 1.6181,
"step": 11970
},
{
"epoch": 2.82,
"learning_rate": 4.2294120953482173e-07,
"loss": 1.5454,
"step": 11980
},
{
"epoch": 2.83,
"learning_rate": 4.117093184775822e-07,
"loss": 1.524,
"step": 11990
},
{
"epoch": 2.83,
"learning_rate": 4.006273437044489e-07,
"loss": 1.5892,
"step": 12000
},
{
"epoch": 2.83,
"eval_loss": 1.5761640071868896,
"eval_runtime": 120.7457,
"eval_samples_per_second": 11.354,
"eval_steps_per_second": 2.841,
"step": 12000
}
],
"max_steps": 12723,
"num_train_epochs": 3,
"total_flos": 4.293883720613929e+18,
"trial_name": null,
"trial_params": null
}