where-lambo-checkpoints2 / trainer_state.json
amazingvince's picture
Upload folder using huggingface_hub
98bad92
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.056865781523698304,
"eval_steps": 400,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 8.097165991902834e-09,
"loss": 3.5463,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 4.048582995951417e-08,
"loss": 3.6591,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 8.097165991902834e-08,
"loss": 3.6377,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.214574898785425e-07,
"loss": 3.3634,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 1.6194331983805668e-07,
"loss": 3.069,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.0242914979757083e-07,
"loss": 2.6403,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 2.42914979757085e-07,
"loss": 2.1059,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 2.8340080971659917e-07,
"loss": 1.7116,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 3.2388663967611335e-07,
"loss": 1.2154,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 3.6437246963562754e-07,
"loss": 1.1809,
"step": 45
},
{
"epoch": 0.0,
"learning_rate": 4.0485829959514166e-07,
"loss": 1.1222,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 4.4534412955465585e-07,
"loss": 1.0215,
"step": 55
},
{
"epoch": 0.0,
"learning_rate": 4.8582995951417e-07,
"loss": 0.9725,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 5.263157894736842e-07,
"loss": 0.9506,
"step": 65
},
{
"epoch": 0.0,
"learning_rate": 5.668016194331983e-07,
"loss": 0.8981,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 6.072874493927125e-07,
"loss": 0.8955,
"step": 75
},
{
"epoch": 0.0,
"learning_rate": 6.477732793522267e-07,
"loss": 0.8819,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 6.882591093117408e-07,
"loss": 0.9384,
"step": 85
},
{
"epoch": 0.0,
"learning_rate": 7.287449392712551e-07,
"loss": 0.9053,
"step": 90
},
{
"epoch": 0.0,
"learning_rate": 7.692307692307693e-07,
"loss": 0.8439,
"step": 95
},
{
"epoch": 0.0,
"learning_rate": 8.097165991902833e-07,
"loss": 0.8176,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 8.502024291497975e-07,
"loss": 0.8403,
"step": 105
},
{
"epoch": 0.0,
"learning_rate": 8.906882591093117e-07,
"loss": 0.8365,
"step": 110
},
{
"epoch": 0.0,
"learning_rate": 9.311740890688259e-07,
"loss": 0.8354,
"step": 115
},
{
"epoch": 0.0,
"learning_rate": 9.7165991902834e-07,
"loss": 0.7901,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.0121457489878542e-06,
"loss": 0.8182,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 1.0526315789473683e-06,
"loss": 0.8021,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 1.0931174089068826e-06,
"loss": 0.7999,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 1.1336032388663967e-06,
"loss": 0.7896,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 1.1740890688259108e-06,
"loss": 0.8225,
"step": 145
},
{
"epoch": 0.01,
"learning_rate": 1.214574898785425e-06,
"loss": 0.8044,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.2550607287449393e-06,
"loss": 0.7872,
"step": 155
},
{
"epoch": 0.01,
"learning_rate": 1.2955465587044534e-06,
"loss": 0.826,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 1.3360323886639675e-06,
"loss": 0.772,
"step": 165
},
{
"epoch": 0.01,
"learning_rate": 1.3765182186234816e-06,
"loss": 0.7842,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 1.4170040485829959e-06,
"loss": 0.7833,
"step": 175
},
{
"epoch": 0.01,
"learning_rate": 1.4574898785425101e-06,
"loss": 0.7697,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 1.4979757085020242e-06,
"loss": 0.7515,
"step": 185
},
{
"epoch": 0.01,
"learning_rate": 1.5384615384615385e-06,
"loss": 0.7914,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.7729,
"step": 195
},
{
"epoch": 0.01,
"learning_rate": 1.6194331983805667e-06,
"loss": 0.7222,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.6599190283400807e-06,
"loss": 0.7658,
"step": 205
},
{
"epoch": 0.01,
"learning_rate": 1.700404858299595e-06,
"loss": 0.7895,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 1.7408906882591093e-06,
"loss": 0.7526,
"step": 215
},
{
"epoch": 0.01,
"learning_rate": 1.7813765182186234e-06,
"loss": 0.7686,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 1.8218623481781377e-06,
"loss": 0.7364,
"step": 225
},
{
"epoch": 0.01,
"learning_rate": 1.8623481781376518e-06,
"loss": 0.7588,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 1.9028340080971658e-06,
"loss": 0.7637,
"step": 235
},
{
"epoch": 0.01,
"learning_rate": 1.94331983805668e-06,
"loss": 0.755,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 1.983805668016194e-06,
"loss": 0.7491,
"step": 245
},
{
"epoch": 0.01,
"learning_rate": 1.9999999252295637e-06,
"loss": 0.813,
"step": 250
},
{
"epoch": 0.01,
"learning_rate": 1.9999994682991603e-06,
"loss": 0.7609,
"step": 255
},
{
"epoch": 0.01,
"learning_rate": 1.999998595977674e-06,
"loss": 0.7429,
"step": 260
},
{
"epoch": 0.01,
"learning_rate": 1.999997308265467e-06,
"loss": 0.7488,
"step": 265
},
{
"epoch": 0.01,
"learning_rate": 1.999995605163075e-06,
"loss": 0.7678,
"step": 270
},
{
"epoch": 0.01,
"learning_rate": 1.9999934866712048e-06,
"loss": 0.765,
"step": 275
},
{
"epoch": 0.01,
"learning_rate": 1.9999909527907367e-06,
"loss": 0.7583,
"step": 280
},
{
"epoch": 0.01,
"learning_rate": 1.9999880035227236e-06,
"loss": 0.7759,
"step": 285
},
{
"epoch": 0.01,
"learning_rate": 1.9999846388683895e-06,
"loss": 0.774,
"step": 290
},
{
"epoch": 0.01,
"learning_rate": 1.9999808588291327e-06,
"loss": 0.77,
"step": 295
},
{
"epoch": 0.01,
"learning_rate": 1.999976663406524e-06,
"loss": 0.7639,
"step": 300
},
{
"epoch": 0.01,
"learning_rate": 1.999972052602305e-06,
"loss": 0.7397,
"step": 305
},
{
"epoch": 0.01,
"learning_rate": 1.999967026418392e-06,
"loss": 0.7814,
"step": 310
},
{
"epoch": 0.01,
"learning_rate": 1.999961584856872e-06,
"loss": 0.7255,
"step": 315
},
{
"epoch": 0.01,
"learning_rate": 1.9999557279200056e-06,
"loss": 0.7316,
"step": 320
},
{
"epoch": 0.01,
"learning_rate": 1.9999494556102263e-06,
"loss": 0.7047,
"step": 325
},
{
"epoch": 0.01,
"learning_rate": 1.9999427679301387e-06,
"loss": 0.7689,
"step": 330
},
{
"epoch": 0.01,
"learning_rate": 1.999935664882522e-06,
"loss": 0.7218,
"step": 335
},
{
"epoch": 0.01,
"learning_rate": 1.9999281464703247e-06,
"loss": 0.7172,
"step": 340
},
{
"epoch": 0.01,
"learning_rate": 1.999920212696672e-06,
"loss": 0.7462,
"step": 345
},
{
"epoch": 0.01,
"learning_rate": 1.999911863564859e-06,
"loss": 0.7158,
"step": 350
},
{
"epoch": 0.01,
"learning_rate": 1.9999030990783527e-06,
"loss": 0.7133,
"step": 355
},
{
"epoch": 0.01,
"learning_rate": 1.999893919240795e-06,
"loss": 0.7068,
"step": 360
},
{
"epoch": 0.01,
"learning_rate": 1.9998843240559986e-06,
"loss": 0.7682,
"step": 365
},
{
"epoch": 0.02,
"learning_rate": 1.9998743135279497e-06,
"loss": 0.7442,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 1.999863887660806e-06,
"loss": 0.7516,
"step": 375
},
{
"epoch": 0.02,
"learning_rate": 1.999853046458899e-06,
"loss": 0.6991,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 1.9998417899267313e-06,
"loss": 0.7608,
"step": 385
},
{
"epoch": 0.02,
"learning_rate": 1.999830118068979e-06,
"loss": 0.7307,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 1.999818030890491e-06,
"loss": 0.7196,
"step": 395
},
{
"epoch": 0.02,
"learning_rate": 1.999805528396288e-06,
"loss": 0.7533,
"step": 400
},
{
"epoch": 0.02,
"eval_loss": 0.7047787308692932,
"eval_runtime": 138.0084,
"eval_samples_per_second": 17.144,
"eval_steps_per_second": 2.862,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 1.9997926105915627e-06,
"loss": 0.7101,
"step": 405
},
{
"epoch": 0.02,
"learning_rate": 1.999779277481682e-06,
"loss": 0.7487,
"step": 410
},
{
"epoch": 0.02,
"learning_rate": 1.9997655290721834e-06,
"loss": 0.7254,
"step": 415
},
{
"epoch": 0.02,
"learning_rate": 1.9997513653687786e-06,
"loss": 0.7329,
"step": 420
},
{
"epoch": 0.02,
"learning_rate": 1.999736786377351e-06,
"loss": 0.7669,
"step": 425
},
{
"epoch": 0.02,
"learning_rate": 1.9997217921039567e-06,
"loss": 0.74,
"step": 430
},
{
"epoch": 0.02,
"learning_rate": 1.9997063825548237e-06,
"loss": 0.7122,
"step": 435
},
{
"epoch": 0.02,
"learning_rate": 1.9996905577363533e-06,
"loss": 0.759,
"step": 440
},
{
"epoch": 0.02,
"learning_rate": 1.9996743176551186e-06,
"loss": 0.7528,
"step": 445
},
{
"epoch": 0.02,
"learning_rate": 1.999657662317866e-06,
"loss": 0.7414,
"step": 450
},
{
"epoch": 0.02,
"learning_rate": 1.999640591731515e-06,
"loss": 0.7201,
"step": 455
},
{
"epoch": 0.02,
"learning_rate": 1.999623105903154e-06,
"loss": 0.7269,
"step": 460
},
{
"epoch": 0.02,
"learning_rate": 1.999605204840049e-06,
"loss": 0.7579,
"step": 465
},
{
"epoch": 0.02,
"learning_rate": 1.9995868885496343e-06,
"loss": 0.7396,
"step": 470
},
{
"epoch": 0.02,
"learning_rate": 1.9995681570395195e-06,
"loss": 0.782,
"step": 475
},
{
"epoch": 0.02,
"learning_rate": 1.9995490103174847e-06,
"loss": 0.7324,
"step": 480
},
{
"epoch": 0.02,
"learning_rate": 1.999529448391483e-06,
"loss": 0.7546,
"step": 485
},
{
"epoch": 0.02,
"learning_rate": 1.9995094712696413e-06,
"loss": 0.764,
"step": 490
},
{
"epoch": 0.02,
"learning_rate": 1.9994890789602576e-06,
"loss": 0.7337,
"step": 495
},
{
"epoch": 0.02,
"learning_rate": 1.999468271471802e-06,
"loss": 0.7331,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 1.9994470488129185e-06,
"loss": 0.7463,
"step": 505
},
{
"epoch": 0.02,
"learning_rate": 1.9994254109924223e-06,
"loss": 0.7241,
"step": 510
},
{
"epoch": 0.02,
"learning_rate": 1.9994033580193017e-06,
"loss": 0.7291,
"step": 515
},
{
"epoch": 0.02,
"learning_rate": 1.999380889902718e-06,
"loss": 0.7099,
"step": 520
},
{
"epoch": 0.02,
"learning_rate": 1.9993580066520034e-06,
"loss": 0.7436,
"step": 525
},
{
"epoch": 0.02,
"learning_rate": 1.9993347082766636e-06,
"loss": 0.7509,
"step": 530
},
{
"epoch": 0.02,
"learning_rate": 1.9993109947863764e-06,
"loss": 0.7072,
"step": 535
},
{
"epoch": 0.02,
"learning_rate": 1.999286866190993e-06,
"loss": 0.7369,
"step": 540
},
{
"epoch": 0.02,
"learning_rate": 1.999262322500535e-06,
"loss": 0.7032,
"step": 545
},
{
"epoch": 0.02,
"learning_rate": 1.9992373637251982e-06,
"loss": 0.7081,
"step": 550
},
{
"epoch": 0.02,
"learning_rate": 1.999211989875351e-06,
"loss": 0.7122,
"step": 555
},
{
"epoch": 0.02,
"learning_rate": 1.999186200961532e-06,
"loss": 0.7411,
"step": 560
},
{
"epoch": 0.02,
"learning_rate": 1.9991599969944552e-06,
"loss": 0.7329,
"step": 565
},
{
"epoch": 0.02,
"learning_rate": 1.9991333779850043e-06,
"loss": 0.7111,
"step": 570
},
{
"epoch": 0.02,
"learning_rate": 1.999106343944237e-06,
"loss": 0.7317,
"step": 575
},
{
"epoch": 0.02,
"learning_rate": 1.9990788948833833e-06,
"loss": 0.7427,
"step": 580
},
{
"epoch": 0.02,
"learning_rate": 1.999051030813845e-06,
"loss": 0.7158,
"step": 585
},
{
"epoch": 0.02,
"learning_rate": 1.999022751747197e-06,
"loss": 0.7271,
"step": 590
},
{
"epoch": 0.02,
"learning_rate": 1.998994057695185e-06,
"loss": 0.7145,
"step": 595
},
{
"epoch": 0.02,
"learning_rate": 1.99896494866973e-06,
"loss": 0.6823,
"step": 600
},
{
"epoch": 0.02,
"learning_rate": 1.9989354246829222e-06,
"loss": 0.7484,
"step": 605
},
{
"epoch": 0.02,
"learning_rate": 1.9989054857470267e-06,
"loss": 0.731,
"step": 610
},
{
"epoch": 0.02,
"learning_rate": 1.9988751318744787e-06,
"loss": 0.7809,
"step": 615
},
{
"epoch": 0.03,
"learning_rate": 1.998844363077888e-06,
"loss": 0.7207,
"step": 620
},
{
"epoch": 0.03,
"learning_rate": 1.998813179370035e-06,
"loss": 0.7359,
"step": 625
},
{
"epoch": 0.03,
"learning_rate": 1.9987815807638733e-06,
"loss": 0.6915,
"step": 630
},
{
"epoch": 0.03,
"learning_rate": 1.9987495672725294e-06,
"loss": 0.6988,
"step": 635
},
{
"epoch": 0.03,
"learning_rate": 1.9987171389093e-06,
"loss": 0.7673,
"step": 640
},
{
"epoch": 0.03,
"learning_rate": 1.998684295687657e-06,
"loss": 0.7086,
"step": 645
},
{
"epoch": 0.03,
"learning_rate": 1.998651037621242e-06,
"loss": 0.7789,
"step": 650
},
{
"epoch": 0.03,
"learning_rate": 1.9986173647238715e-06,
"loss": 0.7504,
"step": 655
},
{
"epoch": 0.03,
"learning_rate": 1.9985832770095313e-06,
"loss": 0.7214,
"step": 660
},
{
"epoch": 0.03,
"learning_rate": 1.998548774492382e-06,
"loss": 0.719,
"step": 665
},
{
"epoch": 0.03,
"learning_rate": 1.9985138571867557e-06,
"loss": 0.7286,
"step": 670
},
{
"epoch": 0.03,
"learning_rate": 1.998478525107157e-06,
"loss": 0.7362,
"step": 675
},
{
"epoch": 0.03,
"learning_rate": 1.998442778268262e-06,
"loss": 0.7099,
"step": 680
},
{
"epoch": 0.03,
"learning_rate": 1.99840661668492e-06,
"loss": 0.7514,
"step": 685
},
{
"epoch": 0.03,
"learning_rate": 1.998370040372151e-06,
"loss": 0.7663,
"step": 690
},
{
"epoch": 0.03,
"learning_rate": 1.99833304934515e-06,
"loss": 0.7008,
"step": 695
},
{
"epoch": 0.03,
"learning_rate": 1.9982956436192827e-06,
"loss": 0.7785,
"step": 700
},
{
"epoch": 0.03,
"learning_rate": 1.9982578232100866e-06,
"loss": 0.7303,
"step": 705
},
{
"epoch": 0.03,
"learning_rate": 1.9982195881332714e-06,
"loss": 0.7714,
"step": 710
},
{
"epoch": 0.03,
"learning_rate": 1.9981809384047207e-06,
"loss": 0.7396,
"step": 715
},
{
"epoch": 0.03,
"learning_rate": 1.9981418740404886e-06,
"loss": 0.7499,
"step": 720
},
{
"epoch": 0.03,
"learning_rate": 1.998102395056802e-06,
"loss": 0.7325,
"step": 725
},
{
"epoch": 0.03,
"learning_rate": 1.998062501470061e-06,
"loss": 0.7169,
"step": 730
},
{
"epoch": 0.03,
"learning_rate": 1.998022193296836e-06,
"loss": 0.7412,
"step": 735
},
{
"epoch": 0.03,
"learning_rate": 1.9979814705538715e-06,
"loss": 0.6935,
"step": 740
},
{
"epoch": 0.03,
"learning_rate": 1.997940333258083e-06,
"loss": 0.7245,
"step": 745
},
{
"epoch": 0.03,
"learning_rate": 1.9978987814265583e-06,
"loss": 0.7087,
"step": 750
},
{
"epoch": 0.03,
"learning_rate": 1.997856815076558e-06,
"loss": 0.6977,
"step": 755
},
{
"epoch": 0.03,
"learning_rate": 1.9978144342255147e-06,
"loss": 0.6989,
"step": 760
},
{
"epoch": 0.03,
"learning_rate": 1.9977716388910325e-06,
"loss": 0.7284,
"step": 765
},
{
"epoch": 0.03,
"learning_rate": 1.997728429090889e-06,
"loss": 0.7646,
"step": 770
},
{
"epoch": 0.03,
"learning_rate": 1.9976848048430323e-06,
"loss": 0.7415,
"step": 775
},
{
"epoch": 0.03,
"learning_rate": 1.9976407661655844e-06,
"loss": 0.7046,
"step": 780
},
{
"epoch": 0.03,
"learning_rate": 1.997596313076838e-06,
"loss": 0.6835,
"step": 785
},
{
"epoch": 0.03,
"learning_rate": 1.9975514455952584e-06,
"loss": 0.7351,
"step": 790
},
{
"epoch": 0.03,
"learning_rate": 1.9975061637394834e-06,
"loss": 0.7202,
"step": 795
},
{
"epoch": 0.03,
"learning_rate": 1.997460467528323e-06,
"loss": 0.7147,
"step": 800
},
{
"epoch": 0.03,
"eval_loss": 0.6881120800971985,
"eval_runtime": 138.0127,
"eval_samples_per_second": 17.143,
"eval_steps_per_second": 2.862,
"step": 800
},
{
"epoch": 0.03,
"learning_rate": 1.997414356980759e-06,
"loss": 0.7896,
"step": 805
},
{
"epoch": 0.03,
"learning_rate": 1.9973678321159443e-06,
"loss": 0.7029,
"step": 810
},
{
"epoch": 0.03,
"learning_rate": 1.9973208929532063e-06,
"loss": 0.7063,
"step": 815
},
{
"epoch": 0.03,
"learning_rate": 1.9972735395120418e-06,
"loss": 0.7171,
"step": 820
},
{
"epoch": 0.03,
"learning_rate": 1.997225771812122e-06,
"loss": 0.7217,
"step": 825
},
{
"epoch": 0.03,
"learning_rate": 1.9971775898732893e-06,
"loss": 0.725,
"step": 830
},
{
"epoch": 0.03,
"learning_rate": 1.9971289937155577e-06,
"loss": 0.7252,
"step": 835
},
{
"epoch": 0.03,
"learning_rate": 1.997079983359113e-06,
"loss": 0.7049,
"step": 840
},
{
"epoch": 0.03,
"learning_rate": 1.9970305588243145e-06,
"loss": 0.7056,
"step": 845
},
{
"epoch": 0.03,
"learning_rate": 1.9969807201316925e-06,
"loss": 0.6981,
"step": 850
},
{
"epoch": 0.03,
"learning_rate": 1.9969304673019494e-06,
"loss": 0.7154,
"step": 855
},
{
"epoch": 0.03,
"learning_rate": 1.99687980035596e-06,
"loss": 0.7277,
"step": 860
},
{
"epoch": 0.04,
"learning_rate": 1.996828719314771e-06,
"loss": 0.7181,
"step": 865
},
{
"epoch": 0.04,
"learning_rate": 1.996777224199601e-06,
"loss": 0.7028,
"step": 870
},
{
"epoch": 0.04,
"learning_rate": 1.99672531503184e-06,
"loss": 0.7336,
"step": 875
},
{
"epoch": 0.04,
"learning_rate": 1.996672991833051e-06,
"loss": 0.714,
"step": 880
},
{
"epoch": 0.04,
"learning_rate": 1.996620254624969e-06,
"loss": 0.713,
"step": 885
},
{
"epoch": 0.04,
"learning_rate": 1.9965671034295e-06,
"loss": 0.7295,
"step": 890
},
{
"epoch": 0.04,
"learning_rate": 1.996513538268723e-06,
"loss": 0.7798,
"step": 895
},
{
"epoch": 0.04,
"learning_rate": 1.9964595591648883e-06,
"loss": 0.7388,
"step": 900
},
{
"epoch": 0.04,
"learning_rate": 1.9964051661404185e-06,
"loss": 0.6813,
"step": 905
},
{
"epoch": 0.04,
"learning_rate": 1.9963503592179078e-06,
"loss": 0.7163,
"step": 910
},
{
"epoch": 0.04,
"learning_rate": 1.996295138420122e-06,
"loss": 0.7595,
"step": 915
},
{
"epoch": 0.04,
"learning_rate": 1.9962395037700007e-06,
"loss": 0.7457,
"step": 920
},
{
"epoch": 0.04,
"learning_rate": 1.996183455290653e-06,
"loss": 0.6897,
"step": 925
},
{
"epoch": 0.04,
"learning_rate": 1.996126993005361e-06,
"loss": 0.7031,
"step": 930
},
{
"epoch": 0.04,
"learning_rate": 1.996070116937579e-06,
"loss": 0.7177,
"step": 935
},
{
"epoch": 0.04,
"learning_rate": 1.9960128271109326e-06,
"loss": 0.6966,
"step": 940
},
{
"epoch": 0.04,
"learning_rate": 1.9959551235492195e-06,
"loss": 0.7391,
"step": 945
},
{
"epoch": 0.04,
"learning_rate": 1.9958970062764095e-06,
"loss": 0.7456,
"step": 950
},
{
"epoch": 0.04,
"learning_rate": 1.9958384753166437e-06,
"loss": 0.7072,
"step": 955
},
{
"epoch": 0.04,
"learning_rate": 1.995779530694236e-06,
"loss": 0.6894,
"step": 960
},
{
"epoch": 0.04,
"learning_rate": 1.9957201724336704e-06,
"loss": 0.704,
"step": 965
},
{
"epoch": 0.04,
"learning_rate": 1.9956604005596043e-06,
"loss": 0.6939,
"step": 970
},
{
"epoch": 0.04,
"learning_rate": 1.9956002150968667e-06,
"loss": 0.7045,
"step": 975
},
{
"epoch": 0.04,
"learning_rate": 1.9955396160704582e-06,
"loss": 0.6791,
"step": 980
},
{
"epoch": 0.04,
"learning_rate": 1.99547860350555e-06,
"loss": 0.675,
"step": 985
},
{
"epoch": 0.04,
"learning_rate": 1.995417177427488e-06,
"loss": 0.7163,
"step": 990
},
{
"epoch": 0.04,
"learning_rate": 1.9953553378617866e-06,
"loss": 0.6922,
"step": 995
},
{
"epoch": 0.04,
"learning_rate": 1.995293084834134e-06,
"loss": 0.7101,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 1.9952304183703893e-06,
"loss": 0.7109,
"step": 1005
},
{
"epoch": 0.04,
"learning_rate": 1.9951673384965835e-06,
"loss": 0.7103,
"step": 1010
},
{
"epoch": 0.04,
"learning_rate": 1.99510384523892e-06,
"loss": 0.7677,
"step": 1015
},
{
"epoch": 0.04,
"learning_rate": 1.995039938623773e-06,
"loss": 0.7371,
"step": 1020
},
{
"epoch": 0.04,
"learning_rate": 1.9949756186776893e-06,
"loss": 0.7204,
"step": 1025
},
{
"epoch": 0.04,
"learning_rate": 1.9949108854273855e-06,
"loss": 0.7271,
"step": 1030
},
{
"epoch": 0.04,
"learning_rate": 1.9948457388997528e-06,
"loss": 0.7031,
"step": 1035
},
{
"epoch": 0.04,
"learning_rate": 1.994780179121851e-06,
"loss": 0.7612,
"step": 1040
},
{
"epoch": 0.04,
"learning_rate": 1.994714206120914e-06,
"loss": 0.7234,
"step": 1045
},
{
"epoch": 0.04,
"learning_rate": 1.9946478199243466e-06,
"loss": 0.7187,
"step": 1050
},
{
"epoch": 0.04,
"learning_rate": 1.9945810205597246e-06,
"loss": 0.7004,
"step": 1055
},
{
"epoch": 0.04,
"learning_rate": 1.9945138080547957e-06,
"loss": 0.6932,
"step": 1060
},
{
"epoch": 0.04,
"learning_rate": 1.99444618243748e-06,
"loss": 0.7135,
"step": 1065
},
{
"epoch": 0.04,
"learning_rate": 1.994378143735868e-06,
"loss": 0.7056,
"step": 1070
},
{
"epoch": 0.04,
"learning_rate": 1.9943096919782225e-06,
"loss": 0.6984,
"step": 1075
},
{
"epoch": 0.04,
"learning_rate": 1.994240827192978e-06,
"loss": 0.6947,
"step": 1080
},
{
"epoch": 0.04,
"learning_rate": 1.9941715494087408e-06,
"loss": 0.7332,
"step": 1085
},
{
"epoch": 0.04,
"learning_rate": 1.9941018586542866e-06,
"loss": 0.6963,
"step": 1090
},
{
"epoch": 0.04,
"learning_rate": 1.9940317549585665e-06,
"loss": 0.7245,
"step": 1095
},
{
"epoch": 0.04,
"learning_rate": 1.9939612383506993e-06,
"loss": 0.769,
"step": 1100
},
{
"epoch": 0.04,
"learning_rate": 1.993890308859978e-06,
"loss": 0.7245,
"step": 1105
},
{
"epoch": 0.05,
"learning_rate": 1.9938189665158654e-06,
"loss": 0.6868,
"step": 1110
},
{
"epoch": 0.05,
"learning_rate": 1.9937472113479966e-06,
"loss": 0.7072,
"step": 1115
},
{
"epoch": 0.05,
"learning_rate": 1.9936750433861787e-06,
"loss": 0.7415,
"step": 1120
},
{
"epoch": 0.05,
"learning_rate": 1.993602462660389e-06,
"loss": 0.71,
"step": 1125
},
{
"epoch": 0.05,
"learning_rate": 1.993529469200777e-06,
"loss": 0.7006,
"step": 1130
},
{
"epoch": 0.05,
"learning_rate": 1.993456063037664e-06,
"loss": 0.6957,
"step": 1135
},
{
"epoch": 0.05,
"learning_rate": 1.9933822442015416e-06,
"loss": 0.733,
"step": 1140
},
{
"epoch": 0.05,
"learning_rate": 1.993308012723074e-06,
"loss": 0.7156,
"step": 1145
},
{
"epoch": 0.05,
"learning_rate": 1.993233368633096e-06,
"loss": 0.6977,
"step": 1150
},
{
"epoch": 0.05,
"learning_rate": 1.993158311962614e-06,
"loss": 0.6911,
"step": 1155
},
{
"epoch": 0.05,
"learning_rate": 1.9930828427428066e-06,
"loss": 0.7124,
"step": 1160
},
{
"epoch": 0.05,
"learning_rate": 1.9930069610050224e-06,
"loss": 0.7197,
"step": 1165
},
{
"epoch": 0.05,
"learning_rate": 1.9929306667807823e-06,
"loss": 0.7129,
"step": 1170
},
{
"epoch": 0.05,
"learning_rate": 1.992853960101778e-06,
"loss": 0.6775,
"step": 1175
},
{
"epoch": 0.05,
"learning_rate": 1.9927768409998733e-06,
"loss": 0.7333,
"step": 1180
},
{
"epoch": 0.05,
"learning_rate": 1.992699309507102e-06,
"loss": 0.6704,
"step": 1185
},
{
"epoch": 0.05,
"learning_rate": 1.992621365655671e-06,
"loss": 0.7136,
"step": 1190
},
{
"epoch": 0.05,
"learning_rate": 1.9925430094779566e-06,
"loss": 0.696,
"step": 1195
},
{
"epoch": 0.05,
"learning_rate": 1.9924642410065075e-06,
"loss": 0.7362,
"step": 1200
},
{
"epoch": 0.05,
"eval_loss": 0.6821444630622864,
"eval_runtime": 138.0679,
"eval_samples_per_second": 17.136,
"eval_steps_per_second": 2.861,
"step": 1200
},
{
"epoch": 0.05,
"learning_rate": 1.992385060274044e-06,
"loss": 0.6971,
"step": 1205
},
{
"epoch": 0.05,
"learning_rate": 1.9923054673134564e-06,
"loss": 0.7887,
"step": 1210
},
{
"epoch": 0.05,
"learning_rate": 1.992225462157807e-06,
"loss": 0.7134,
"step": 1215
},
{
"epoch": 0.05,
"learning_rate": 1.99214504484033e-06,
"loss": 0.7384,
"step": 1220
},
{
"epoch": 0.05,
"learning_rate": 1.9920642153944288e-06,
"loss": 0.7228,
"step": 1225
},
{
"epoch": 0.05,
"learning_rate": 1.9919829738536806e-06,
"loss": 0.6835,
"step": 1230
},
{
"epoch": 0.05,
"learning_rate": 1.991901320251831e-06,
"loss": 0.6922,
"step": 1235
},
{
"epoch": 0.05,
"learning_rate": 1.9918192546227995e-06,
"loss": 0.7258,
"step": 1240
},
{
"epoch": 0.05,
"learning_rate": 1.991736777000675e-06,
"loss": 0.7399,
"step": 1245
},
{
"epoch": 0.05,
"learning_rate": 1.9916538874197176e-06,
"loss": 0.7625,
"step": 1250
},
{
"epoch": 0.05,
"learning_rate": 1.9915705859143594e-06,
"loss": 0.6707,
"step": 1255
},
{
"epoch": 0.05,
"learning_rate": 1.9914868725192025e-06,
"loss": 0.6932,
"step": 1260
},
{
"epoch": 0.05,
"learning_rate": 1.991402747269022e-06,
"loss": 0.7425,
"step": 1265
},
{
"epoch": 0.05,
"learning_rate": 1.991318210198761e-06,
"loss": 0.7002,
"step": 1270
},
{
"epoch": 0.05,
"learning_rate": 1.991233261343537e-06,
"loss": 0.6766,
"step": 1275
},
{
"epoch": 0.05,
"learning_rate": 1.9911479007386364e-06,
"loss": 0.7258,
"step": 1280
},
{
"epoch": 0.05,
"learning_rate": 1.991062128419517e-06,
"loss": 0.7467,
"step": 1285
},
{
"epoch": 0.05,
"learning_rate": 1.9909759444218085e-06,
"loss": 0.722,
"step": 1290
},
{
"epoch": 0.05,
"learning_rate": 1.9908893487813106e-06,
"loss": 0.7107,
"step": 1295
},
{
"epoch": 0.05,
"learning_rate": 1.990802341533994e-06,
"loss": 0.7337,
"step": 1300
},
{
"epoch": 0.05,
"learning_rate": 1.9907149227160016e-06,
"loss": 0.7075,
"step": 1305
},
{
"epoch": 0.05,
"learning_rate": 1.9906270923636457e-06,
"loss": 0.7157,
"step": 1310
},
{
"epoch": 0.05,
"learning_rate": 1.9905388505134107e-06,
"loss": 0.6916,
"step": 1315
},
{
"epoch": 0.05,
"learning_rate": 1.990450197201951e-06,
"loss": 0.6997,
"step": 1320
},
{
"epoch": 0.05,
"learning_rate": 1.990361132466093e-06,
"loss": 0.7067,
"step": 1325
},
{
"epoch": 0.05,
"learning_rate": 1.9902716563428335e-06,
"loss": 0.7209,
"step": 1330
},
{
"epoch": 0.05,
"learning_rate": 1.9901817688693395e-06,
"loss": 0.7004,
"step": 1335
},
{
"epoch": 0.05,
"learning_rate": 1.99009147008295e-06,
"loss": 0.713,
"step": 1340
},
{
"epoch": 0.05,
"learning_rate": 1.9900007600211735e-06,
"loss": 0.6596,
"step": 1345
},
{
"epoch": 0.05,
"learning_rate": 1.9899096387216914e-06,
"loss": 0.7426,
"step": 1350
},
{
"epoch": 0.06,
"learning_rate": 1.9898181062223536e-06,
"loss": 0.7103,
"step": 1355
},
{
"epoch": 0.06,
"learning_rate": 1.9897261625611822e-06,
"loss": 0.6906,
"step": 1360
},
{
"epoch": 0.06,
"learning_rate": 1.9896338077763704e-06,
"loss": 0.7082,
"step": 1365
},
{
"epoch": 0.06,
"learning_rate": 1.989541041906281e-06,
"loss": 0.7135,
"step": 1370
},
{
"epoch": 0.06,
"learning_rate": 1.9894478649894484e-06,
"loss": 0.7033,
"step": 1375
},
{
"epoch": 0.06,
"learning_rate": 1.989354277064577e-06,
"loss": 0.7452,
"step": 1380
},
{
"epoch": 0.06,
"learning_rate": 1.9892602781705427e-06,
"loss": 0.6947,
"step": 1385
},
{
"epoch": 0.06,
"learning_rate": 1.9891658683463922e-06,
"loss": 0.7412,
"step": 1390
},
{
"epoch": 0.06,
"learning_rate": 1.989071047631342e-06,
"loss": 0.6646,
"step": 1395
},
{
"epoch": 0.06,
"learning_rate": 1.98897581606478e-06,
"loss": 0.6847,
"step": 1400
}
],
"logging_steps": 5,
"max_steps": 24619,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"total_flos": 194475417608192.0,
"trial_name": null,
"trial_params": null
}