text-normalization-ru-new / trainer_state.json
alexue4's picture
End of training
4312d76
raw
history blame
30.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 280880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.5609999287800016e-09,
"loss": 0.0011,
"step": 1
},
{
"epoch": 0.1,
"learning_rate": 5.003204899935903e-06,
"loss": 0.0046,
"step": 1405
},
{
"epoch": 0.2,
"learning_rate": 1.0006409799871805e-05,
"loss": 0.005,
"step": 2810
},
{
"epoch": 0.3,
"learning_rate": 1.5009614699807706e-05,
"loss": 0.0052,
"step": 4215
},
{
"epoch": 0.4,
"learning_rate": 2.001281959974361e-05,
"loss": 0.005,
"step": 5620
},
{
"epoch": 0.5,
"learning_rate": 2.5016024499679513e-05,
"loss": 0.0048,
"step": 7025
},
{
"epoch": 0.6,
"learning_rate": 3.001922939961541e-05,
"loss": 0.0045,
"step": 8430
},
{
"epoch": 0.7,
"learning_rate": 3.5022434299551316e-05,
"loss": 0.0049,
"step": 9835
},
{
"epoch": 0.8,
"learning_rate": 4.002563919948722e-05,
"loss": 0.0051,
"step": 11240
},
{
"epoch": 0.9,
"learning_rate": 4.502884409942312e-05,
"loss": 0.0052,
"step": 12645
},
{
"epoch": 1.0,
"eval_loss": 0.027173755690455437,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 13.945,
"eval_samples_per_second": 17.928,
"eval_steps_per_second": 1.219,
"step": 14041
},
{
"epoch": 1.0,
"learning_rate": 5.0032048999359025e-05,
"loss": 0.0045,
"step": 14050
},
{
"epoch": 1.1,
"learning_rate": 5.5035253899294924e-05,
"loss": 0.0047,
"step": 15455
},
{
"epoch": 1.2,
"learning_rate": 6.003845879923082e-05,
"loss": 0.0044,
"step": 16860
},
{
"epoch": 1.3,
"learning_rate": 6.504166369916673e-05,
"loss": 0.0045,
"step": 18265
},
{
"epoch": 1.4,
"learning_rate": 7.002990600968385e-05,
"loss": 0.0046,
"step": 19670
},
{
"epoch": 1.5,
"learning_rate": 7.50320421532327e-05,
"loss": 0.0043,
"step": 21075
},
{
"epoch": 1.6,
"learning_rate": 8.003417829678155e-05,
"loss": 0.0042,
"step": 22480
},
{
"epoch": 1.7,
"learning_rate": 8.503631444033039e-05,
"loss": 0.0046,
"step": 23885
},
{
"epoch": 1.8,
"learning_rate": 9.003845058387924e-05,
"loss": 0.0043,
"step": 25290
},
{
"epoch": 1.9,
"learning_rate": 9.504058672742809e-05,
"loss": 0.0045,
"step": 26695
},
{
"epoch": 2.0,
"eval_loss": 0.03272660821676254,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 12.7866,
"eval_samples_per_second": 19.552,
"eval_steps_per_second": 1.33,
"step": 28088
},
{
"epoch": 2.0,
"learning_rate": 9.99952530143359e-05,
"loss": 0.0046,
"step": 28100
},
{
"epoch": 2.1,
"learning_rate": 9.943946010949714e-05,
"loss": 0.0042,
"step": 29505
},
{
"epoch": 2.2,
"learning_rate": 9.888366720465838e-05,
"loss": 0.0043,
"step": 30910
},
{
"epoch": 2.3,
"learning_rate": 9.832787429981962e-05,
"loss": 0.0044,
"step": 32315
},
{
"epoch": 2.4,
"learning_rate": 9.777208139498087e-05,
"loss": 0.0045,
"step": 33720
},
{
"epoch": 2.5,
"learning_rate": 9.721628849014209e-05,
"loss": 0.0041,
"step": 35125
},
{
"epoch": 2.6,
"learning_rate": 9.666049558530333e-05,
"loss": 0.0043,
"step": 36530
},
{
"epoch": 2.7,
"learning_rate": 9.610470268046457e-05,
"loss": 0.0046,
"step": 37935
},
{
"epoch": 2.8,
"learning_rate": 9.554890977562582e-05,
"loss": 0.0043,
"step": 39340
},
{
"epoch": 2.9,
"learning_rate": 9.499311687078706e-05,
"loss": 0.0043,
"step": 40745
},
{
"epoch": 3.0,
"eval_loss": 0.03169206902384758,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.3553,
"eval_samples_per_second": 20.234,
"eval_steps_per_second": 1.376,
"step": 42132
},
{
"epoch": 3.0,
"learning_rate": 9.44373239659483e-05,
"loss": 0.0048,
"step": 42150
},
{
"epoch": 3.1,
"learning_rate": 9.388153106110953e-05,
"loss": 0.0034,
"step": 43555
},
{
"epoch": 3.2,
"learning_rate": 9.332573815627077e-05,
"loss": 0.0036,
"step": 44960
},
{
"epoch": 3.3,
"learning_rate": 9.276994525143201e-05,
"loss": 0.0037,
"step": 46365
},
{
"epoch": 3.4,
"learning_rate": 9.221415234659325e-05,
"loss": 0.004,
"step": 47770
},
{
"epoch": 3.5,
"learning_rate": 9.16583594417545e-05,
"loss": 0.0041,
"step": 49175
},
{
"epoch": 3.6,
"learning_rate": 9.110256653691574e-05,
"loss": 0.0039,
"step": 50580
},
{
"epoch": 3.7,
"learning_rate": 9.054677363207696e-05,
"loss": 0.0039,
"step": 51985
},
{
"epoch": 3.8,
"learning_rate": 8.99909807272382e-05,
"loss": 0.0039,
"step": 53390
},
{
"epoch": 3.9,
"learning_rate": 8.943518782239945e-05,
"loss": 0.0042,
"step": 54795
},
{
"epoch": 4.0,
"eval_loss": 0.031554438173770905,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1943,
"eval_samples_per_second": 20.501,
"eval_steps_per_second": 1.394,
"step": 56176
},
{
"epoch": 4.0,
"learning_rate": 8.887939491756069e-05,
"loss": 0.004,
"step": 56200
},
{
"epoch": 4.1,
"learning_rate": 8.832360201272193e-05,
"loss": 0.0034,
"step": 57605
},
{
"epoch": 4.2,
"learning_rate": 8.776780910788317e-05,
"loss": 0.0033,
"step": 59010
},
{
"epoch": 4.3,
"learning_rate": 8.72120162030444e-05,
"loss": 0.0034,
"step": 60415
},
{
"epoch": 4.4,
"learning_rate": 8.665622329820564e-05,
"loss": 0.0034,
"step": 61820
},
{
"epoch": 4.5,
"learning_rate": 8.610043039336688e-05,
"loss": 0.0032,
"step": 63225
},
{
"epoch": 4.6,
"learning_rate": 8.554463748852812e-05,
"loss": 0.0035,
"step": 64630
},
{
"epoch": 4.7,
"learning_rate": 8.498884458368937e-05,
"loss": 0.0035,
"step": 66035
},
{
"epoch": 4.8,
"learning_rate": 8.443305167885061e-05,
"loss": 0.0035,
"step": 67440
},
{
"epoch": 4.9,
"learning_rate": 8.387725877401183e-05,
"loss": 0.0035,
"step": 68845
},
{
"epoch": 5.0,
"eval_loss": 0.03568544238805771,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.3273,
"eval_samples_per_second": 20.28,
"eval_steps_per_second": 1.379,
"step": 70220
},
{
"epoch": 5.0,
"learning_rate": 8.332146586917307e-05,
"loss": 0.0037,
"step": 70250
},
{
"epoch": 5.1,
"learning_rate": 8.276567296433432e-05,
"loss": 0.0028,
"step": 71655
},
{
"epoch": 5.2,
"learning_rate": 8.220988005949556e-05,
"loss": 0.0029,
"step": 73060
},
{
"epoch": 5.3,
"learning_rate": 8.16540871546568e-05,
"loss": 0.003,
"step": 74465
},
{
"epoch": 5.4,
"learning_rate": 8.109829424981804e-05,
"loss": 0.0031,
"step": 75870
},
{
"epoch": 5.5,
"learning_rate": 8.054250134497927e-05,
"loss": 0.0032,
"step": 77275
},
{
"epoch": 5.6,
"learning_rate": 7.998670844014051e-05,
"loss": 0.003,
"step": 78680
},
{
"epoch": 5.7,
"learning_rate": 7.943091553530175e-05,
"loss": 0.0032,
"step": 80085
},
{
"epoch": 5.8,
"learning_rate": 7.8875122630463e-05,
"loss": 0.0031,
"step": 81490
},
{
"epoch": 5.9,
"learning_rate": 7.831932972562424e-05,
"loss": 0.0032,
"step": 82895
},
{
"epoch": 6.0,
"eval_loss": 0.03648597374558449,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1121,
"eval_samples_per_second": 20.641,
"eval_steps_per_second": 1.404,
"step": 84264
},
{
"epoch": 6.0,
"learning_rate": 7.776353682078548e-05,
"loss": 0.0032,
"step": 84300
},
{
"epoch": 6.1,
"learning_rate": 7.72077439159467e-05,
"loss": 0.0027,
"step": 85705
},
{
"epoch": 6.2,
"learning_rate": 7.665195101110795e-05,
"loss": 0.0026,
"step": 87110
},
{
"epoch": 6.3,
"learning_rate": 7.609615810626919e-05,
"loss": 0.0028,
"step": 88515
},
{
"epoch": 6.4,
"learning_rate": 7.554036520143043e-05,
"loss": 0.0029,
"step": 89920
},
{
"epoch": 6.5,
"learning_rate": 7.498457229659167e-05,
"loss": 0.0026,
"step": 91325
},
{
"epoch": 6.6,
"learning_rate": 7.442877939175292e-05,
"loss": 0.0028,
"step": 92730
},
{
"epoch": 6.7,
"learning_rate": 7.387298648691414e-05,
"loss": 0.0031,
"step": 94135
},
{
"epoch": 6.8,
"learning_rate": 7.331719358207538e-05,
"loss": 0.0027,
"step": 95540
},
{
"epoch": 6.9,
"learning_rate": 7.276140067723662e-05,
"loss": 0.0027,
"step": 96945
},
{
"epoch": 7.0,
"eval_loss": 0.04028007388114929,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1827,
"eval_samples_per_second": 20.521,
"eval_steps_per_second": 1.395,
"step": 98308
},
{
"epoch": 7.0,
"learning_rate": 7.220560777239787e-05,
"loss": 0.0028,
"step": 98350
},
{
"epoch": 7.1,
"learning_rate": 7.164981486755911e-05,
"loss": 0.0027,
"step": 99755
},
{
"epoch": 7.2,
"learning_rate": 7.109402196272035e-05,
"loss": 0.0025,
"step": 101160
},
{
"epoch": 7.3,
"learning_rate": 7.053822905788158e-05,
"loss": 0.0025,
"step": 102565
},
{
"epoch": 7.4,
"learning_rate": 6.998243615304282e-05,
"loss": 0.0024,
"step": 103970
},
{
"epoch": 7.5,
"learning_rate": 6.942664324820406e-05,
"loss": 0.0026,
"step": 105375
},
{
"epoch": 7.6,
"learning_rate": 6.88708503433653e-05,
"loss": 0.0026,
"step": 106780
},
{
"epoch": 7.7,
"learning_rate": 6.831505743852654e-05,
"loss": 0.0026,
"step": 108185
},
{
"epoch": 7.8,
"learning_rate": 6.775926453368779e-05,
"loss": 0.0025,
"step": 109590
},
{
"epoch": 7.9,
"learning_rate": 6.720347162884901e-05,
"loss": 0.0027,
"step": 110995
},
{
"epoch": 8.0,
"eval_loss": 0.03977961093187332,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1864,
"eval_samples_per_second": 20.515,
"eval_steps_per_second": 1.395,
"step": 112352
},
{
"epoch": 8.0,
"learning_rate": 6.664767872401025e-05,
"loss": 0.0028,
"step": 112400
},
{
"epoch": 8.1,
"learning_rate": 6.60918858191715e-05,
"loss": 0.0023,
"step": 113805
},
{
"epoch": 8.2,
"learning_rate": 6.553609291433274e-05,
"loss": 0.0023,
"step": 115210
},
{
"epoch": 8.3,
"learning_rate": 6.498030000949398e-05,
"loss": 0.0022,
"step": 116615
},
{
"epoch": 8.4,
"learning_rate": 6.442450710465522e-05,
"loss": 0.0023,
"step": 118020
},
{
"epoch": 8.5,
"learning_rate": 6.386871419981645e-05,
"loss": 0.0024,
"step": 119425
},
{
"epoch": 8.6,
"learning_rate": 6.331292129497769e-05,
"loss": 0.0025,
"step": 120830
},
{
"epoch": 8.7,
"learning_rate": 6.275712839013893e-05,
"loss": 0.0024,
"step": 122235
},
{
"epoch": 8.8,
"learning_rate": 6.220133548530017e-05,
"loss": 0.0022,
"step": 123640
},
{
"epoch": 8.9,
"learning_rate": 6.164554258046142e-05,
"loss": 0.0023,
"step": 125045
},
{
"epoch": 9.0,
"eval_loss": 0.040445487946271896,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.3201,
"eval_samples_per_second": 20.292,
"eval_steps_per_second": 1.38,
"step": 126396
},
{
"epoch": 9.0,
"learning_rate": 6.108974967562266e-05,
"loss": 0.0024,
"step": 126450
},
{
"epoch": 9.1,
"learning_rate": 6.053395677078388e-05,
"loss": 0.002,
"step": 127855
},
{
"epoch": 9.2,
"learning_rate": 5.9978163865945126e-05,
"loss": 0.0021,
"step": 129260
},
{
"epoch": 9.3,
"learning_rate": 5.9422370961106364e-05,
"loss": 0.0022,
"step": 130665
},
{
"epoch": 9.4,
"learning_rate": 5.886657805626761e-05,
"loss": 0.0023,
"step": 132070
},
{
"epoch": 9.5,
"learning_rate": 5.831078515142885e-05,
"loss": 0.0022,
"step": 133475
},
{
"epoch": 9.6,
"learning_rate": 5.775499224659009e-05,
"loss": 0.0021,
"step": 134880
},
{
"epoch": 9.7,
"learning_rate": 5.719919934175132e-05,
"loss": 0.0021,
"step": 136285
},
{
"epoch": 9.8,
"learning_rate": 5.664340643691256e-05,
"loss": 0.0021,
"step": 137690
},
{
"epoch": 9.9,
"learning_rate": 5.60876135320738e-05,
"loss": 0.0023,
"step": 139095
},
{
"epoch": 10.0,
"eval_loss": 0.03852245956659317,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.131,
"eval_samples_per_second": 20.608,
"eval_steps_per_second": 1.401,
"step": 140440
},
{
"epoch": 10.0,
"learning_rate": 5.5531820627235044e-05,
"loss": 0.0023,
"step": 140500
},
{
"epoch": 10.1,
"learning_rate": 5.497602772239628e-05,
"loss": 0.0018,
"step": 141905
},
{
"epoch": 10.2,
"learning_rate": 5.4420234817557526e-05,
"loss": 0.0021,
"step": 143310
},
{
"epoch": 10.3,
"learning_rate": 5.386444191271876e-05,
"loss": 0.0021,
"step": 144715
},
{
"epoch": 10.4,
"learning_rate": 5.3308649007879995e-05,
"loss": 0.0021,
"step": 146120
},
{
"epoch": 10.5,
"learning_rate": 5.275285610304124e-05,
"loss": 0.0021,
"step": 147525
},
{
"epoch": 10.6,
"learning_rate": 5.219706319820248e-05,
"loss": 0.002,
"step": 148930
},
{
"epoch": 10.7,
"learning_rate": 5.1641270293363716e-05,
"loss": 0.002,
"step": 150335
},
{
"epoch": 10.8,
"learning_rate": 5.108547738852496e-05,
"loss": 0.0019,
"step": 151740
},
{
"epoch": 10.9,
"learning_rate": 5.052968448368619e-05,
"loss": 0.002,
"step": 153145
},
{
"epoch": 11.0,
"eval_loss": 0.040671207010746,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.2718,
"eval_samples_per_second": 20.372,
"eval_steps_per_second": 1.385,
"step": 154484
},
{
"epoch": 11.0,
"learning_rate": 4.997389157884743e-05,
"loss": 0.0018,
"step": 154550
},
{
"epoch": 11.1,
"learning_rate": 4.9418098674008675e-05,
"loss": 0.002,
"step": 155955
},
{
"epoch": 11.2,
"learning_rate": 4.886230576916991e-05,
"loss": 0.0019,
"step": 157360
},
{
"epoch": 11.3,
"learning_rate": 4.830651286433115e-05,
"loss": 0.0018,
"step": 158765
},
{
"epoch": 11.4,
"learning_rate": 4.775071995949239e-05,
"loss": 0.0018,
"step": 160170
},
{
"epoch": 11.5,
"learning_rate": 4.7194927054653634e-05,
"loss": 0.002,
"step": 161575
},
{
"epoch": 11.6,
"learning_rate": 4.6639134149814865e-05,
"loss": 0.0019,
"step": 162980
},
{
"epoch": 11.7,
"learning_rate": 4.608334124497611e-05,
"loss": 0.0017,
"step": 164385
},
{
"epoch": 11.81,
"learning_rate": 4.552754834013735e-05,
"loss": 0.002,
"step": 165790
},
{
"epoch": 11.91,
"learning_rate": 4.4971755435298586e-05,
"loss": 0.0018,
"step": 167195
},
{
"epoch": 12.0,
"eval_loss": 0.04258317872881889,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 12.8224,
"eval_samples_per_second": 19.497,
"eval_steps_per_second": 1.326,
"step": 168528
},
{
"epoch": 12.01,
"learning_rate": 4.4415962530459824e-05,
"loss": 0.0017,
"step": 168600
},
{
"epoch": 12.11,
"learning_rate": 4.386016962562107e-05,
"loss": 0.0016,
"step": 170005
},
{
"epoch": 12.21,
"learning_rate": 4.33043767207823e-05,
"loss": 0.0018,
"step": 171410
},
{
"epoch": 12.31,
"learning_rate": 4.2748583815943544e-05,
"loss": 0.0016,
"step": 172815
},
{
"epoch": 12.41,
"learning_rate": 4.219279091110478e-05,
"loss": 0.0018,
"step": 174220
},
{
"epoch": 12.51,
"learning_rate": 4.163699800626602e-05,
"loss": 0.0018,
"step": 175625
},
{
"epoch": 12.61,
"learning_rate": 4.108120510142726e-05,
"loss": 0.0018,
"step": 177030
},
{
"epoch": 12.71,
"learning_rate": 4.05254121965885e-05,
"loss": 0.0016,
"step": 178435
},
{
"epoch": 12.81,
"learning_rate": 3.996961929174974e-05,
"loss": 0.002,
"step": 179840
},
{
"epoch": 12.91,
"learning_rate": 3.941382638691098e-05,
"loss": 0.0018,
"step": 181245
},
{
"epoch": 13.0,
"eval_loss": 0.042234089225530624,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 13.9619,
"eval_samples_per_second": 17.906,
"eval_steps_per_second": 1.218,
"step": 182572
},
{
"epoch": 13.01,
"learning_rate": 3.885803348207222e-05,
"loss": 0.0018,
"step": 182650
},
{
"epoch": 13.11,
"learning_rate": 3.8302240577233455e-05,
"loss": 0.0017,
"step": 184055
},
{
"epoch": 13.21,
"learning_rate": 3.77464476723947e-05,
"loss": 0.0017,
"step": 185460
},
{
"epoch": 13.31,
"learning_rate": 3.719065476755594e-05,
"loss": 0.0016,
"step": 186865
},
{
"epoch": 13.41,
"learning_rate": 3.6634861862717176e-05,
"loss": 0.0016,
"step": 188270
},
{
"epoch": 13.51,
"learning_rate": 3.6079068957878414e-05,
"loss": 0.0016,
"step": 189675
},
{
"epoch": 13.61,
"learning_rate": 3.552327605303966e-05,
"loss": 0.0017,
"step": 191080
},
{
"epoch": 13.71,
"learning_rate": 3.4967483148200896e-05,
"loss": 0.0017,
"step": 192485
},
{
"epoch": 13.81,
"learning_rate": 3.4411690243362134e-05,
"loss": 0.0015,
"step": 193890
},
{
"epoch": 13.91,
"learning_rate": 3.385589733852337e-05,
"loss": 0.0016,
"step": 195295
},
{
"epoch": 14.0,
"eval_loss": 0.04207869619131088,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.3358,
"eval_samples_per_second": 20.266,
"eval_steps_per_second": 1.378,
"step": 196616
},
{
"epoch": 14.01,
"learning_rate": 3.330010443368462e-05,
"loss": 0.0016,
"step": 196700
},
{
"epoch": 14.11,
"learning_rate": 3.274431152884585e-05,
"loss": 0.0016,
"step": 198105
},
{
"epoch": 14.21,
"learning_rate": 3.218851862400709e-05,
"loss": 0.0015,
"step": 199510
},
{
"epoch": 14.31,
"learning_rate": 3.163272571916833e-05,
"loss": 0.0016,
"step": 200915
},
{
"epoch": 14.41,
"learning_rate": 3.107693281432957e-05,
"loss": 0.0016,
"step": 202320
},
{
"epoch": 14.51,
"learning_rate": 3.052113990949081e-05,
"loss": 0.0016,
"step": 203725
},
{
"epoch": 14.61,
"learning_rate": 2.9965347004652052e-05,
"loss": 0.0016,
"step": 205130
},
{
"epoch": 14.71,
"learning_rate": 2.9409554099813286e-05,
"loss": 0.0015,
"step": 206535
},
{
"epoch": 14.81,
"learning_rate": 2.8853761194974528e-05,
"loss": 0.0016,
"step": 207940
},
{
"epoch": 14.91,
"learning_rate": 2.829796829013577e-05,
"loss": 0.0016,
"step": 209345
},
{
"epoch": 15.0,
"eval_loss": 0.04015611857175827,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1736,
"eval_samples_per_second": 20.536,
"eval_steps_per_second": 1.396,
"step": 210660
},
{
"epoch": 15.01,
"learning_rate": 2.7742175385297004e-05,
"loss": 0.0015,
"step": 210750
},
{
"epoch": 15.11,
"learning_rate": 2.7186382480458245e-05,
"loss": 0.0015,
"step": 212155
},
{
"epoch": 15.21,
"learning_rate": 2.6630589575619486e-05,
"loss": 0.0014,
"step": 213560
},
{
"epoch": 15.31,
"learning_rate": 2.607479667078072e-05,
"loss": 0.0016,
"step": 214965
},
{
"epoch": 15.41,
"learning_rate": 2.5519003765941962e-05,
"loss": 0.0014,
"step": 216370
},
{
"epoch": 15.51,
"learning_rate": 2.49632108611032e-05,
"loss": 0.0016,
"step": 217775
},
{
"epoch": 15.61,
"learning_rate": 2.440741795626444e-05,
"loss": 0.0014,
"step": 219180
},
{
"epoch": 15.71,
"learning_rate": 2.385162505142568e-05,
"loss": 0.0015,
"step": 220585
},
{
"epoch": 15.81,
"learning_rate": 2.3295832146586918e-05,
"loss": 0.0015,
"step": 221990
},
{
"epoch": 15.91,
"learning_rate": 2.2740039241748156e-05,
"loss": 0.0014,
"step": 223395
},
{
"epoch": 16.0,
"eval_loss": 0.04069029539823532,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.2719,
"eval_samples_per_second": 20.372,
"eval_steps_per_second": 1.385,
"step": 224704
},
{
"epoch": 16.01,
"learning_rate": 2.2184246336909397e-05,
"loss": 0.0015,
"step": 224800
},
{
"epoch": 16.11,
"learning_rate": 2.1628453432070635e-05,
"loss": 0.0013,
"step": 226205
},
{
"epoch": 16.21,
"learning_rate": 2.1072660527231873e-05,
"loss": 0.0014,
"step": 227610
},
{
"epoch": 16.31,
"learning_rate": 2.0516867622393114e-05,
"loss": 0.0014,
"step": 229015
},
{
"epoch": 16.41,
"learning_rate": 1.9961074717554352e-05,
"loss": 0.0015,
"step": 230420
},
{
"epoch": 16.51,
"learning_rate": 1.9405281812715594e-05,
"loss": 0.0016,
"step": 231825
},
{
"epoch": 16.61,
"learning_rate": 1.8849488907876832e-05,
"loss": 0.0015,
"step": 233230
},
{
"epoch": 16.71,
"learning_rate": 1.8293696003038073e-05,
"loss": 0.0014,
"step": 234635
},
{
"epoch": 16.81,
"learning_rate": 1.773790309819931e-05,
"loss": 0.0013,
"step": 236040
},
{
"epoch": 16.91,
"learning_rate": 1.7182110193360553e-05,
"loss": 0.0014,
"step": 237445
},
{
"epoch": 17.0,
"eval_loss": 0.042702946811914444,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1336,
"eval_samples_per_second": 20.604,
"eval_steps_per_second": 1.401,
"step": 238748
},
{
"epoch": 17.01,
"learning_rate": 1.662631728852179e-05,
"loss": 0.0013,
"step": 238850
},
{
"epoch": 17.11,
"learning_rate": 1.607052438368303e-05,
"loss": 0.0014,
"step": 240255
},
{
"epoch": 17.21,
"learning_rate": 1.551473147884427e-05,
"loss": 0.0014,
"step": 241660
},
{
"epoch": 17.31,
"learning_rate": 1.4958938574005508e-05,
"loss": 0.0013,
"step": 243065
},
{
"epoch": 17.41,
"learning_rate": 1.4403145669166746e-05,
"loss": 0.0014,
"step": 244470
},
{
"epoch": 17.51,
"learning_rate": 1.3847352764327987e-05,
"loss": 0.0013,
"step": 245875
},
{
"epoch": 17.61,
"learning_rate": 1.3291559859489225e-05,
"loss": 0.0013,
"step": 247280
},
{
"epoch": 17.71,
"learning_rate": 1.2735766954650463e-05,
"loss": 0.0014,
"step": 248685
},
{
"epoch": 17.81,
"learning_rate": 1.2179974049811703e-05,
"loss": 0.0014,
"step": 250090
},
{
"epoch": 17.91,
"learning_rate": 1.1624181144972943e-05,
"loss": 0.0014,
"step": 251495
},
{
"epoch": 18.0,
"eval_loss": 0.04110053926706314,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.0446,
"eval_samples_per_second": 20.756,
"eval_steps_per_second": 1.411,
"step": 252792
},
{
"epoch": 18.01,
"learning_rate": 1.1068388240134182e-05,
"loss": 0.0014,
"step": 252900
},
{
"epoch": 18.11,
"learning_rate": 1.051259533529542e-05,
"loss": 0.0014,
"step": 254305
},
{
"epoch": 18.21,
"learning_rate": 9.95680243045666e-06,
"loss": 0.0012,
"step": 255710
},
{
"epoch": 18.31,
"learning_rate": 9.4010095256179e-06,
"loss": 0.0012,
"step": 257115
},
{
"epoch": 18.41,
"learning_rate": 8.84521662077914e-06,
"loss": 0.0014,
"step": 258520
},
{
"epoch": 18.51,
"learning_rate": 8.289423715940379e-06,
"loss": 0.0012,
"step": 259925
},
{
"epoch": 18.61,
"learning_rate": 7.733630811101619e-06,
"loss": 0.0013,
"step": 261330
},
{
"epoch": 18.71,
"learning_rate": 7.1778379062628565e-06,
"loss": 0.0014,
"step": 262735
},
{
"epoch": 18.81,
"learning_rate": 6.622045001424096e-06,
"loss": 0.0012,
"step": 264140
},
{
"epoch": 18.91,
"learning_rate": 6.066252096585336e-06,
"loss": 0.0013,
"step": 265545
},
{
"epoch": 19.0,
"eval_loss": 0.040625352412462234,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.0757,
"eval_samples_per_second": 20.703,
"eval_steps_per_second": 1.408,
"step": 266836
},
{
"epoch": 19.01,
"learning_rate": 5.510459191746575e-06,
"loss": 0.0013,
"step": 266950
},
{
"epoch": 19.11,
"learning_rate": 4.9546662869078136e-06,
"loss": 0.0014,
"step": 268355
},
{
"epoch": 19.21,
"learning_rate": 4.398873382069053e-06,
"loss": 0.0013,
"step": 269760
},
{
"epoch": 19.31,
"learning_rate": 3.843080477230292e-06,
"loss": 0.0013,
"step": 271165
},
{
"epoch": 19.41,
"learning_rate": 3.2872875723915313e-06,
"loss": 0.0013,
"step": 272570
},
{
"epoch": 19.51,
"learning_rate": 2.7314946675527706e-06,
"loss": 0.0012,
"step": 273975
},
{
"epoch": 19.61,
"learning_rate": 2.1757017627140103e-06,
"loss": 0.0012,
"step": 275380
},
{
"epoch": 19.71,
"learning_rate": 1.619908857875249e-06,
"loss": 0.0013,
"step": 276785
},
{
"epoch": 19.81,
"learning_rate": 1.0641159530364886e-06,
"loss": 0.0014,
"step": 278190
},
{
"epoch": 19.91,
"learning_rate": 5.083230481977278e-07,
"loss": 0.0013,
"step": 279595
},
{
"epoch": 20.0,
"eval_loss": 0.040508754551410675,
"eval_max_distance": 6,
"eval_mean_distance": 0,
"eval_runtime": 12.1336,
"eval_samples_per_second": 20.604,
"eval_steps_per_second": 1.401,
"step": 280880
},
{
"epoch": 20.0,
"step": 280880,
"total_flos": 7.269950341627085e+16,
"train_loss": 0.0021448437322240947,
"train_runtime": 20054.4611,
"train_samples_per_second": 210.084,
"train_steps_per_second": 14.006
}
],
"logging_steps": 1405,
"max_steps": 280880,
"num_train_epochs": 20,
"save_steps": 2809,
"total_flos": 7.269950341627085e+16,
"trial_name": null,
"trial_params": null
}