{
"best_metric": 2.9399616718292236,
"best_model_checkpoint": "./results/models/checkpoint-43032",
"epoch": 44.0,
"eval_steps": 500,
"global_step": 43032,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5112474437627812,
"grad_norm": 0.056640625,
"learning_rate": 0.001979550102249489,
"loss": 3.8421,
"step": 500
},
{
"epoch": 1.0,
"eval_loss": 3.714660882949829,
"eval_runtime": 2.4299,
"eval_samples_per_second": 205.767,
"eval_steps_per_second": 0.412,
"step": 978
},
{
"epoch": 1.0224948875255624,
"grad_norm": 0.0849609375,
"learning_rate": 0.0019591002044989777,
"loss": 3.7427,
"step": 1000
},
{
"epoch": 1.5337423312883436,
"grad_norm": 0.2236328125,
"learning_rate": 0.0019386503067484661,
"loss": 3.6832,
"step": 1500
},
{
"epoch": 2.0,
"eval_loss": 3.5354397296905518,
"eval_runtime": 2.3994,
"eval_samples_per_second": 208.386,
"eval_steps_per_second": 0.417,
"step": 1956
},
{
"epoch": 2.044989775051125,
"grad_norm": 0.197265625,
"learning_rate": 0.001918200408997955,
"loss": 3.5848,
"step": 2000
},
{
"epoch": 2.556237218813906,
"grad_norm": 0.26171875,
"learning_rate": 0.0018977505112474438,
"loss": 3.474,
"step": 2500
},
{
"epoch": 3.0,
"eval_loss": 3.3599166870117188,
"eval_runtime": 2.2955,
"eval_samples_per_second": 217.822,
"eval_steps_per_second": 0.436,
"step": 2934
},
{
"epoch": 3.067484662576687,
"grad_norm": 0.2216796875,
"learning_rate": 0.0018773006134969327,
"loss": 3.3901,
"step": 3000
},
{
"epoch": 3.5787321063394684,
"grad_norm": 0.263671875,
"learning_rate": 0.0018568507157464215,
"loss": 3.3209,
"step": 3500
},
{
"epoch": 4.0,
"eval_loss": 3.2504658699035645,
"eval_runtime": 2.7262,
"eval_samples_per_second": 183.409,
"eval_steps_per_second": 0.367,
"step": 3912
},
{
"epoch": 4.08997955010225,
"grad_norm": 0.302734375,
"learning_rate": 0.00183640081799591,
"loss": 3.2742,
"step": 4000
},
{
"epoch": 4.601226993865031,
"grad_norm": 0.44140625,
"learning_rate": 0.0018159509202453987,
"loss": 3.2314,
"step": 4500
},
{
"epoch": 5.0,
"eval_loss": 3.1896719932556152,
"eval_runtime": 2.6127,
"eval_samples_per_second": 191.371,
"eval_steps_per_second": 0.383,
"step": 4890
},
{
"epoch": 5.112474437627812,
"grad_norm": 0.302734375,
"learning_rate": 0.0017955010224948876,
"loss": 3.2012,
"step": 5000
},
{
"epoch": 5.623721881390593,
"grad_norm": 0.296875,
"learning_rate": 0.0017750511247443764,
"loss": 3.1715,
"step": 5500
},
{
"epoch": 6.0,
"eval_loss": 3.143582820892334,
"eval_runtime": 2.3471,
"eval_samples_per_second": 213.027,
"eval_steps_per_second": 0.426,
"step": 5868
},
{
"epoch": 6.134969325153374,
"grad_norm": 0.318359375,
"learning_rate": 0.001754601226993865,
"loss": 3.1533,
"step": 6000
},
{
"epoch": 6.6462167689161555,
"grad_norm": 0.283203125,
"learning_rate": 0.0017341513292433537,
"loss": 3.1316,
"step": 6500
},
{
"epoch": 7.0,
"eval_loss": 3.1050572395324707,
"eval_runtime": 2.7309,
"eval_samples_per_second": 183.09,
"eval_steps_per_second": 0.366,
"step": 6846
},
{
"epoch": 7.157464212678937,
"grad_norm": 0.291015625,
"learning_rate": 0.0017137014314928425,
"loss": 3.1166,
"step": 7000
},
{
"epoch": 7.668711656441718,
"grad_norm": 0.4296875,
"learning_rate": 0.0016932515337423314,
"loss": 3.1031,
"step": 7500
},
{
"epoch": 8.0,
"eval_loss": 3.0762298107147217,
"eval_runtime": 3.005,
"eval_samples_per_second": 166.388,
"eval_steps_per_second": 0.333,
"step": 7824
},
{
"epoch": 8.1799591002045,
"grad_norm": 0.310546875,
"learning_rate": 0.0016728016359918202,
"loss": 3.0867,
"step": 8000
},
{
"epoch": 8.69120654396728,
"grad_norm": 0.283203125,
"learning_rate": 0.0016523517382413088,
"loss": 3.0785,
"step": 8500
},
{
"epoch": 9.0,
"eval_loss": 3.063835382461548,
"eval_runtime": 0.9235,
"eval_samples_per_second": 541.411,
"eval_steps_per_second": 1.083,
"step": 8802
},
{
"epoch": 9.202453987730062,
"grad_norm": 0.3125,
"learning_rate": 0.0016319018404907975,
"loss": 3.0683,
"step": 9000
},
{
"epoch": 9.713701431492842,
"grad_norm": 0.39453125,
"learning_rate": 0.0016114519427402863,
"loss": 3.0582,
"step": 9500
},
{
"epoch": 10.0,
"eval_loss": 3.043813943862915,
"eval_runtime": 0.9286,
"eval_samples_per_second": 538.419,
"eval_steps_per_second": 1.077,
"step": 9780
},
{
"epoch": 10.224948875255624,
"grad_norm": 0.341796875,
"learning_rate": 0.0015910020449897751,
"loss": 3.05,
"step": 10000
},
{
"epoch": 10.736196319018404,
"grad_norm": 0.412109375,
"learning_rate": 0.0015705521472392638,
"loss": 3.0436,
"step": 10500
},
{
"epoch": 11.0,
"eval_loss": 3.0265145301818848,
"eval_runtime": 0.8628,
"eval_samples_per_second": 579.525,
"eval_steps_per_second": 1.159,
"step": 10758
},
{
"epoch": 11.247443762781186,
"grad_norm": 0.287109375,
"learning_rate": 0.0015501022494887526,
"loss": 3.0351,
"step": 11000
},
{
"epoch": 11.758691206543967,
"grad_norm": 0.34765625,
"learning_rate": 0.0015296523517382412,
"loss": 3.0337,
"step": 11500
},
{
"epoch": 12.0,
"eval_loss": 3.0209317207336426,
"eval_runtime": 1.0057,
"eval_samples_per_second": 497.152,
"eval_steps_per_second": 0.994,
"step": 11736
},
{
"epoch": 12.269938650306749,
"grad_norm": 0.29296875,
"learning_rate": 0.00150920245398773,
"loss": 3.0209,
"step": 12000
},
{
"epoch": 12.781186094069529,
"grad_norm": 0.333984375,
"learning_rate": 0.001488752556237219,
"loss": 3.0193,
"step": 12500
},
{
"epoch": 13.0,
"eval_loss": 3.0119516849517822,
"eval_runtime": 0.9281,
"eval_samples_per_second": 538.717,
"eval_steps_per_second": 1.077,
"step": 12714
},
{
"epoch": 13.292433537832311,
"grad_norm": 0.373046875,
"learning_rate": 0.0014683026584867075,
"loss": 3.0169,
"step": 13000
},
{
"epoch": 13.803680981595091,
"grad_norm": 0.337890625,
"learning_rate": 0.0014478527607361964,
"loss": 3.0083,
"step": 13500
},
{
"epoch": 14.0,
"eval_loss": 3.005497455596924,
"eval_runtime": 0.9352,
"eval_samples_per_second": 534.66,
"eval_steps_per_second": 1.069,
"step": 13692
},
{
"epoch": 14.314928425357873,
"grad_norm": 0.373046875,
"learning_rate": 0.001427402862985685,
"loss": 3.0039,
"step": 14000
},
{
"epoch": 14.826175869120654,
"grad_norm": 0.35546875,
"learning_rate": 0.0014069529652351738,
"loss": 3.0026,
"step": 14500
},
{
"epoch": 15.0,
"eval_loss": 2.997267007827759,
"eval_runtime": 0.8134,
"eval_samples_per_second": 614.694,
"eval_steps_per_second": 1.229,
"step": 14670
},
{
"epoch": 15.337423312883436,
"grad_norm": 0.306640625,
"learning_rate": 0.0013865030674846627,
"loss": 2.9966,
"step": 15000
},
{
"epoch": 15.848670756646216,
"grad_norm": 0.32421875,
"learning_rate": 0.0013660531697341513,
"loss": 2.9944,
"step": 15500
},
{
"epoch": 16.0,
"eval_loss": 2.986419916152954,
"eval_runtime": 1.0985,
"eval_samples_per_second": 455.159,
"eval_steps_per_second": 0.91,
"step": 15648
},
{
"epoch": 16.359918200409,
"grad_norm": 0.3203125,
"learning_rate": 0.0013456032719836402,
"loss": 2.9894,
"step": 16000
},
{
"epoch": 16.87116564417178,
"grad_norm": 0.3046875,
"learning_rate": 0.0013251533742331288,
"loss": 2.9893,
"step": 16500
},
{
"epoch": 17.0,
"eval_loss": 2.9802663326263428,
"eval_runtime": 0.9902,
"eval_samples_per_second": 504.964,
"eval_steps_per_second": 1.01,
"step": 16626
},
{
"epoch": 17.38241308793456,
"grad_norm": 0.318359375,
"learning_rate": 0.0013047034764826176,
"loss": 2.9857,
"step": 17000
},
{
"epoch": 17.893660531697343,
"grad_norm": 0.33203125,
"learning_rate": 0.0012842535787321062,
"loss": 2.9819,
"step": 17500
},
{
"epoch": 18.0,
"eval_loss": 2.978501796722412,
"eval_runtime": 0.9842,
"eval_samples_per_second": 508.013,
"eval_steps_per_second": 1.016,
"step": 17604
},
{
"epoch": 18.404907975460123,
"grad_norm": 0.337890625,
"learning_rate": 0.001263803680981595,
"loss": 2.9785,
"step": 18000
},
{
"epoch": 18.916155419222903,
"grad_norm": 0.30859375,
"learning_rate": 0.001243353783231084,
"loss": 2.9779,
"step": 18500
},
{
"epoch": 19.0,
"eval_loss": 2.9747886657714844,
"eval_runtime": 1.1337,
"eval_samples_per_second": 441.025,
"eval_steps_per_second": 0.882,
"step": 18582
},
{
"epoch": 19.427402862985684,
"grad_norm": 0.296875,
"learning_rate": 0.0012229038854805726,
"loss": 2.9719,
"step": 19000
},
{
"epoch": 19.938650306748468,
"grad_norm": 0.33984375,
"learning_rate": 0.0012024539877300614,
"loss": 2.9763,
"step": 19500
},
{
"epoch": 20.0,
"eval_loss": 2.9712185859680176,
"eval_runtime": 0.8388,
"eval_samples_per_second": 596.063,
"eval_steps_per_second": 1.192,
"step": 19560
},
{
"epoch": 20.449897750511248,
"grad_norm": 0.298828125,
"learning_rate": 0.00118200408997955,
"loss": 2.9675,
"step": 20000
},
{
"epoch": 20.961145194274028,
"grad_norm": 0.447265625,
"learning_rate": 0.0011615541922290389,
"loss": 2.9715,
"step": 20500
},
{
"epoch": 21.0,
"eval_loss": 2.9683492183685303,
"eval_runtime": 1.2443,
"eval_samples_per_second": 401.838,
"eval_steps_per_second": 0.804,
"step": 20538
},
{
"epoch": 21.47239263803681,
"grad_norm": 0.2890625,
"learning_rate": 0.0011411042944785277,
"loss": 2.9674,
"step": 21000
},
{
"epoch": 21.983640081799592,
"grad_norm": 0.322265625,
"learning_rate": 0.0011206543967280163,
"loss": 2.9658,
"step": 21500
},
{
"epoch": 22.0,
"eval_loss": 2.96321177482605,
"eval_runtime": 1.0239,
"eval_samples_per_second": 488.314,
"eval_steps_per_second": 0.977,
"step": 21516
},
{
"epoch": 22.494887525562373,
"grad_norm": 0.296875,
"learning_rate": 0.0011002044989775052,
"loss": 2.9617,
"step": 22000
},
{
"epoch": 23.0,
"eval_loss": 2.962963342666626,
"eval_runtime": 1.176,
"eval_samples_per_second": 425.172,
"eval_steps_per_second": 0.85,
"step": 22494
},
{
"epoch": 23.006134969325153,
"grad_norm": 0.392578125,
"learning_rate": 0.0010797546012269938,
"loss": 2.9646,
"step": 22500
},
{
"epoch": 23.517382413087933,
"grad_norm": 0.345703125,
"learning_rate": 0.0010593047034764826,
"loss": 2.959,
"step": 23000
},
{
"epoch": 24.0,
"eval_loss": 2.9584052562713623,
"eval_runtime": 0.8936,
"eval_samples_per_second": 559.54,
"eval_steps_per_second": 1.119,
"step": 23472
},
{
"epoch": 24.028629856850717,
"grad_norm": 0.33984375,
"learning_rate": 0.0010388548057259715,
"loss": 2.9606,
"step": 23500
},
{
"epoch": 24.539877300613497,
"grad_norm": 0.333984375,
"learning_rate": 0.00101840490797546,
"loss": 2.9591,
"step": 24000
},
{
"epoch": 25.0,
"eval_loss": 2.9545176029205322,
"eval_runtime": 0.9712,
"eval_samples_per_second": 514.829,
"eval_steps_per_second": 1.03,
"step": 24450
},
{
"epoch": 25.051124744376278,
"grad_norm": 0.279296875,
"learning_rate": 0.000997955010224949,
"loss": 2.9567,
"step": 24500
},
{
"epoch": 25.562372188139058,
"grad_norm": 0.380859375,
"learning_rate": 0.0009775051124744376,
"loss": 2.9566,
"step": 25000
},
{
"epoch": 26.0,
"eval_loss": 2.9551963806152344,
"eval_runtime": 0.8333,
"eval_samples_per_second": 600.037,
"eval_steps_per_second": 1.2,
"step": 25428
},
{
"epoch": 26.073619631901842,
"grad_norm": 0.3046875,
"learning_rate": 0.0009570552147239264,
"loss": 2.9551,
"step": 25500
},
{
"epoch": 26.584867075664622,
"grad_norm": 0.2734375,
"learning_rate": 0.0009366053169734151,
"loss": 2.9523,
"step": 26000
},
{
"epoch": 27.0,
"eval_loss": 2.957306146621704,
"eval_runtime": 0.7301,
"eval_samples_per_second": 684.842,
"eval_steps_per_second": 1.37,
"step": 26406
},
{
"epoch": 27.096114519427402,
"grad_norm": 0.318359375,
"learning_rate": 0.0009161554192229039,
"loss": 2.9531,
"step": 26500
},
{
"epoch": 27.607361963190183,
"grad_norm": 0.318359375,
"learning_rate": 0.0008957055214723927,
"loss": 2.9524,
"step": 27000
},
{
"epoch": 28.0,
"eval_loss": 2.9476640224456787,
"eval_runtime": 0.8281,
"eval_samples_per_second": 603.779,
"eval_steps_per_second": 1.208,
"step": 27384
},
{
"epoch": 28.118609406952967,
"grad_norm": 0.294921875,
"learning_rate": 0.0008752556237218813,
"loss": 2.9493,
"step": 27500
},
{
"epoch": 28.629856850715747,
"grad_norm": 0.31640625,
"learning_rate": 0.0008548057259713702,
"loss": 2.9487,
"step": 28000
},
{
"epoch": 29.0,
"eval_loss": 2.9507575035095215,
"eval_runtime": 0.9351,
"eval_samples_per_second": 534.698,
"eval_steps_per_second": 1.069,
"step": 28362
},
{
"epoch": 29.141104294478527,
"grad_norm": 0.287109375,
"learning_rate": 0.0008343558282208589,
"loss": 2.95,
"step": 28500
},
{
"epoch": 29.652351738241308,
"grad_norm": 0.294921875,
"learning_rate": 0.0008139059304703477,
"loss": 2.9467,
"step": 29000
},
{
"epoch": 30.0,
"eval_loss": 2.9493441581726074,
"eval_runtime": 0.9913,
"eval_samples_per_second": 504.379,
"eval_steps_per_second": 1.009,
"step": 29340
},
{
"epoch": 30.16359918200409,
"grad_norm": 0.26171875,
"learning_rate": 0.0007934560327198365,
"loss": 2.9481,
"step": 29500
},
{
"epoch": 30.67484662576687,
"grad_norm": 0.283203125,
"learning_rate": 0.0007730061349693251,
"loss": 2.945,
"step": 30000
},
{
"epoch": 31.0,
"eval_loss": 2.9469032287597656,
"eval_runtime": 0.7298,
"eval_samples_per_second": 685.073,
"eval_steps_per_second": 1.37,
"step": 30318
},
{
"epoch": 31.186094069529652,
"grad_norm": 0.28515625,
"learning_rate": 0.000752556237218814,
"loss": 2.9452,
"step": 30500
},
{
"epoch": 31.697341513292432,
"grad_norm": 0.28125,
"learning_rate": 0.0007321063394683026,
"loss": 2.9431,
"step": 31000
},
{
"epoch": 32.0,
"eval_loss": 2.947190046310425,
"eval_runtime": 0.8045,
"eval_samples_per_second": 621.476,
"eval_steps_per_second": 1.243,
"step": 31296
},
{
"epoch": 32.20858895705521,
"grad_norm": 0.265625,
"learning_rate": 0.0007116564417177914,
"loss": 2.9454,
"step": 31500
},
{
"epoch": 32.719836400818,
"grad_norm": 0.287109375,
"learning_rate": 0.0006912065439672803,
"loss": 2.9428,
"step": 32000
},
{
"epoch": 33.0,
"eval_loss": 2.9455933570861816,
"eval_runtime": 0.7507,
"eval_samples_per_second": 666.034,
"eval_steps_per_second": 1.332,
"step": 32274
},
{
"epoch": 33.23108384458078,
"grad_norm": 0.27734375,
"learning_rate": 0.0006707566462167689,
"loss": 2.9424,
"step": 32500
},
{
"epoch": 33.74233128834356,
"grad_norm": 0.2890625,
"learning_rate": 0.0006503067484662577,
"loss": 2.9426,
"step": 33000
},
{
"epoch": 34.0,
"eval_loss": 2.945394992828369,
"eval_runtime": 0.91,
"eval_samples_per_second": 549.459,
"eval_steps_per_second": 1.099,
"step": 33252
},
{
"epoch": 34.25357873210634,
"grad_norm": 0.3125,
"learning_rate": 0.0006298568507157464,
"loss": 2.94,
"step": 33500
},
{
"epoch": 34.76482617586912,
"grad_norm": 0.2734375,
"learning_rate": 0.0006094069529652352,
"loss": 2.9438,
"step": 34000
},
{
"epoch": 35.0,
"eval_loss": 2.9434268474578857,
"eval_runtime": 0.9642,
"eval_samples_per_second": 518.581,
"eval_steps_per_second": 1.037,
"step": 34230
},
{
"epoch": 35.2760736196319,
"grad_norm": 0.287109375,
"learning_rate": 0.0005889570552147239,
"loss": 2.939,
"step": 34500
},
{
"epoch": 35.787321063394685,
"grad_norm": 0.263671875,
"learning_rate": 0.0005685071574642127,
"loss": 2.9407,
"step": 35000
},
{
"epoch": 36.0,
"eval_loss": 2.9426307678222656,
"eval_runtime": 0.9434,
"eval_samples_per_second": 530.024,
"eval_steps_per_second": 1.06,
"step": 35208
},
{
"epoch": 36.29856850715746,
"grad_norm": 0.2578125,
"learning_rate": 0.0005480572597137015,
"loss": 2.9372,
"step": 35500
},
{
"epoch": 36.809815950920246,
"grad_norm": 0.283203125,
"learning_rate": 0.0005276073619631901,
"loss": 2.941,
"step": 36000
},
{
"epoch": 37.0,
"eval_loss": 2.9429283142089844,
"eval_runtime": 0.9172,
"eval_samples_per_second": 545.157,
"eval_steps_per_second": 1.09,
"step": 36186
},
{
"epoch": 37.32106339468303,
"grad_norm": 0.26171875,
"learning_rate": 0.000507157464212679,
"loss": 2.9368,
"step": 36500
},
{
"epoch": 37.83231083844581,
"grad_norm": 0.2578125,
"learning_rate": 0.00048670756646216766,
"loss": 2.9385,
"step": 37000
},
{
"epoch": 38.0,
"eval_loss": 2.9418914318084717,
"eval_runtime": 0.9937,
"eval_samples_per_second": 503.192,
"eval_steps_per_second": 1.006,
"step": 37164
},
{
"epoch": 38.34355828220859,
"grad_norm": 0.25390625,
"learning_rate": 0.00046625766871165645,
"loss": 2.9349,
"step": 37500
},
{
"epoch": 38.85480572597137,
"grad_norm": 0.255859375,
"learning_rate": 0.00044580777096114523,
"loss": 2.9384,
"step": 38000
},
{
"epoch": 39.0,
"eval_loss": 2.943824291229248,
"eval_runtime": 0.9909,
"eval_samples_per_second": 504.6,
"eval_steps_per_second": 1.009,
"step": 38142
},
{
"epoch": 39.36605316973415,
"grad_norm": 0.271484375,
"learning_rate": 0.00042535787321063397,
"loss": 2.9386,
"step": 38500
},
{
"epoch": 39.877300613496935,
"grad_norm": 0.24609375,
"learning_rate": 0.0004049079754601227,
"loss": 2.9364,
"step": 39000
},
{
"epoch": 40.0,
"eval_loss": 2.941757917404175,
"eval_runtime": 0.8169,
"eval_samples_per_second": 612.068,
"eval_steps_per_second": 1.224,
"step": 39120
},
{
"epoch": 40.38854805725971,
"grad_norm": 0.25,
"learning_rate": 0.00038445807770961143,
"loss": 2.9374,
"step": 39500
},
{
"epoch": 40.899795501022496,
"grad_norm": 0.2431640625,
"learning_rate": 0.0003640081799591002,
"loss": 2.9341,
"step": 40000
},
{
"epoch": 41.0,
"eval_loss": 2.9411299228668213,
"eval_runtime": 0.9525,
"eval_samples_per_second": 524.937,
"eval_steps_per_second": 1.05,
"step": 40098
},
{
"epoch": 41.41104294478528,
"grad_norm": 0.2451171875,
"learning_rate": 0.00034355828220858896,
"loss": 2.9323,
"step": 40500
},
{
"epoch": 41.922290388548056,
"grad_norm": 0.2353515625,
"learning_rate": 0.00032310838445807774,
"loss": 2.9361,
"step": 41000
},
{
"epoch": 42.0,
"eval_loss": 2.94166898727417,
"eval_runtime": 0.7476,
"eval_samples_per_second": 668.768,
"eval_steps_per_second": 1.338,
"step": 41076
},
{
"epoch": 42.43353783231084,
"grad_norm": 0.2470703125,
"learning_rate": 0.0003026584867075665,
"loss": 2.9344,
"step": 41500
},
{
"epoch": 42.94478527607362,
"grad_norm": 0.255859375,
"learning_rate": 0.0002822085889570552,
"loss": 2.9345,
"step": 42000
},
{
"epoch": 43.0,
"eval_loss": 2.9405741691589355,
"eval_runtime": 0.8055,
"eval_samples_per_second": 620.71,
"eval_steps_per_second": 1.241,
"step": 42054
},
{
"epoch": 43.4560327198364,
"grad_norm": 0.24609375,
"learning_rate": 0.000261758691206544,
"loss": 2.9342,
"step": 42500
},
{
"epoch": 43.967280163599185,
"grad_norm": 0.2451171875,
"learning_rate": 0.00024130879345603273,
"loss": 2.9347,
"step": 43000
},
{
"epoch": 44.0,
"eval_loss": 2.9399616718292236,
"eval_runtime": 0.7311,
"eval_samples_per_second": 683.897,
"eval_steps_per_second": 1.368,
"step": 43032
}
],
"logging_steps": 500,
"max_steps": 48900,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.377598882696192e+17,
"train_batch_size": 1024,
"trial_name": null,
"trial_params": null
}