vntl-7b-v0.2-qlora / checkpoint-200 /trainer_state.json
lmg-anon's picture
Upload 50 files
86f9061
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.47961630695443647,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00013,
"loss": 1.1241,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 0.00026,
"loss": 1.0107,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 0.00039,
"loss": 1.1086,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 0.00052,
"loss": 1.0044,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 0.00065,
"loss": 1.0496,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 0.0005933661039639299,
"loss": 1.0199,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 0.0005493502655735357,
"loss": 1.0198,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 0.0005138701197773616,
"loss": 0.969,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 0.0004844813951249544,
"loss": 0.9383,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 0.0004596194077712558,
"loss": 0.8776,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 0.0004382299106011073,
"loss": 1.0173,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 0.0004195731958391368,
"loss": 1.1173,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 0.0004031128874149274,
"loss": 1.0876,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 0.0003884492980336779,
"loss": 1.0524,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 0.0003752776749732568,
"loss": 0.8953,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 0.00036336104634371584,
"loss": 1.1335,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 0.00035251199395531623,
"loss": 0.9837,
"step": 17
},
{
"epoch": 0.04,
"learning_rate": 0.00034258007985157445,
"loss": 0.9707,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 0.0003334429644276751,
"loss": 0.9149,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 0.000325,
"loss": 1.0043,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 0.00031716752370827323,
"loss": 1.001,
"step": 21
},
{
"epoch": 0.05,
"learning_rate": 0.00030987534150481746,
"loss": 1.0395,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 0.000303064062678102,
"loss": 0.8718,
"step": 23
},
{
"epoch": 0.06,
"learning_rate": 0.00029668305198196496,
"loss": 1.1114,
"step": 24
},
{
"epoch": 0.06,
"learning_rate": 0.00029068883707497264,
"loss": 0.7765,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 0.0002850438562747845,
"loss": 0.9522,
"step": 26
},
{
"epoch": 0.06,
"learning_rate": 0.00027971546389275785,
"loss": 0.9588,
"step": 27
},
{
"epoch": 0.07,
"learning_rate": 0.00027467513278676785,
"loss": 1.0313,
"step": 28
},
{
"epoch": 0.07,
"learning_rate": 0.0002698978095246549,
"loss": 0.9338,
"step": 29
},
{
"epoch": 0.07,
"learning_rate": 0.000265361388801511,
"loss": 0.892,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 0.00026104628189331215,
"loss": 0.893,
"step": 31
},
{
"epoch": 0.08,
"learning_rate": 0.0002569350598886808,
"loss": 0.8983,
"step": 32
},
{
"epoch": 0.08,
"learning_rate": 0.00025301215685249496,
"loss": 0.9277,
"step": 33
},
{
"epoch": 0.08,
"learning_rate": 0.00024926362137539537,
"loss": 0.8962,
"step": 34
},
{
"epoch": 0.08,
"learning_rate": 0.00024567690745599767,
"loss": 0.9124,
"step": 35
},
{
"epoch": 0.09,
"learning_rate": 0.0002422406975624772,
"loss": 0.9535,
"step": 36
},
{
"epoch": 0.09,
"learning_rate": 0.00023894475218048754,
"loss": 0.9019,
"step": 37
},
{
"epoch": 0.09,
"learning_rate": 0.0002357797812857538,
"loss": 1.024,
"step": 38
},
{
"epoch": 0.09,
"learning_rate": 0.00023273733406281566,
"loss": 0.8549,
"step": 39
},
{
"epoch": 0.1,
"learning_rate": 0.0002298097038856279,
"loss": 1.0489,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 0.00022698984612511293,
"loss": 0.751,
"step": 41
},
{
"epoch": 0.1,
"learning_rate": 0.00022427130678626507,
"loss": 0.834,
"step": 42
},
{
"epoch": 0.1,
"learning_rate": 0.00022164816032790388,
"loss": 0.889,
"step": 43
},
{
"epoch": 0.11,
"learning_rate": 0.00021911495530055366,
"loss": 1.0103,
"step": 44
},
{
"epoch": 0.11,
"learning_rate": 0.00021666666666666666,
"loss": 0.8766,
"step": 45
},
{
"epoch": 0.11,
"learning_rate": 0.0002142986538536308,
"loss": 0.8181,
"step": 46
},
{
"epoch": 0.11,
"learning_rate": 0.0002120066237423687,
"loss": 0.8754,
"step": 47
},
{
"epoch": 0.12,
"learning_rate": 0.0002097865979195684,
"loss": 0.9038,
"step": 48
},
{
"epoch": 0.12,
"learning_rate": 0.00020763488362498048,
"loss": 0.8646,
"step": 49
},
{
"epoch": 0.12,
"learning_rate": 0.00020554804791094464,
"loss": 0.8836,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 0.0002035228946026736,
"loss": 0.9962,
"step": 51
},
{
"epoch": 0.12,
"learning_rate": 0.0002015564437074637,
"loss": 0.8835,
"step": 52
},
{
"epoch": 0.13,
"learning_rate": 0.00019964591297103414,
"loss": 0.9196,
"step": 53
},
{
"epoch": 0.13,
"learning_rate": 0.00019778870132130996,
"loss": 0.8995,
"step": 54
},
{
"epoch": 0.13,
"learning_rate": 0.00019598237397554634,
"loss": 1.0178,
"step": 55
},
{
"epoch": 0.13,
"learning_rate": 0.00019422464901683895,
"loss": 0.9395,
"step": 56
},
{
"epoch": 0.14,
"learning_rate": 0.00019251338527170498,
"loss": 0.9882,
"step": 57
},
{
"epoch": 0.14,
"learning_rate": 0.00019084657134227863,
"loss": 0.9274,
"step": 58
},
{
"epoch": 0.14,
"learning_rate": 0.00018922231566536414,
"loss": 0.9517,
"step": 59
},
{
"epoch": 0.14,
"learning_rate": 0.0001876388374866284,
"loss": 0.865,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 0.00018609445865200715,
"loss": 0.9314,
"step": 61
},
{
"epoch": 0.15,
"learning_rate": 0.00018458759613029606,
"loss": 0.9224,
"step": 62
},
{
"epoch": 0.15,
"learning_rate": 0.00018311675519117857,
"loss": 0.788,
"step": 63
},
{
"epoch": 0.15,
"learning_rate": 0.00018168052317185792,
"loss": 0.9739,
"step": 64
},
{
"epoch": 0.16,
"learning_rate": 0.00018027756377319947,
"loss": 0.9419,
"step": 65
},
{
"epoch": 0.16,
"learning_rate": 0.0001789066118330336,
"loss": 0.8772,
"step": 66
},
{
"epoch": 0.16,
"learning_rate": 0.00017756646853014972,
"loss": 0.8707,
"step": 67
},
{
"epoch": 0.16,
"learning_rate": 0.00017625599697765812,
"loss": 0.8089,
"step": 68
},
{
"epoch": 0.17,
"learning_rate": 0.00017497411816890378,
"loss": 0.9303,
"step": 69
},
{
"epoch": 0.17,
"learning_rate": 0.00017371980724307585,
"loss": 0.9161,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 0.00017249209004113945,
"loss": 0.9064,
"step": 71
},
{
"epoch": 0.17,
"learning_rate": 0.00017129003992578723,
"loss": 1.0988,
"step": 72
},
{
"epoch": 0.18,
"learning_rate": 0.00017011277484181944,
"loss": 0.9804,
"step": 73
},
{
"epoch": 0.18,
"learning_rate": 0.0001689594545957618,
"loss": 0.8382,
"step": 74
},
{
"epoch": 0.18,
"learning_rate": 0.00016782927833565472,
"loss": 0.9632,
"step": 75
},
{
"epoch": 0.18,
"learning_rate": 0.00016672148221383754,
"loss": 0.9494,
"step": 76
},
{
"epoch": 0.18,
"learning_rate": 0.00016563533721722828,
"loss": 0.9253,
"step": 77
},
{
"epoch": 0.19,
"learning_rate": 0.0001645701471510958,
"loss": 0.9143,
"step": 78
},
{
"epoch": 0.19,
"learning_rate": 0.00016352524676365398,
"loss": 0.8907,
"step": 79
},
{
"epoch": 0.19,
"learning_rate": 0.0001625,
"loss": 0.9748,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 0.00016149379837498482,
"loss": 0.893,
"step": 81
},
{
"epoch": 0.2,
"learning_rate": 0.00016050605945555833,
"loss": 0.839,
"step": 82
},
{
"epoch": 0.2,
"learning_rate": 0.0001595362254439902,
"loss": 0.9276,
"step": 83
},
{
"epoch": 0.2,
"learning_rate": 0.00015858376185413662,
"loss": 0.8758,
"step": 84
},
{
"epoch": 0.2,
"learning_rate": 0.00015764815627361642,
"loss": 0.9125,
"step": 85
},
{
"epoch": 0.21,
"learning_rate": 0.00015672891720538393,
"loss": 0.955,
"step": 86
},
{
"epoch": 0.21,
"learning_rate": 0.00015582557298274985,
"loss": 0.9104,
"step": 87
},
{
"epoch": 0.21,
"learning_rate": 0.00015493767075240873,
"loss": 0.8861,
"step": 88
},
{
"epoch": 0.21,
"learning_rate": 0.0001540647755204926,
"loss": 0.9693,
"step": 89
},
{
"epoch": 0.22,
"learning_rate": 0.0001532064692570853,
"loss": 0.7245,
"step": 90
},
{
"epoch": 0.22,
"learning_rate": 0.000152362350055011,
"loss": 0.7523,
"step": 91
},
{
"epoch": 0.22,
"learning_rate": 0.000151532031339051,
"loss": 0.8522,
"step": 92
},
{
"epoch": 0.22,
"learning_rate": 0.00015071514112205468,
"loss": 0.9273,
"step": 93
},
{
"epoch": 0.23,
"learning_rate": 0.0001499113213046938,
"loss": 1.0303,
"step": 94
},
{
"epoch": 0.23,
"learning_rate": 0.00014912022701586513,
"loss": 0.9273,
"step": 95
},
{
"epoch": 0.23,
"learning_rate": 0.00014834152599098248,
"loss": 0.9071,
"step": 96
},
{
"epoch": 0.23,
"learning_rate": 0.00014757489798561242,
"loss": 0.954,
"step": 97
},
{
"epoch": 0.24,
"learning_rate": 0.00014682003422210332,
"loss": 0.7897,
"step": 98
},
{
"epoch": 0.24,
"learning_rate": 0.00014607663686703578,
"loss": 0.9045,
"step": 99
},
{
"epoch": 0.24,
"learning_rate": 0.00014534441853748632,
"loss": 0.7919,
"step": 100
},
{
"epoch": 0.24,
"learning_rate": 0.00014462310183424506,
"loss": 0.7449,
"step": 101
},
{
"epoch": 0.24,
"learning_rate": 0.0001439124189002655,
"loss": 0.8953,
"step": 102
},
{
"epoch": 0.25,
"learning_rate": 0.0001432121110027503,
"loss": 0.974,
"step": 103
},
{
"epoch": 0.25,
"learning_rate": 0.00014252192813739225,
"loss": 0.959,
"step": 104
},
{
"epoch": 0.25,
"learning_rate": 0.00014184162865339505,
"loss": 0.8767,
"step": 105
},
{
"epoch": 0.25,
"learning_rate": 0.00014117097889799755,
"loss": 0.9206,
"step": 106
},
{
"epoch": 0.26,
"learning_rate": 0.000140509752879313,
"loss": 0.8096,
"step": 107
},
{
"epoch": 0.26,
"learning_rate": 0.00013985773194637893,
"loss": 0.9726,
"step": 108
},
{
"epoch": 0.26,
"learning_rate": 0.00013921470448538878,
"loss": 0.7764,
"step": 109
},
{
"epoch": 0.26,
"learning_rate": 0.00013858046563114675,
"loss": 0.8414,
"step": 110
},
{
"epoch": 0.27,
"learning_rate": 0.0001379548169928529,
"loss": 0.9365,
"step": 111
},
{
"epoch": 0.27,
"learning_rate": 0.00013733756639338393,
"loss": 0.9857,
"step": 112
},
{
"epoch": 0.27,
"learning_rate": 0.00013672852762129314,
"loss": 0.8209,
"step": 113
},
{
"epoch": 0.27,
"learning_rate": 0.00013612752019480102,
"loss": 0.9954,
"step": 114
},
{
"epoch": 0.28,
"learning_rate": 0.0001355343691370986,
"loss": 0.9425,
"step": 115
},
{
"epoch": 0.28,
"learning_rate": 0.00013494890476232745,
"loss": 0.9199,
"step": 116
},
{
"epoch": 0.28,
"learning_rate": 0.0001343709624716425,
"loss": 1.0011,
"step": 117
},
{
"epoch": 0.28,
"learning_rate": 0.00013380038255880045,
"loss": 0.9335,
"step": 118
},
{
"epoch": 0.29,
"learning_rate": 0.000133237010024753,
"loss": 1.0612,
"step": 119
},
{
"epoch": 0.29,
"learning_rate": 0.0001326806944007555,
"loss": 0.8794,
"step": 120
},
{
"epoch": 0.29,
"learning_rate": 0.00013213128957953303,
"loss": 0.8557,
"step": 121
},
{
"epoch": 0.29,
"learning_rate": 0.00013158865365407385,
"loss": 0.931,
"step": 122
},
{
"epoch": 0.29,
"learning_rate": 0.00013105264876364566,
"loss": 0.9648,
"step": 123
},
{
"epoch": 0.3,
"learning_rate": 0.00013052314094665608,
"loss": 0.8448,
"step": 124
},
{
"epoch": 0.3,
"learning_rate": 0.00013,
"loss": 0.9247,
"step": 125
},
{
"epoch": 0.3,
"learning_rate": 0.0001294830993445593,
"loss": 0.9537,
"step": 126
},
{
"epoch": 0.3,
"learning_rate": 0.00012897231589653857,
"loss": 0.8049,
"step": 127
},
{
"epoch": 0.31,
"learning_rate": 0.0001284675299443404,
"loss": 0.8177,
"step": 128
},
{
"epoch": 0.31,
"learning_rate": 0.00012796862503070062,
"loss": 0.9717,
"step": 129
},
{
"epoch": 0.31,
"learning_rate": 0.00012747548783981962,
"loss": 0.8813,
"step": 130
},
{
"epoch": 0.31,
"learning_rate": 0.00012698800808924157,
"loss": 0.9708,
"step": 131
},
{
"epoch": 0.32,
"learning_rate": 0.00012650607842624748,
"loss": 0.8776,
"step": 132
},
{
"epoch": 0.32,
"learning_rate": 0.0001260295943285407,
"loss": 0.8564,
"step": 133
},
{
"epoch": 0.32,
"learning_rate": 0.00012555845400901656,
"loss": 0.8793,
"step": 134
},
{
"epoch": 0.32,
"learning_rate": 0.0001250925583244189,
"loss": 0.9288,
"step": 135
},
{
"epoch": 0.33,
"learning_rate": 0.00012463181068769768,
"loss": 0.9407,
"step": 136
},
{
"epoch": 0.33,
"learning_rate": 0.0001241761169838914,
"loss": 0.9746,
"step": 137
},
{
"epoch": 0.33,
"learning_rate": 0.00012372538548936814,
"loss": 1.0109,
"step": 138
},
{
"epoch": 0.33,
"learning_rate": 0.00012327952679426827,
"loss": 1.0695,
"step": 139
},
{
"epoch": 0.34,
"learning_rate": 0.00012283845372799884,
"loss": 0.9092,
"step": 140
},
{
"epoch": 0.34,
"learning_rate": 0.00012240208128764027,
"loss": 0.7535,
"step": 141
},
{
"epoch": 0.34,
"learning_rate": 0.00012197032656913024,
"loss": 0.7952,
"step": 142
},
{
"epoch": 0.34,
"learning_rate": 0.00012154310870109942,
"loss": 0.8747,
"step": 143
},
{
"epoch": 0.35,
"learning_rate": 0.0001211203487812386,
"loss": 0.823,
"step": 144
},
{
"epoch": 0.35,
"learning_rate": 0.0001207019698150837,
"loss": 0.838,
"step": 145
},
{
"epoch": 0.35,
"learning_rate": 0.00012028789665711085,
"loss": 0.8352,
"step": 146
},
{
"epoch": 0.35,
"learning_rate": 0.00011987805595403907,
"loss": 0.9483,
"step": 147
},
{
"epoch": 0.35,
"learning_rate": 0.00011947237609024377,
"loss": 0.8841,
"step": 148
},
{
"epoch": 0.36,
"learning_rate": 0.00011907078713518815,
"loss": 1.0176,
"step": 149
},
{
"epoch": 0.36,
"learning_rate": 0.00011867322079278597,
"loss": 0.9113,
"step": 150
},
{
"epoch": 0.36,
"learning_rate": 0.00011827961035261132,
"loss": 0.8683,
"step": 151
},
{
"epoch": 0.36,
"learning_rate": 0.0001178898906428769,
"loss": 0.8158,
"step": 152
},
{
"epoch": 0.37,
"learning_rate": 0.0001175039979851054,
"loss": 0.8146,
"step": 153
},
{
"epoch": 0.37,
"learning_rate": 0.00011712187015042266,
"loss": 0.8513,
"step": 154
},
{
"epoch": 0.37,
"learning_rate": 0.00011674344631740369,
"loss": 0.8071,
"step": 155
},
{
"epoch": 0.37,
"learning_rate": 0.00011636866703140783,
"loss": 0.8923,
"step": 156
},
{
"epoch": 0.38,
"learning_rate": 0.00011599747416534057,
"loss": 0.9082,
"step": 157
},
{
"epoch": 0.38,
"learning_rate": 0.00011562981088178324,
"loss": 0.8323,
"step": 158
},
{
"epoch": 0.38,
"learning_rate": 0.00011526562159643515,
"loss": 0.8079,
"step": 159
},
{
"epoch": 0.38,
"learning_rate": 0.00011490485194281395,
"loss": 0.8623,
"step": 160
},
{
"epoch": 0.39,
"learning_rate": 0.00011454744873816422,
"loss": 0.8465,
"step": 161
},
{
"epoch": 0.39,
"learning_rate": 0.0001141933599505248,
"loss": 0.9027,
"step": 162
},
{
"epoch": 0.39,
"learning_rate": 0.00011384253466690954,
"loss": 0.907,
"step": 163
},
{
"epoch": 0.39,
"learning_rate": 0.00011349492306255647,
"loss": 0.9152,
"step": 164
},
{
"epoch": 0.4,
"learning_rate": 0.0001131504763712036,
"loss": 0.7418,
"step": 165
},
{
"epoch": 0.4,
"learning_rate": 0.00011280914685635128,
"loss": 0.8328,
"step": 166
},
{
"epoch": 0.4,
"learning_rate": 0.0001124708877834722,
"loss": 0.9287,
"step": 167
},
{
"epoch": 0.4,
"learning_rate": 0.00011213565339313254,
"loss": 0.7967,
"step": 168
},
{
"epoch": 0.41,
"learning_rate": 0.00011180339887498949,
"loss": 0.84,
"step": 169
},
{
"epoch": 0.41,
"learning_rate": 0.00011147408034263073,
"loss": 0.8149,
"step": 170
},
{
"epoch": 0.41,
"learning_rate": 0.00011114765480922503,
"loss": 0.8555,
"step": 171
},
{
"epoch": 0.41,
"learning_rate": 0.00011082408016395194,
"loss": 0.814,
"step": 172
},
{
"epoch": 0.41,
"learning_rate": 0.00011050331514918246,
"loss": 0.8139,
"step": 173
},
{
"epoch": 0.42,
"learning_rate": 0.0001101853193383817,
"loss": 0.885,
"step": 174
},
{
"epoch": 0.42,
"learning_rate": 0.00010987005311470715,
"loss": 0.7682,
"step": 175
},
{
"epoch": 0.42,
"learning_rate": 0.00010955747765027683,
"loss": 0.8266,
"step": 176
},
{
"epoch": 0.42,
"learning_rate": 0.00010924755488608232,
"loss": 0.8699,
"step": 177
},
{
"epoch": 0.43,
"learning_rate": 0.00010894024751252352,
"loss": 0.957,
"step": 178
},
{
"epoch": 0.43,
"learning_rate": 0.00010863551895054227,
"loss": 0.854,
"step": 179
},
{
"epoch": 0.43,
"learning_rate": 0.00010833333333333333,
"loss": 0.7239,
"step": 180
},
{
"epoch": 0.43,
"learning_rate": 0.00010803365548861171,
"loss": 0.7825,
"step": 181
},
{
"epoch": 0.44,
"learning_rate": 0.00010773645092141682,
"loss": 0.8531,
"step": 182
},
{
"epoch": 0.44,
"learning_rate": 0.00010744168579743401,
"loss": 0.7602,
"step": 183
},
{
"epoch": 0.44,
"learning_rate": 0.0001071493269268154,
"loss": 0.8768,
"step": 184
},
{
"epoch": 0.44,
"learning_rate": 0.00010685934174848223,
"loss": 0.8294,
"step": 185
},
{
"epoch": 0.45,
"learning_rate": 0.00010657169831489234,
"loss": 0.8872,
"step": 186
},
{
"epoch": 0.45,
"learning_rate": 0.0001062863652772559,
"loss": 0.7016,
"step": 187
},
{
"epoch": 0.45,
"learning_rate": 0.00010600331187118435,
"loss": 0.8942,
"step": 188
},
{
"epoch": 0.45,
"learning_rate": 0.00010572250790275775,
"loss": 0.7416,
"step": 189
},
{
"epoch": 0.46,
"learning_rate": 0.00010544392373499565,
"loss": 0.8104,
"step": 190
},
{
"epoch": 0.46,
"learning_rate": 0.0001051675302747182,
"loss": 0.8349,
"step": 191
},
{
"epoch": 0.46,
"learning_rate": 0.0001048932989597842,
"loss": 0.9013,
"step": 192
},
{
"epoch": 0.46,
"learning_rate": 0.00010462120174669319,
"loss": 0.7646,
"step": 193
},
{
"epoch": 0.47,
"learning_rate": 0.00010435121109853953,
"loss": 0.8087,
"step": 194
},
{
"epoch": 0.47,
"learning_rate": 0.00010408329997330662,
"loss": 0.9798,
"step": 195
},
{
"epoch": 0.47,
"learning_rate": 0.00010381744181249024,
"loss": 0.7266,
"step": 196
},
{
"epoch": 0.47,
"learning_rate": 0.0001035536105300395,
"loss": 0.8502,
"step": 197
},
{
"epoch": 0.47,
"learning_rate": 0.00010329178050160582,
"loss": 0.7797,
"step": 198
},
{
"epoch": 0.48,
"learning_rate": 0.00010303192655408924,
"loss": 0.7328,
"step": 199
},
{
"epoch": 0.48,
"learning_rate": 0.00010277402395547232,
"loss": 0.7916,
"step": 200
}
],
"logging_steps": 1,
"max_steps": 417,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 6.487869967879373e+16,
"trial_name": null,
"trial_params": null
}