|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.47961630695443647, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013, |
|
"loss": 1.1241, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00026, |
|
"loss": 1.0107, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00039, |
|
"loss": 1.1086, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00052, |
|
"loss": 1.0044, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00065, |
|
"loss": 1.0496, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005933661039639299, |
|
"loss": 1.0199, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005493502655735357, |
|
"loss": 1.0198, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005138701197773616, |
|
"loss": 0.969, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004844813951249544, |
|
"loss": 0.9383, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004596194077712558, |
|
"loss": 0.8776, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004382299106011073, |
|
"loss": 1.0173, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004195731958391368, |
|
"loss": 1.1173, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004031128874149274, |
|
"loss": 1.0876, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0003884492980336779, |
|
"loss": 1.0524, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0003752776749732568, |
|
"loss": 0.8953, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00036336104634371584, |
|
"loss": 1.1335, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00035251199395531623, |
|
"loss": 0.9837, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00034258007985157445, |
|
"loss": 0.9707, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0003334429644276751, |
|
"loss": 0.9149, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000325, |
|
"loss": 1.0043, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00031716752370827323, |
|
"loss": 1.001, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00030987534150481746, |
|
"loss": 1.0395, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000303064062678102, |
|
"loss": 0.8718, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029668305198196496, |
|
"loss": 1.1114, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029068883707497264, |
|
"loss": 0.7765, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002850438562747845, |
|
"loss": 0.9522, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00027971546389275785, |
|
"loss": 0.9588, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027467513278676785, |
|
"loss": 1.0313, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002698978095246549, |
|
"loss": 0.9338, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000265361388801511, |
|
"loss": 0.892, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00026104628189331215, |
|
"loss": 0.893, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002569350598886808, |
|
"loss": 0.8983, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00025301215685249496, |
|
"loss": 0.9277, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00024926362137539537, |
|
"loss": 0.8962, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00024567690745599767, |
|
"loss": 0.9124, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002422406975624772, |
|
"loss": 0.9535, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00023894475218048754, |
|
"loss": 0.9019, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002357797812857538, |
|
"loss": 1.024, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00023273733406281566, |
|
"loss": 0.8549, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002298097038856279, |
|
"loss": 1.0489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022698984612511293, |
|
"loss": 0.751, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022427130678626507, |
|
"loss": 0.834, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022164816032790388, |
|
"loss": 0.889, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021911495530055366, |
|
"loss": 1.0103, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021666666666666666, |
|
"loss": 0.8766, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002142986538536308, |
|
"loss": 0.8181, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002120066237423687, |
|
"loss": 0.8754, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002097865979195684, |
|
"loss": 0.9038, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00020763488362498048, |
|
"loss": 0.8646, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00020554804791094464, |
|
"loss": 0.8836, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002035228946026736, |
|
"loss": 0.9962, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002015564437074637, |
|
"loss": 0.8835, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019964591297103414, |
|
"loss": 0.9196, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019778870132130996, |
|
"loss": 0.8995, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019598237397554634, |
|
"loss": 1.0178, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019422464901683895, |
|
"loss": 0.9395, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019251338527170498, |
|
"loss": 0.9882, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019084657134227863, |
|
"loss": 0.9274, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018922231566536414, |
|
"loss": 0.9517, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001876388374866284, |
|
"loss": 0.865, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018609445865200715, |
|
"loss": 0.9314, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018458759613029606, |
|
"loss": 0.9224, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018311675519117857, |
|
"loss": 0.788, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018168052317185792, |
|
"loss": 0.9739, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018027756377319947, |
|
"loss": 0.9419, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001789066118330336, |
|
"loss": 0.8772, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017756646853014972, |
|
"loss": 0.8707, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017625599697765812, |
|
"loss": 0.8089, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017497411816890378, |
|
"loss": 0.9303, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017371980724307585, |
|
"loss": 0.9161, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017249209004113945, |
|
"loss": 0.9064, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017129003992578723, |
|
"loss": 1.0988, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00017011277484181944, |
|
"loss": 0.9804, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001689594545957618, |
|
"loss": 0.8382, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016782927833565472, |
|
"loss": 0.9632, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016672148221383754, |
|
"loss": 0.9494, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00016563533721722828, |
|
"loss": 0.9253, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001645701471510958, |
|
"loss": 0.9143, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016352524676365398, |
|
"loss": 0.8907, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001625, |
|
"loss": 0.9748, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00016149379837498482, |
|
"loss": 0.893, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00016050605945555833, |
|
"loss": 0.839, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001595362254439902, |
|
"loss": 0.9276, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00015858376185413662, |
|
"loss": 0.8758, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00015764815627361642, |
|
"loss": 0.9125, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015672891720538393, |
|
"loss": 0.955, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015582557298274985, |
|
"loss": 0.9104, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00015493767075240873, |
|
"loss": 0.8861, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001540647755204926, |
|
"loss": 0.9693, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001532064692570853, |
|
"loss": 0.7245, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000152362350055011, |
|
"loss": 0.7523, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000151532031339051, |
|
"loss": 0.8522, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00015071514112205468, |
|
"loss": 0.9273, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001499113213046938, |
|
"loss": 1.0303, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00014912022701586513, |
|
"loss": 0.9273, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00014834152599098248, |
|
"loss": 0.9071, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00014757489798561242, |
|
"loss": 0.954, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014682003422210332, |
|
"loss": 0.7897, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014607663686703578, |
|
"loss": 0.9045, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014534441853748632, |
|
"loss": 0.7919, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014462310183424506, |
|
"loss": 0.7449, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001439124189002655, |
|
"loss": 0.8953, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001432121110027503, |
|
"loss": 0.974, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014252192813739225, |
|
"loss": 0.959, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014184162865339505, |
|
"loss": 0.8767, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014117097889799755, |
|
"loss": 0.9206, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000140509752879313, |
|
"loss": 0.8096, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00013985773194637893, |
|
"loss": 0.9726, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00013921470448538878, |
|
"loss": 0.7764, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00013858046563114675, |
|
"loss": 0.8414, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001379548169928529, |
|
"loss": 0.9365, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00013733756639338393, |
|
"loss": 0.9857, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00013672852762129314, |
|
"loss": 0.8209, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00013612752019480102, |
|
"loss": 0.9954, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001355343691370986, |
|
"loss": 0.9425, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00013494890476232745, |
|
"loss": 0.9199, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001343709624716425, |
|
"loss": 1.0011, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00013380038255880045, |
|
"loss": 0.9335, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000133237010024753, |
|
"loss": 1.0612, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001326806944007555, |
|
"loss": 0.8794, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00013213128957953303, |
|
"loss": 0.8557, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00013158865365407385, |
|
"loss": 0.931, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00013105264876364566, |
|
"loss": 0.9648, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00013052314094665608, |
|
"loss": 0.8448, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00013, |
|
"loss": 0.9247, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001294830993445593, |
|
"loss": 0.9537, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00012897231589653857, |
|
"loss": 0.8049, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001284675299443404, |
|
"loss": 0.8177, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00012796862503070062, |
|
"loss": 0.9717, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00012747548783981962, |
|
"loss": 0.8813, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00012698800808924157, |
|
"loss": 0.9708, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00012650607842624748, |
|
"loss": 0.8776, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001260295943285407, |
|
"loss": 0.8564, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00012555845400901656, |
|
"loss": 0.8793, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001250925583244189, |
|
"loss": 0.9288, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00012463181068769768, |
|
"loss": 0.9407, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001241761169838914, |
|
"loss": 0.9746, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00012372538548936814, |
|
"loss": 1.0109, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00012327952679426827, |
|
"loss": 1.0695, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00012283845372799884, |
|
"loss": 0.9092, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00012240208128764027, |
|
"loss": 0.7535, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00012197032656913024, |
|
"loss": 0.7952, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00012154310870109942, |
|
"loss": 0.8747, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001211203487812386, |
|
"loss": 0.823, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001207019698150837, |
|
"loss": 0.838, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00012028789665711085, |
|
"loss": 0.8352, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00011987805595403907, |
|
"loss": 0.9483, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00011947237609024377, |
|
"loss": 0.8841, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00011907078713518815, |
|
"loss": 1.0176, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00011867322079278597, |
|
"loss": 0.9113, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00011827961035261132, |
|
"loss": 0.8683, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001178898906428769, |
|
"loss": 0.8158, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001175039979851054, |
|
"loss": 0.8146, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00011712187015042266, |
|
"loss": 0.8513, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00011674344631740369, |
|
"loss": 0.8071, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00011636866703140783, |
|
"loss": 0.8923, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00011599747416534057, |
|
"loss": 0.9082, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00011562981088178324, |
|
"loss": 0.8323, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00011526562159643515, |
|
"loss": 0.8079, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00011490485194281395, |
|
"loss": 0.8623, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00011454744873816422, |
|
"loss": 0.8465, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001141933599505248, |
|
"loss": 0.9027, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00011384253466690954, |
|
"loss": 0.907, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00011349492306255647, |
|
"loss": 0.9152, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001131504763712036, |
|
"loss": 0.7418, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00011280914685635128, |
|
"loss": 0.8328, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001124708877834722, |
|
"loss": 0.9287, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00011213565339313254, |
|
"loss": 0.7967, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011180339887498949, |
|
"loss": 0.84, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011147408034263073, |
|
"loss": 0.8149, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011114765480922503, |
|
"loss": 0.8555, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011082408016395194, |
|
"loss": 0.814, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00011050331514918246, |
|
"loss": 0.8139, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001101853193383817, |
|
"loss": 0.885, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00010987005311470715, |
|
"loss": 0.7682, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00010955747765027683, |
|
"loss": 0.8266, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00010924755488608232, |
|
"loss": 0.8699, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00010894024751252352, |
|
"loss": 0.957, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00010863551895054227, |
|
"loss": 0.854, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00010833333333333333, |
|
"loss": 0.7239, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00010803365548861171, |
|
"loss": 0.7825, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00010773645092141682, |
|
"loss": 0.8531, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00010744168579743401, |
|
"loss": 0.7602, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001071493269268154, |
|
"loss": 0.8768, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00010685934174848223, |
|
"loss": 0.8294, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00010657169831489234, |
|
"loss": 0.8872, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001062863652772559, |
|
"loss": 0.7016, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00010600331187118435, |
|
"loss": 0.8942, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00010572250790275775, |
|
"loss": 0.7416, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010544392373499565, |
|
"loss": 0.8104, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001051675302747182, |
|
"loss": 0.8349, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001048932989597842, |
|
"loss": 0.9013, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00010462120174669319, |
|
"loss": 0.7646, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010435121109853953, |
|
"loss": 0.8087, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010408329997330662, |
|
"loss": 0.9798, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010381744181249024, |
|
"loss": 0.7266, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001035536105300395, |
|
"loss": 0.8502, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00010329178050160582, |
|
"loss": 0.7797, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010303192655408924, |
|
"loss": 0.7328, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00010277402395547232, |
|
"loss": 0.7916, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 417, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"total_flos": 6.487869967879373e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|