|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 699, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 3.8527, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.357, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6448, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-05, |
|
"loss": 3.7495, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001, |
|
"loss": 3.6283, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012, |
|
"loss": 2.8617, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014, |
|
"loss": 2.1554, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016, |
|
"loss": 1.5715, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018, |
|
"loss": 1.162, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9593, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999988670134103, |
|
"loss": 0.8023, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999954680562074, |
|
"loss": 0.9134, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999898031360943, |
|
"loss": 0.8533, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999818722659068, |
|
"loss": 0.792, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999716754636165, |
|
"loss": 0.9903, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999592127523287, |
|
"loss": 0.7565, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999944484160284, |
|
"loss": 0.8171, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999274897208565, |
|
"loss": 0.9344, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999082294725555, |
|
"loss": 0.8733, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019998867034590241, |
|
"loss": 0.9758, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 0.6869800090789795, |
|
"eval_runtime": 29.3692, |
|
"eval_samples_per_second": 55.466, |
|
"eval_steps_per_second": 27.75, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019998629117290396, |
|
"loss": 0.8391, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001999836854336514, |
|
"loss": 1.032, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019998085313404916, |
|
"loss": 0.9099, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019997779428051522, |
|
"loss": 0.8684, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019997450887998086, |
|
"loss": 0.9439, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001999709969398907, |
|
"loss": 1.0515, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001999672584682027, |
|
"loss": 1.1631, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019996329347338814, |
|
"loss": 0.7389, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019995910196443157, |
|
"loss": 0.6839, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019995468395083088, |
|
"loss": 1.0289, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019995003944259715, |
|
"loss": 0.9143, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019994516845025468, |
|
"loss": 0.95, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001999400709848411, |
|
"loss": 0.9214, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000199934747057907, |
|
"loss": 0.9471, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019992919668151635, |
|
"loss": 0.7858, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019992341986824612, |
|
"loss": 0.6643, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019991741663118642, |
|
"loss": 0.7344, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019991118698394042, |
|
"loss": 0.7909, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019990473094062434, |
|
"loss": 0.7732, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019989804851586743, |
|
"loss": 0.7228, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 0.6790834069252014, |
|
"eval_runtime": 29.3451, |
|
"eval_samples_per_second": 55.512, |
|
"eval_steps_per_second": 27.773, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019989113972481183, |
|
"loss": 0.8713, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001998840045831127, |
|
"loss": 0.6862, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019987664310693805, |
|
"loss": 0.7524, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019986905531296884, |
|
"loss": 0.7405, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001998612412183988, |
|
"loss": 0.7113, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019985320084093443, |
|
"loss": 0.7264, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019984493419879503, |
|
"loss": 0.717, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019983644131071256, |
|
"loss": 0.9136, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019982772219593172, |
|
"loss": 0.662, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019981877687420975, |
|
"loss": 0.6811, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019980960536581654, |
|
"loss": 0.7089, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001998002076915345, |
|
"loss": 0.774, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019979058387265843, |
|
"loss": 0.9975, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001997807339309957, |
|
"loss": 0.8504, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019977065788886602, |
|
"loss": 0.7098, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001997603557691014, |
|
"loss": 0.6549, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019974982759504625, |
|
"loss": 0.6731, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000199739073390557, |
|
"loss": 0.8107, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019972809318000246, |
|
"loss": 0.7196, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019971688698826353, |
|
"loss": 0.6804, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.6613275408744812, |
|
"eval_runtime": 29.3333, |
|
"eval_samples_per_second": 55.534, |
|
"eval_steps_per_second": 27.784, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019970545484073306, |
|
"loss": 0.7939, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019969379676331602, |
|
"loss": 0.7578, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019968191278242934, |
|
"loss": 0.792, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019966980292500174, |
|
"loss": 0.9361, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019965746721847387, |
|
"loss": 0.7841, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019964490569079812, |
|
"loss": 0.7706, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019963211837043852, |
|
"loss": 0.8221, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019961910528637088, |
|
"loss": 0.6315, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001996058664680824, |
|
"loss": 0.6358, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019959240194557197, |
|
"loss": 0.7126, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019957871174934978, |
|
"loss": 0.5788, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019956479591043752, |
|
"loss": 0.7683, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000199550654460368, |
|
"loss": 0.7092, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019953628743118546, |
|
"loss": 0.7995, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019952169485544516, |
|
"loss": 0.6204, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019950687676621352, |
|
"loss": 0.6226, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001994918331970679, |
|
"loss": 0.5133, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001994765641820966, |
|
"loss": 0.9482, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019946106975589884, |
|
"loss": 0.7825, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019944534995358458, |
|
"loss": 0.8117, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.6359772682189941, |
|
"eval_runtime": 29.2458, |
|
"eval_samples_per_second": 55.7, |
|
"eval_steps_per_second": 27.867, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019942940481077446, |
|
"loss": 0.844, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019941323436359972, |
|
"loss": 0.766, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019939683864870217, |
|
"loss": 0.7071, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001993802177032341, |
|
"loss": 0.7645, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001993633715648581, |
|
"loss": 0.9787, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019934630027174707, |
|
"loss": 0.6916, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019932900386258407, |
|
"loss": 0.6979, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001993114823765623, |
|
"loss": 0.688, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019929373585338508, |
|
"loss": 0.8155, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019927576433326544, |
|
"loss": 0.6204, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001992575678569264, |
|
"loss": 0.8641, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001992391464656007, |
|
"loss": 0.758, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001992205002010307, |
|
"loss": 0.6035, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019920162910546833, |
|
"loss": 0.857, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019918253322167502, |
|
"loss": 0.6189, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019916321259292152, |
|
"loss": 0.8429, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019914366726298782, |
|
"loss": 0.9827, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019912389727616314, |
|
"loss": 0.9278, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019910390267724576, |
|
"loss": 0.6625, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001990836835115429, |
|
"loss": 0.6458, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.6334865093231201, |
|
"eval_runtime": 29.3599, |
|
"eval_samples_per_second": 55.484, |
|
"eval_steps_per_second": 27.759, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001990632398248706, |
|
"loss": 0.6327, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019904257166355376, |
|
"loss": 0.719, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019902167907442583, |
|
"loss": 0.6946, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019900056210482892, |
|
"loss": 0.6667, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019897922080261345, |
|
"loss": 0.6793, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001989576552161383, |
|
"loss": 0.7095, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019893586539427048, |
|
"loss": 0.6903, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001989138513863851, |
|
"loss": 0.9088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001988916132423654, |
|
"loss": 0.7673, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019886915101260234, |
|
"loss": 0.671, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019884646474799475, |
|
"loss": 0.6958, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019882355449994915, |
|
"loss": 0.6109, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019880042032037947, |
|
"loss": 0.7404, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001987770622617072, |
|
"loss": 0.7017, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019875348037686106, |
|
"loss": 0.8272, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019872967471927692, |
|
"loss": 0.8391, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019870564534289783, |
|
"loss": 0.6695, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001986813923021737, |
|
"loss": 0.7992, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019865691565206122, |
|
"loss": 0.7508, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019863221544802386, |
|
"loss": 0.7509, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.6245399713516235, |
|
"eval_runtime": 29.3529, |
|
"eval_samples_per_second": 55.497, |
|
"eval_steps_per_second": 27.766, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019860729174603163, |
|
"loss": 0.6813, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019858214460256095, |
|
"loss": 0.6432, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019855677407459458, |
|
"loss": 0.672, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019853118021962148, |
|
"loss": 0.7951, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019850536309563656, |
|
"loss": 0.7095, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019847932276114083, |
|
"loss": 0.6037, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019845305927514094, |
|
"loss": 0.5812, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019842657269714923, |
|
"loss": 0.6124, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001983998630871836, |
|
"loss": 0.6501, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001983729305057673, |
|
"loss": 0.9065, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019834577501392885, |
|
"loss": 0.8046, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019831839667320183, |
|
"loss": 0.904, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019829079554562487, |
|
"loss": 0.5364, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001982629716937414, |
|
"loss": 0.4704, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019823492518059946, |
|
"loss": 0.6457, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019820665606975175, |
|
"loss": 0.752, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019817816442525526, |
|
"loss": 0.7523, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019814945031167134, |
|
"loss": 0.675, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001981205137940654, |
|
"loss": 0.7782, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019809135493800679, |
|
"loss": 0.6174, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.6312826871871948, |
|
"eval_runtime": 29.36, |
|
"eval_samples_per_second": 55.484, |
|
"eval_steps_per_second": 27.759, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019806197380956874, |
|
"loss": 0.5355, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019803237047532802, |
|
"loss": 0.5741, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019800254500236503, |
|
"loss": 0.7357, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019797249745826353, |
|
"loss": 0.6641, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001979422279111104, |
|
"loss": 0.5884, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019791173642949564, |
|
"loss": 0.4902, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019788102308251212, |
|
"loss": 0.6585, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019785008793975548, |
|
"loss": 0.6445, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019781893107132393, |
|
"loss": 0.8865, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019778755254781807, |
|
"loss": 0.6996, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019775595244034077, |
|
"loss": 0.5696, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019772413082049713, |
|
"loss": 0.6353, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019769208776039397, |
|
"loss": 0.6547, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019765982333264006, |
|
"loss": 1.059, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001976273376103457, |
|
"loss": 0.6234, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001975946306671227, |
|
"loss": 0.5276, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019756170257708412, |
|
"loss": 0.6942, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001975285534148441, |
|
"loss": 0.7216, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019749518325551778, |
|
"loss": 0.6563, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019746159217472097, |
|
"loss": 0.7549, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 0.6180239319801331, |
|
"eval_runtime": 29.3051, |
|
"eval_samples_per_second": 55.588, |
|
"eval_steps_per_second": 27.811, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019742778024857028, |
|
"loss": 0.6399, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019739374755368253, |
|
"loss": 0.6149, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019735949416717493, |
|
"loss": 0.5183, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019732502016666476, |
|
"loss": 0.5566, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019729032563026914, |
|
"loss": 0.7261, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019725541063660498, |
|
"loss": 0.7432, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001972202752647887, |
|
"loss": 0.6235, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019718491959443615, |
|
"loss": 0.8306, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019714934370566227, |
|
"loss": 0.7739, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019711354767908113, |
|
"loss": 0.5514, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001970775315958055, |
|
"loss": 0.6287, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019704129553744696, |
|
"loss": 0.5172, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019700483958611534, |
|
"loss": 0.5628, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001969681638244189, |
|
"loss": 0.5371, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019693126833546392, |
|
"loss": 0.5714, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001968941532028546, |
|
"loss": 0.7978, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019685681851069285, |
|
"loss": 0.7627, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019681926434357802, |
|
"loss": 0.5672, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019678149078660692, |
|
"loss": 0.639, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019674349792537336, |
|
"loss": 0.6015, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.616681694984436, |
|
"eval_runtime": 29.315, |
|
"eval_samples_per_second": 55.569, |
|
"eval_steps_per_second": 27.801, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001967052858459682, |
|
"loss": 0.564, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001966668546349789, |
|
"loss": 0.6838, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019662820437948967, |
|
"loss": 0.6492, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019658933516708085, |
|
"loss": 0.7017, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019655024708582904, |
|
"loss": 0.7092, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019651094022430686, |
|
"loss": 0.4604, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001964714146715826, |
|
"loss": 0.6142, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019643167051722, |
|
"loss": 0.6881, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019639170785127835, |
|
"loss": 0.5784, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001963515267643119, |
|
"loss": 0.8617, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019631112734737, |
|
"loss": 0.7697, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019627050969199655, |
|
"loss": 0.5587, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019622967389023015, |
|
"loss": 0.5915, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001961886200346036, |
|
"loss": 0.9243, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019614734821814383, |
|
"loss": 0.7143, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019610585853437165, |
|
"loss": 0.9429, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019606415107730165, |
|
"loss": 0.5176, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001960222259414417, |
|
"loss": 0.5437, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019598008322179312, |
|
"loss": 0.6687, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019593772301385016, |
|
"loss": 0.716, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.6165403127670288, |
|
"eval_runtime": 29.3359, |
|
"eval_samples_per_second": 55.529, |
|
"eval_steps_per_second": 27.782, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001958951454135999, |
|
"loss": 0.6579, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000195852350517522, |
|
"loss": 0.6, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019580933842258867, |
|
"loss": 0.6689, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019576610922626402, |
|
"loss": 0.679, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019572266302650434, |
|
"loss": 0.8533, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019567899992175753, |
|
"loss": 0.5989, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000195635120010963, |
|
"loss": 0.5986, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019559102339355148, |
|
"loss": 0.7082, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019554671016944474, |
|
"loss": 0.6403, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019550218043905526, |
|
"loss": 0.5886, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019545743430328632, |
|
"loss": 0.5391, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001954124718635314, |
|
"loss": 0.6303, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001953672932216742, |
|
"loss": 0.7248, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019532189848008833, |
|
"loss": 0.6166, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019527628774163705, |
|
"loss": 0.5582, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019523046110967305, |
|
"loss": 0.6682, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019518441868803828, |
|
"loss": 0.6317, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001951381605810636, |
|
"loss": 0.7702, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019509168689356866, |
|
"loss": 0.584, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001950449977308616, |
|
"loss": 0.6304, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 0.6014450192451477, |
|
"eval_runtime": 29.301, |
|
"eval_samples_per_second": 55.595, |
|
"eval_steps_per_second": 27.815, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019499809319873873, |
|
"loss": 0.5839, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019495097340348458, |
|
"loss": 0.589, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019490363845187125, |
|
"loss": 0.8189, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019485608845115854, |
|
"loss": 0.5217, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019480832350909344, |
|
"loss": 0.6624, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019476034373391005, |
|
"loss": 0.9488, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001947121492343292, |
|
"loss": 0.7354, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001946637401195584, |
|
"loss": 0.6767, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019461511649929137, |
|
"loss": 0.6337, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019456627848370793, |
|
"loss": 0.436, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001945172261834737, |
|
"loss": 0.4473, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019446795970973993, |
|
"loss": 0.5208, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019441847917414307, |
|
"loss": 0.5907, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001943687846888047, |
|
"loss": 0.6171, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019431887636633125, |
|
"loss": 0.7125, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019426875431981355, |
|
"loss": 0.46, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019421841866282686, |
|
"loss": 0.71, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019416786950943044, |
|
"loss": 0.6707, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019411710697416726, |
|
"loss": 0.4639, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019406613117206397, |
|
"loss": 0.5781, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.6106613874435425, |
|
"eval_runtime": 29.3327, |
|
"eval_samples_per_second": 55.535, |
|
"eval_steps_per_second": 27.785, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019401494221863024, |
|
"loss": 0.5778, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019396354022985896, |
|
"loss": 0.6418, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001939119253222256, |
|
"loss": 0.4679, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019386009761268821, |
|
"loss": 0.6215, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019380805721868694, |
|
"loss": 0.5621, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019375580425814396, |
|
"loss": 0.5727, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019370333884946307, |
|
"loss": 0.7134, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001936506611115295, |
|
"loss": 0.6081, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019359777116370955, |
|
"loss": 0.9212, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001935446691258504, |
|
"loss": 0.5593, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019349135511827995, |
|
"loss": 0.6382, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001934378292618062, |
|
"loss": 0.6483, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019338409167771734, |
|
"loss": 0.5854, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019333014248778133, |
|
"loss": 0.6317, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019327598181424557, |
|
"loss": 0.556, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001932216097798367, |
|
"loss": 0.7319, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001931670265077602, |
|
"loss": 0.6195, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019311223212170045, |
|
"loss": 0.5316, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019305722674581996, |
|
"loss": 0.753, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019300201050475948, |
|
"loss": 0.8, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.594850480556488, |
|
"eval_runtime": 29.3304, |
|
"eval_samples_per_second": 55.54, |
|
"eval_steps_per_second": 27.787, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001929465835236375, |
|
"loss": 0.5674, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019289094592805011, |
|
"loss": 0.5302, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019283509784407058, |
|
"loss": 0.621, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019277903939824914, |
|
"loss": 0.6329, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019272277071761282, |
|
"loss": 0.5637, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019266629192966485, |
|
"loss": 0.5367, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019260960316238467, |
|
"loss": 0.6705, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019255270454422756, |
|
"loss": 0.7982, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019249559620412418, |
|
"loss": 0.5728, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019243827827148055, |
|
"loss": 0.5841, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019238075087617759, |
|
"loss": 0.7056, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019232301414857074, |
|
"loss": 0.6554, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019226506821948998, |
|
"loss": 0.6277, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019220691322023917, |
|
"loss": 0.5815, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019214854928259603, |
|
"loss": 0.722, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019208997653881164, |
|
"loss": 0.6855, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019203119512161023, |
|
"loss": 0.532, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019197220516418902, |
|
"loss": 0.5008, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019191300680021755, |
|
"loss": 0.5297, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001918536001638378, |
|
"loss": 0.6845, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.595288097858429, |
|
"eval_runtime": 29.2376, |
|
"eval_samples_per_second": 55.716, |
|
"eval_steps_per_second": 27.875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019179398538966358, |
|
"loss": 0.6998, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019173416261278044, |
|
"loss": 0.8184, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001916741319687451, |
|
"loss": 0.4835, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001916138935935854, |
|
"loss": 0.7273, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019155344762379994, |
|
"loss": 0.854, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001914927941963576, |
|
"loss": 0.6895, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001914319334486975, |
|
"loss": 0.6412, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001913708655187284, |
|
"loss": 0.5484, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019130959054482858, |
|
"loss": 0.5316, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019124810866584554, |
|
"loss": 0.5696, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019118642002109552, |
|
"loss": 0.6214, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019112452475036337, |
|
"loss": 0.5533, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019106242299390212, |
|
"loss": 0.4784, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019100011489243263, |
|
"loss": 0.856, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019093760058714346, |
|
"loss": 0.6587, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019087488021969027, |
|
"loss": 0.6849, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001908119539321958, |
|
"loss": 0.5236, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019074882186724928, |
|
"loss": 0.584, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001906854841679063, |
|
"loss": 0.5225, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001906219409776884, |
|
"loss": 0.5857, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.5940439701080322, |
|
"eval_runtime": 29.3378, |
|
"eval_samples_per_second": 55.526, |
|
"eval_steps_per_second": 27.78, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019055819244058272, |
|
"loss": 0.9588, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019049423870104174, |
|
"loss": 0.5345, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019043007990398293, |
|
"loss": 0.827, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001903657161947884, |
|
"loss": 0.53, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001903011477193046, |
|
"loss": 0.469, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019023637462384194, |
|
"loss": 0.6566, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019017139705517454, |
|
"loss": 0.5136, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019010621516053977, |
|
"loss": 0.6217, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019004082908763813, |
|
"loss": 0.5953, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018997523898463267, |
|
"loss": 0.6747, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018990944500014883, |
|
"loss": 0.5683, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018984344728327395, |
|
"loss": 0.8048, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018977724598355717, |
|
"loss": 0.5998, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018971084125100882, |
|
"loss": 0.6533, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018964423323610026, |
|
"loss": 0.5986, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018957742208976344, |
|
"loss": 0.5504, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001895104079633906, |
|
"loss": 0.6991, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018944319100883404, |
|
"loss": 0.566, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001893757713784055, |
|
"loss": 0.5838, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00018930814922487607, |
|
"loss": 0.6369, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.5889019966125488, |
|
"eval_runtime": 29.353, |
|
"eval_samples_per_second": 55.497, |
|
"eval_steps_per_second": 27.765, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00018924032470147575, |
|
"loss": 0.6911, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001891722979618931, |
|
"loss": 0.839, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001891040691602749, |
|
"loss": 0.664, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001890356384512257, |
|
"loss": 0.7154, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00018896700598980775, |
|
"loss": 0.6249, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001888981719315403, |
|
"loss": 0.6035, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018882913643239953, |
|
"loss": 0.542, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018875989964881797, |
|
"loss": 0.7356, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001886904617376844, |
|
"loss": 0.5376, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001886208228563432, |
|
"loss": 0.5325, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001885509831625942, |
|
"loss": 0.4952, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001884809428146923, |
|
"loss": 0.5887, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018841070197134706, |
|
"loss": 0.5159, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018834026079172237, |
|
"loss": 0.6428, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018826961943543594, |
|
"loss": 0.624, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018819877806255933, |
|
"loss": 0.7188, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018812773683361708, |
|
"loss": 0.4654, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018805649590958678, |
|
"loss": 0.7584, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018798505545189844, |
|
"loss": 0.5568, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018791341562243418, |
|
"loss": 0.4767, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 0.5945929288864136, |
|
"eval_runtime": 29.3121, |
|
"eval_samples_per_second": 55.574, |
|
"eval_steps_per_second": 27.804, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000187841576583528, |
|
"loss": 0.6751, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018776953849796514, |
|
"loss": 0.4626, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018769730152898208, |
|
"loss": 0.7323, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018762486584026578, |
|
"loss": 0.4619, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001875522315959536, |
|
"loss": 0.6054, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018747939896063276, |
|
"loss": 0.6465, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018740636809934009, |
|
"loss": 0.8281, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018733313917756154, |
|
"loss": 0.6565, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001872597123612319, |
|
"loss": 0.6819, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001871860878167344, |
|
"loss": 0.7214, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001871122657109002, |
|
"loss": 0.6414, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018703824621100825, |
|
"loss": 0.3563, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018696402948478475, |
|
"loss": 0.6458, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018688961570040283, |
|
"loss": 0.4847, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018681500502648214, |
|
"loss": 0.6457, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018674019763208842, |
|
"loss": 0.7152, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018666519368673324, |
|
"loss": 0.8583, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018658999336037356, |
|
"loss": 0.7007, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018651459682341126, |
|
"loss": 0.5519, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018643900424669286, |
|
"loss": 0.4848, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.5990561842918396, |
|
"eval_runtime": 29.3397, |
|
"eval_samples_per_second": 55.522, |
|
"eval_steps_per_second": 27.778, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018636321580150917, |
|
"loss": 0.5851, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001862872316595947, |
|
"loss": 0.5745, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018621105199312753, |
|
"loss": 0.5597, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018613467697472876, |
|
"loss": 0.6364, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001860581067774621, |
|
"loss": 0.5668, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018598134157483354, |
|
"loss": 0.7182, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000185904381540791, |
|
"loss": 0.5478, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018582722684972383, |
|
"loss": 0.485, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018574987767646254, |
|
"loss": 0.7902, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018567233419627824, |
|
"loss": 0.6895, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018559459658488238, |
|
"loss": 0.6304, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018551666501842636, |
|
"loss": 0.4389, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001854385396735009, |
|
"loss": 0.7157, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018536022072713606, |
|
"loss": 0.552, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018528170835680036, |
|
"loss": 0.5701, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018520300274040084, |
|
"loss": 0.7629, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018512410405628225, |
|
"loss": 0.4954, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018504501248322686, |
|
"loss": 1.0569, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018496572820045413, |
|
"loss": 0.5473, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018488625138762008, |
|
"loss": 0.9067, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.5943491458892822, |
|
"eval_runtime": 29.3323, |
|
"eval_samples_per_second": 55.536, |
|
"eval_steps_per_second": 27.785, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018480658222481703, |
|
"loss": 0.4678, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001847267208925732, |
|
"loss": 0.7051, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018464666757185216, |
|
"loss": 0.3741, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018456642244405266, |
|
"loss": 0.6583, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.000184485985691008, |
|
"loss": 0.4571, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018440535749498563, |
|
"loss": 0.4962, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018432453803868696, |
|
"loss": 0.6971, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018424352750524668, |
|
"loss": 0.7043, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001841623260782325, |
|
"loss": 0.5788, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018408093394164468, |
|
"loss": 0.6969, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018399935127991554, |
|
"loss": 0.6253, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018391757827790933, |
|
"loss": 0.6763, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018383561512092138, |
|
"loss": 0.6583, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018375346199467807, |
|
"loss": 0.5904, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018367111908533615, |
|
"loss": 0.8813, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001835885865794824, |
|
"loss": 0.7437, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018350586466413336, |
|
"loss": 0.6893, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018342295352673463, |
|
"loss": 0.5746, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018333985335516057, |
|
"loss": 0.7338, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000183256564337714, |
|
"loss": 0.5943, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.5853814482688904, |
|
"eval_runtime": 29.3481, |
|
"eval_samples_per_second": 55.506, |
|
"eval_steps_per_second": 27.77, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001831730866631256, |
|
"loss": 0.631, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018308942052055354, |
|
"loss": 0.5297, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018300556609958304, |
|
"loss": 0.6393, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018292152359022595, |
|
"loss": 0.4876, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018283729318292036, |
|
"loss": 0.5634, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018275287506853017, |
|
"loss": 0.4768, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018266826943834445, |
|
"loss": 0.9219, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001825834764840774, |
|
"loss": 0.5326, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018249849639786749, |
|
"loss": 0.717, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018241332937227734, |
|
"loss": 0.7521, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001823279756002932, |
|
"loss": 0.8977, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001822424352753244, |
|
"loss": 0.7813, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000182156708591203, |
|
"loss": 0.8893, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018207079574218338, |
|
"loss": 0.6064, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018198469692294174, |
|
"loss": 0.8171, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018189841232857571, |
|
"loss": 0.6169, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018181194215460388, |
|
"loss": 0.5828, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018172528659696533, |
|
"loss": 0.5173, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001816384458520192, |
|
"loss": 0.5217, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018155142011654435, |
|
"loss": 0.6999, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.5941495299339294, |
|
"eval_runtime": 29.3194, |
|
"eval_samples_per_second": 55.561, |
|
"eval_steps_per_second": 27.797, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001814642095877387, |
|
"loss": 0.5381, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018137681446321903, |
|
"loss": 0.7032, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018128923494102028, |
|
"loss": 0.5898, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018120147121959536, |
|
"loss": 0.4766, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001811135234978145, |
|
"loss": 0.5916, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018102539197496482, |
|
"loss": 0.5091, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018093707685075004, |
|
"loss": 0.7035, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018084857832528987, |
|
"loss": 0.7507, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018075989659911956, |
|
"loss": 0.4905, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018067103187318956, |
|
"loss": 0.5706, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018058198434886492, |
|
"loss": 0.784, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018049275422792497, |
|
"loss": 0.5877, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018040334171256277, |
|
"loss": 0.6392, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018031374700538467, |
|
"loss": 0.5623, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018022397030940986, |
|
"loss": 0.6878, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018013401182806994, |
|
"loss": 0.5297, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018004387176520843, |
|
"loss": 0.4828, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017995355032508027, |
|
"loss": 0.5836, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017986304771235143, |
|
"loss": 0.5292, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001797723641320984, |
|
"loss": 0.5173, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.5887213349342346, |
|
"eval_runtime": 29.3237, |
|
"eval_samples_per_second": 55.552, |
|
"eval_steps_per_second": 27.793, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017968149978980774, |
|
"loss": 0.6018, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017959045489137566, |
|
"loss": 0.5353, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017949922964310738, |
|
"loss": 0.7697, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017940782425171693, |
|
"loss": 0.4775, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001793162389243264, |
|
"loss": 0.6834, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017922447386846578, |
|
"loss": 0.8259, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017913252929207217, |
|
"loss": 0.7376, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001790404054034895, |
|
"loss": 0.7481, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001789481024114681, |
|
"loss": 0.8861, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017885562052516398, |
|
"loss": 0.4938, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017876295995413867, |
|
"loss": 0.5681, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017867012090835854, |
|
"loss": 0.4892, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017857710359819436, |
|
"loss": 0.6888, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017848390823442086, |
|
"loss": 0.5947, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001783905350282163, |
|
"loss": 0.7089, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017829698419116177, |
|
"loss": 0.5473, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017820325593524098, |
|
"loss": 0.6345, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017810935047283966, |
|
"loss": 0.6343, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017801526801674506, |
|
"loss": 0.5587, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017792100878014552, |
|
"loss": 0.4201, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.5952056646347046, |
|
"eval_runtime": 29.408, |
|
"eval_samples_per_second": 55.393, |
|
"eval_steps_per_second": 27.714, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017782657297662992, |
|
"loss": 0.6726, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00017773196082018728, |
|
"loss": 0.5181, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001776371725252062, |
|
"loss": 0.4734, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001775422083064744, |
|
"loss": 0.5406, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017744706837917828, |
|
"loss": 0.4941, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017735175295890233, |
|
"loss": 0.5945, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017725626226162874, |
|
"loss": 0.5451, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001771605965037369, |
|
"loss": 0.6012, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017706475590200285, |
|
"loss": 0.6645, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001769687406735988, |
|
"loss": 0.8397, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00017687255103609266, |
|
"loss": 0.4745, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017677618720744764, |
|
"loss": 0.9309, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001766796494060215, |
|
"loss": 0.5752, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017658293785056638, |
|
"loss": 0.846, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017648605276022808, |
|
"loss": 0.4726, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017638899435454554, |
|
"loss": 0.5713, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001762917628534506, |
|
"loss": 0.5512, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017619435847726712, |
|
"loss": 0.538, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017609678144671093, |
|
"loss": 0.5254, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017599903198288886, |
|
"loss": 0.667, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.5801939964294434, |
|
"eval_runtime": 29.3619, |
|
"eval_samples_per_second": 55.48, |
|
"eval_steps_per_second": 27.757, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017590111030729862, |
|
"loss": 0.8356, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017580301664182812, |
|
"loss": 0.6087, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017570475120875495, |
|
"loss": 0.5965, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017560631423074591, |
|
"loss": 0.5231, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017550770593085662, |
|
"loss": 0.595, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001754089265325308, |
|
"loss": 0.5015, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00017530997625959992, |
|
"loss": 0.4863, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00017521085533628265, |
|
"loss": 0.7477, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00017511156398718439, |
|
"loss": 0.5633, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001750121024372966, |
|
"loss": 0.6316, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001749124709119965, |
|
"loss": 0.4627, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001748126696370465, |
|
"loss": 0.662, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017471269883859356, |
|
"loss": 0.6426, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017461255874316885, |
|
"loss": 0.6185, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017451224957768717, |
|
"loss": 0.6106, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017441177156944635, |
|
"loss": 0.7092, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001743111249461269, |
|
"loss": 0.816, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017421030993579133, |
|
"loss": 0.8185, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017410932676688377, |
|
"loss": 0.4779, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017400817566822938, |
|
"loss": 0.8568, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.5921781063079834, |
|
"eval_runtime": 29.3482, |
|
"eval_samples_per_second": 55.506, |
|
"eval_steps_per_second": 27.77, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001739068568690338, |
|
"loss": 0.6461, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017380537059888277, |
|
"loss": 0.6208, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017370371708774141, |
|
"loss": 0.5665, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001736018965659539, |
|
"loss": 0.5903, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017349990926424273, |
|
"loss": 0.5639, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001733977554137085, |
|
"loss": 0.5138, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017329543524582898, |
|
"loss": 0.5628, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.000173192948992459, |
|
"loss": 0.3835, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017309029688582963, |
|
"loss": 0.6234, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017298747915854782, |
|
"loss": 0.86, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017288449604359576, |
|
"loss": 0.7151, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017278134777433044, |
|
"loss": 0.554, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017267803458448307, |
|
"loss": 0.583, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017257455670815854, |
|
"loss": 0.5642, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017247091437983496, |
|
"loss": 1.0091, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017236710783436306, |
|
"loss": 0.5834, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017226313730696574, |
|
"loss": 0.6862, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017215900303323736, |
|
"loss": 0.4787, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017205470524914342, |
|
"loss": 0.4951, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017195024419101987, |
|
"loss": 0.515, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.5800089836120605, |
|
"eval_runtime": 29.3282, |
|
"eval_samples_per_second": 55.544, |
|
"eval_steps_per_second": 27.789, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017184562009557271, |
|
"loss": 0.6123, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017174083319987732, |
|
"loss": 0.6898, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.000171635883741378, |
|
"loss": 0.487, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001715307719578874, |
|
"loss": 0.6113, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.000171425498087586, |
|
"loss": 0.4276, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017132006236902155, |
|
"loss": 0.5996, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017121446504110859, |
|
"loss": 0.564, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017110870634312784, |
|
"loss": 0.8961, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017100278651472562, |
|
"loss": 0.4929, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001708967057959135, |
|
"loss": 0.5149, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017079046442706748, |
|
"loss": 0.4424, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017068406264892768, |
|
"loss": 1.0212, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017057750070259765, |
|
"loss": 0.4541, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017047077882954392, |
|
"loss": 0.7563, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001703638972715954, |
|
"loss": 0.644, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017025685627094283, |
|
"loss": 0.4545, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017014965607013824, |
|
"loss": 0.61, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001700422969120944, |
|
"loss": 0.6925, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00016993477904008432, |
|
"loss": 0.4985, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00016982710269774058, |
|
"loss": 0.504, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 0.5893893241882324, |
|
"eval_runtime": 29.3252, |
|
"eval_samples_per_second": 55.55, |
|
"eval_steps_per_second": 27.792, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00016971926812905488, |
|
"loss": 0.3721, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016961127557837751, |
|
"loss": 0.5396, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016950312529041663, |
|
"loss": 0.4731, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001693948175102379, |
|
"loss": 0.5411, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001692863524832639, |
|
"loss": 0.6364, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016917773045527343, |
|
"loss": 0.5857, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016906895167240112, |
|
"loss": 0.7359, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001689600163811367, |
|
"loss": 0.5994, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001688509248283247, |
|
"loss": 0.5892, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016874167726116363, |
|
"loss": 0.7209, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016863227392720554, |
|
"loss": 0.5054, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016852271507435544, |
|
"loss": 0.5956, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016841300095087077, |
|
"loss": 0.5568, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016830313180536078, |
|
"loss": 0.6406, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016819310788678603, |
|
"loss": 0.6066, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016808292944445774, |
|
"loss": 0.4708, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001679725967280373, |
|
"loss": 0.6026, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016786210998753575, |
|
"loss": 0.8752, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016775146947331298, |
|
"loss": 0.7405, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016764067543607753, |
|
"loss": 0.6361, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.5982840657234192, |
|
"eval_runtime": 29.3385, |
|
"eval_samples_per_second": 55.524, |
|
"eval_steps_per_second": 27.779, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016752972812688564, |
|
"loss": 0.6528, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016741862779714098, |
|
"loss": 0.5128, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016730737469859388, |
|
"loss": 0.723, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016719596908334092, |
|
"loss": 0.5223, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001670844112038242, |
|
"loss": 0.5353, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001669727013128309, |
|
"loss": 0.7645, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016686083966349266, |
|
"loss": 0.6167, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016674882650928493, |
|
"loss": 0.3836, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00016663666210402656, |
|
"loss": 0.6045, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00016652434670187907, |
|
"loss": 0.5973, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001664118805573461, |
|
"loss": 0.5661, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.000166299263925273, |
|
"loss": 0.4528, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00016618649706084596, |
|
"loss": 0.6458, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00016607358021959173, |
|
"loss": 0.412, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001659605136573768, |
|
"loss": 0.5031, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00016584729763040697, |
|
"loss": 0.4902, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016573393239522678, |
|
"loss": 0.8064, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016562041820871874, |
|
"loss": 0.5497, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000165506755328103, |
|
"loss": 0.6602, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016539294401093658, |
|
"loss": 0.4896, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.5770359039306641, |
|
"eval_runtime": 29.3487, |
|
"eval_samples_per_second": 55.505, |
|
"eval_steps_per_second": 27.77, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016527898451511287, |
|
"loss": 0.3522, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016516487709886105, |
|
"loss": 0.6396, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0001650506220207454, |
|
"loss": 0.8431, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016493621953966495, |
|
"loss": 0.5146, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016482166991485265, |
|
"loss": 0.5748, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016470697340587476, |
|
"loss": 0.5961, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016459213027263063, |
|
"loss": 0.5165, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016447714077535167, |
|
"loss": 0.8974, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000164362005174601, |
|
"loss": 0.839, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016424672373127277, |
|
"loss": 0.5579, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016413129670659167, |
|
"loss": 0.832, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016401572436211222, |
|
"loss": 0.7778, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001639000069597183, |
|
"loss": 0.4416, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001637841447616224, |
|
"loss": 0.4954, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001636681380303652, |
|
"loss": 0.487, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016355198702881478, |
|
"loss": 0.6773, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001634356920201662, |
|
"loss": 0.5598, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016331925326794087, |
|
"loss": 0.4758, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016320267103598585, |
|
"loss": 0.5392, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016308594558847337, |
|
"loss": 0.6044, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.5716915726661682, |
|
"eval_runtime": 29.3528, |
|
"eval_samples_per_second": 55.497, |
|
"eval_steps_per_second": 27.766, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016296907718990015, |
|
"loss": 0.543, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016285206610508685, |
|
"loss": 0.677, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016273491259917745, |
|
"loss": 0.8737, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001626176169376387, |
|
"loss": 0.6001, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001625001793862593, |
|
"loss": 0.6186, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001623826002111497, |
|
"loss": 0.6821, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016226487967874116, |
|
"loss": 0.4421, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016214701805578518, |
|
"loss": 0.5087, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001620290156093531, |
|
"loss": 0.465, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016191087260683523, |
|
"loss": 0.5147, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016179258931594051, |
|
"loss": 0.6099, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001616741660046957, |
|
"loss": 0.6062, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016155560294144479, |
|
"loss": 0.5735, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016143690039484857, |
|
"loss": 0.6008, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016131805863388378, |
|
"loss": 0.6302, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016119907792784267, |
|
"loss": 0.5624, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016107995854633235, |
|
"loss": 0.5803, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016096070075927415, |
|
"loss": 0.5977, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016084130483690295, |
|
"loss": 0.4958, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001607217710497668, |
|
"loss": 0.4925, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.5715158581733704, |
|
"eval_runtime": 29.3023, |
|
"eval_samples_per_second": 55.593, |
|
"eval_steps_per_second": 27.813, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.000160602099668726, |
|
"loss": 0.641, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016048229096495272, |
|
"loss": 0.5944, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016036234520993024, |
|
"loss": 0.655, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001602422626754524, |
|
"loss": 0.3301, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001601220436336231, |
|
"loss": 0.6563, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016000168835685535, |
|
"loss": 0.6173, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015988119711787105, |
|
"loss": 0.7029, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001597605701897001, |
|
"loss": 0.7588, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015963980784567986, |
|
"loss": 0.527, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015951891035945464, |
|
"loss": 0.6378, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001593978780049748, |
|
"loss": 0.5056, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015927671105649648, |
|
"loss": 0.6517, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015915540978858066, |
|
"loss": 1.0716, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015903397447609288, |
|
"loss": 0.5948, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001589124053942022, |
|
"loss": 0.4883, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001587907028183809, |
|
"loss": 0.5172, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015866886702440384, |
|
"loss": 0.4618, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015854689828834757, |
|
"loss": 0.7591, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015842479688659003, |
|
"loss": 0.435, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015830256309580968, |
|
"loss": 0.4704, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.5707182288169861, |
|
"eval_runtime": 29.3397, |
|
"eval_samples_per_second": 55.522, |
|
"eval_steps_per_second": 27.778, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015818019719298504, |
|
"loss": 0.6049, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015805769945539394, |
|
"loss": 0.5501, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015793507016061305, |
|
"loss": 0.7212, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015781230958651694, |
|
"loss": 0.6531, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015768941801127783, |
|
"loss": 0.9526, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00015756639571336476, |
|
"loss": 0.5878, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015744324297154293, |
|
"loss": 0.46, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015731996006487317, |
|
"loss": 0.4893, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015719654727271122, |
|
"loss": 0.5149, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015707300487470717, |
|
"loss": 0.6435, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015694933315080477, |
|
"loss": 0.5271, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015682553238124082, |
|
"loss": 0.4587, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015670160284654458, |
|
"loss": 0.4208, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015657754482753704, |
|
"loss": 0.5887, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001564533586053303, |
|
"loss": 0.6264, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015632904446132706, |
|
"loss": 0.6527, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015620460267721983, |
|
"loss": 0.6423, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015608003353499033, |
|
"loss": 0.5313, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001559553373169089, |
|
"loss": 0.58, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015583051430553385, |
|
"loss": 0.5342, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.5747966766357422, |
|
"eval_runtime": 29.3347, |
|
"eval_samples_per_second": 55.531, |
|
"eval_steps_per_second": 27.783, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00015570556478371075, |
|
"loss": 0.6231, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001555804890345719, |
|
"loss": 0.3901, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00015545528734153553, |
|
"loss": 0.5303, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001553299599883054, |
|
"loss": 0.5951, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00015520450725886988, |
|
"loss": 0.4837, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00015507892943750147, |
|
"loss": 0.6, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001549532268087562, |
|
"loss": 0.5726, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015482739965747282, |
|
"loss": 0.5652, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001547014482687723, |
|
"loss": 0.8029, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001545753729280571, |
|
"loss": 0.6563, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015444917392101054, |
|
"loss": 0.5851, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001543228515335962, |
|
"loss": 0.4526, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015419640605205727, |
|
"loss": 0.6139, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001540698377629157, |
|
"loss": 0.5471, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.000153943146952972, |
|
"loss": 0.5244, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015381633390930402, |
|
"loss": 0.6256, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001536893989192668, |
|
"loss": 0.4401, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015356234227049154, |
|
"loss": 0.5637, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015343516425088524, |
|
"loss": 0.5212, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001533078651486299, |
|
"loss": 0.755, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.5672922730445862, |
|
"eval_runtime": 29.3376, |
|
"eval_samples_per_second": 55.526, |
|
"eval_steps_per_second": 27.78, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001531804452521818, |
|
"loss": 0.6678, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015305290485027114, |
|
"loss": 0.413, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015292524423190094, |
|
"loss": 0.7693, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015279746368634673, |
|
"loss": 0.7246, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015266956350315586, |
|
"loss": 0.5489, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001525415439721467, |
|
"loss": 0.6732, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015241340538340808, |
|
"loss": 0.4387, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001522851480272986, |
|
"loss": 0.7792, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00015215677219444594, |
|
"loss": 0.4398, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001520282781757464, |
|
"loss": 0.5612, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00015189966626236385, |
|
"loss": 0.7103, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001517709367457295, |
|
"loss": 0.6044, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.000151642089917541, |
|
"loss": 0.6622, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001515131260697618, |
|
"loss": 0.4819, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00015138404549462053, |
|
"loss": 0.5547, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015125484848461026, |
|
"loss": 0.6227, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000151125535332488, |
|
"loss": 0.4646, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015099610633127387, |
|
"loss": 0.5327, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015086656177425048, |
|
"loss": 0.5109, |
|
"step": 699 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2097, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 9.190285595765637e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|