{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00909090909090909, "grad_norm": 4.071784019470215, "learning_rate": 0.00025, "loss": 3.1182, "step": 1 }, { "epoch": 0.01818181818181818, "grad_norm": 4.663088321685791, "learning_rate": 0.0005, "loss": 3.4235, "step": 2 }, { "epoch": 0.02727272727272727, "grad_norm": 3.732215404510498, "learning_rate": 0.0004984756097560976, "loss": 3.1436, "step": 3 }, { "epoch": 0.03636363636363636, "grad_norm": 3.961653232574463, "learning_rate": 0.0004969512195121951, "loss": 3.167, "step": 4 }, { "epoch": 0.045454545454545456, "grad_norm": 3.813910484313965, "learning_rate": 0.0004954268292682927, "loss": 3.2398, "step": 5 }, { "epoch": 0.05454545454545454, "grad_norm": 3.4221489429473877, "learning_rate": 0.0004939024390243902, "loss": 3.0624, "step": 6 }, { "epoch": 0.06363636363636363, "grad_norm": 2.992903232574463, "learning_rate": 0.0004923780487804878, "loss": 2.2161, "step": 7 }, { "epoch": 0.07272727272727272, "grad_norm": 4.314737319946289, "learning_rate": 0.0004908536585365854, "loss": 3.2214, "step": 8 }, { "epoch": 0.08181818181818182, "grad_norm": 3.5429513454437256, "learning_rate": 0.0004893292682926829, "loss": 2.6857, "step": 9 }, { "epoch": 0.09090909090909091, "grad_norm": null, "learning_rate": 0.0004893292682926829, "loss": 2.4876, "step": 10 }, { "epoch": 0.1, "grad_norm": 3.5051212310791016, "learning_rate": 0.0004878048780487805, "loss": 2.6111, "step": 11 }, { "epoch": 0.10909090909090909, "grad_norm": 4.561686038970947, "learning_rate": 0.00048628048780487805, "loss": 2.8702, "step": 12 }, { "epoch": 0.11818181818181818, "grad_norm": 3.360241413116455, "learning_rate": 0.0004847560975609756, "loss": 2.3469, "step": 13 }, { "epoch": 0.12727272727272726, "grad_norm": 4.544058799743652, "learning_rate": 
0.00048323170731707317, "loss": 2.4873, "step": 14 }, { "epoch": 0.13636363636363635, "grad_norm": 3.3817787170410156, "learning_rate": 0.0004817073170731707, "loss": 2.5345, "step": 15 }, { "epoch": 0.14545454545454545, "grad_norm": 3.405874013900757, "learning_rate": 0.0004801829268292683, "loss": 2.1892, "step": 16 }, { "epoch": 0.15454545454545454, "grad_norm": 3.5852108001708984, "learning_rate": 0.00047865853658536585, "loss": 2.1381, "step": 17 }, { "epoch": 0.16363636363636364, "grad_norm": 3.0975358486175537, "learning_rate": 0.0004771341463414634, "loss": 2.3727, "step": 18 }, { "epoch": 0.17272727272727273, "grad_norm": 3.514904260635376, "learning_rate": 0.00047560975609756096, "loss": 2.4526, "step": 19 }, { "epoch": 0.18181818181818182, "grad_norm": 4.176087379455566, "learning_rate": 0.0004740853658536585, "loss": 2.2999, "step": 20 }, { "epoch": 0.19090909090909092, "grad_norm": 2.7949447631835938, "learning_rate": 0.0004725609756097561, "loss": 2.2605, "step": 21 }, { "epoch": 0.2, "grad_norm": 3.023271322250366, "learning_rate": 0.00047103658536585364, "loss": 2.2398, "step": 22 }, { "epoch": 0.20909090909090908, "grad_norm": 3.608038902282715, "learning_rate": 0.0004695121951219512, "loss": 2.2346, "step": 23 }, { "epoch": 0.21818181818181817, "grad_norm": 2.7044689655303955, "learning_rate": 0.0004679878048780488, "loss": 1.8128, "step": 24 }, { "epoch": 0.22727272727272727, "grad_norm": 2.9385201930999756, "learning_rate": 0.0004664634146341464, "loss": 1.9915, "step": 25 }, { "epoch": 0.23636363636363636, "grad_norm": 2.5581564903259277, "learning_rate": 0.00046493902439024394, "loss": 2.0105, "step": 26 }, { "epoch": 0.24545454545454545, "grad_norm": 3.774770498275757, "learning_rate": 0.0004634146341463415, "loss": 2.4997, "step": 27 }, { "epoch": 0.2545454545454545, "grad_norm": 3.650287628173828, "learning_rate": 0.00046189024390243906, "loss": 2.6754, "step": 28 }, { "epoch": 0.2636363636363636, "grad_norm": 3.0995235443115234, 
"learning_rate": 0.0004603658536585366, "loss": 2.5039, "step": 29 }, { "epoch": 0.2727272727272727, "grad_norm": 2.6960413455963135, "learning_rate": 0.0004588414634146342, "loss": 2.2671, "step": 30 }, { "epoch": 0.2818181818181818, "grad_norm": 2.8800947666168213, "learning_rate": 0.00045731707317073173, "loss": 1.9923, "step": 31 }, { "epoch": 0.2909090909090909, "grad_norm": 2.768280506134033, "learning_rate": 0.0004557926829268293, "loss": 2.323, "step": 32 }, { "epoch": 0.3, "grad_norm": 3.0355064868927, "learning_rate": 0.00045426829268292685, "loss": 2.1258, "step": 33 }, { "epoch": 0.3090909090909091, "grad_norm": 2.860558032989502, "learning_rate": 0.0004527439024390244, "loss": 2.2574, "step": 34 }, { "epoch": 0.3181818181818182, "grad_norm": 3.0588767528533936, "learning_rate": 0.00045121951219512197, "loss": 2.2413, "step": 35 }, { "epoch": 0.32727272727272727, "grad_norm": 3.065910577774048, "learning_rate": 0.00044969512195121953, "loss": 1.899, "step": 36 }, { "epoch": 0.33636363636363636, "grad_norm": 2.7733941078186035, "learning_rate": 0.0004481707317073171, "loss": 2.1158, "step": 37 }, { "epoch": 0.34545454545454546, "grad_norm": 2.811892509460449, "learning_rate": 0.00044664634146341465, "loss": 2.48, "step": 38 }, { "epoch": 0.35454545454545455, "grad_norm": 2.3305535316467285, "learning_rate": 0.0004451219512195122, "loss": 2.0319, "step": 39 }, { "epoch": 0.36363636363636365, "grad_norm": 2.5622308254241943, "learning_rate": 0.00044359756097560977, "loss": 1.9229, "step": 40 }, { "epoch": 0.37272727272727274, "grad_norm": 3.1169259548187256, "learning_rate": 0.00044207317073170733, "loss": 2.3218, "step": 41 }, { "epoch": 0.38181818181818183, "grad_norm": 2.607619285583496, "learning_rate": 0.0004405487804878049, "loss": 2.5146, "step": 42 }, { "epoch": 0.39090909090909093, "grad_norm": 3.1160788536071777, "learning_rate": 0.00043902439024390245, "loss": 2.234, "step": 43 }, { "epoch": 0.4, "grad_norm": 3.0238115787506104, "learning_rate": 
0.0004375, "loss": 1.6721, "step": 44 }, { "epoch": 0.4090909090909091, "grad_norm": 2.4802684783935547, "learning_rate": 0.00043597560975609757, "loss": 1.7682, "step": 45 }, { "epoch": 0.41818181818181815, "grad_norm": 2.3478028774261475, "learning_rate": 0.00043445121951219513, "loss": 1.7369, "step": 46 }, { "epoch": 0.42727272727272725, "grad_norm": 3.590702772140503, "learning_rate": 0.0004329268292682927, "loss": 2.2643, "step": 47 }, { "epoch": 0.43636363636363634, "grad_norm": 2.9007341861724854, "learning_rate": 0.00043140243902439025, "loss": 2.0415, "step": 48 }, { "epoch": 0.44545454545454544, "grad_norm": 2.371359348297119, "learning_rate": 0.0004298780487804878, "loss": 1.7783, "step": 49 }, { "epoch": 0.45454545454545453, "grad_norm": 2.953730344772339, "learning_rate": 0.00042835365853658537, "loss": 2.3273, "step": 50 }, { "epoch": 0.4636363636363636, "grad_norm": 3.101320266723633, "learning_rate": 0.0004268292682926829, "loss": 2.2416, "step": 51 }, { "epoch": 0.4727272727272727, "grad_norm": 2.7180256843566895, "learning_rate": 0.0004253048780487805, "loss": 2.3769, "step": 52 }, { "epoch": 0.4818181818181818, "grad_norm": 3.001201868057251, "learning_rate": 0.00042378048780487805, "loss": 2.189, "step": 53 }, { "epoch": 0.4909090909090909, "grad_norm": 3.2738707065582275, "learning_rate": 0.0004222560975609756, "loss": 2.2839, "step": 54 }, { "epoch": 0.5, "grad_norm": 2.4475655555725098, "learning_rate": 0.00042073170731707316, "loss": 1.8438, "step": 55 }, { "epoch": 0.509090909090909, "grad_norm": 2.2483181953430176, "learning_rate": 0.0004192073170731707, "loss": 1.6211, "step": 56 }, { "epoch": 0.5181818181818182, "grad_norm": 3.2838337421417236, "learning_rate": 0.0004176829268292683, "loss": 2.255, "step": 57 }, { "epoch": 0.5272727272727272, "grad_norm": 2.5919408798217773, "learning_rate": 0.00041615853658536584, "loss": 2.0132, "step": 58 }, { "epoch": 0.5363636363636364, "grad_norm": 2.7850112915039062, "learning_rate": 
0.0004146341463414634, "loss": 2.1505, "step": 59 }, { "epoch": 0.5454545454545454, "grad_norm": 2.3396100997924805, "learning_rate": 0.00041310975609756096, "loss": 1.8747, "step": 60 }, { "epoch": 0.5545454545454546, "grad_norm": 2.788200616836548, "learning_rate": 0.0004115853658536585, "loss": 2.305, "step": 61 }, { "epoch": 0.5636363636363636, "grad_norm": 2.750195264816284, "learning_rate": 0.0004100609756097561, "loss": 1.8235, "step": 62 }, { "epoch": 0.5727272727272728, "grad_norm": 3.041684865951538, "learning_rate": 0.00040853658536585364, "loss": 2.1002, "step": 63 }, { "epoch": 0.5818181818181818, "grad_norm": 3.0440473556518555, "learning_rate": 0.0004070121951219512, "loss": 1.7584, "step": 64 }, { "epoch": 0.5909090909090909, "grad_norm": 3.090240955352783, "learning_rate": 0.0004054878048780488, "loss": 1.8738, "step": 65 }, { "epoch": 0.6, "grad_norm": 2.376418352127075, "learning_rate": 0.0004039634146341464, "loss": 1.8655, "step": 66 }, { "epoch": 0.6090909090909091, "grad_norm": 2.83278489112854, "learning_rate": 0.00040243902439024393, "loss": 2.6537, "step": 67 }, { "epoch": 0.6181818181818182, "grad_norm": 3.2327969074249268, "learning_rate": 0.0004009146341463415, "loss": 1.9686, "step": 68 }, { "epoch": 0.6272727272727273, "grad_norm": 2.8755669593811035, "learning_rate": 0.00039939024390243905, "loss": 2.1451, "step": 69 }, { "epoch": 0.6363636363636364, "grad_norm": 2.6152892112731934, "learning_rate": 0.0003978658536585366, "loss": 1.5981, "step": 70 }, { "epoch": 0.6454545454545455, "grad_norm": 3.076869010925293, "learning_rate": 0.0003963414634146342, "loss": 1.9654, "step": 71 }, { "epoch": 0.6545454545454545, "grad_norm": 2.6173629760742188, "learning_rate": 0.00039481707317073173, "loss": 2.0392, "step": 72 }, { "epoch": 0.6636363636363637, "grad_norm": 3.449314594268799, "learning_rate": 0.0003932926829268293, "loss": 2.0893, "step": 73 }, { "epoch": 0.6727272727272727, "grad_norm": 2.5918149948120117, "learning_rate": 
0.00039176829268292685, "loss": 1.969, "step": 74 }, { "epoch": 0.6818181818181818, "grad_norm": 2.780331611633301, "learning_rate": 0.0003902439024390244, "loss": 2.0783, "step": 75 }, { "epoch": 0.6909090909090909, "grad_norm": 2.8215320110321045, "learning_rate": 0.00038871951219512197, "loss": 1.9011, "step": 76 }, { "epoch": 0.7, "grad_norm": 2.9159982204437256, "learning_rate": 0.00038719512195121953, "loss": 1.9432, "step": 77 }, { "epoch": 0.7090909090909091, "grad_norm": 2.684636116027832, "learning_rate": 0.0003856707317073171, "loss": 1.9473, "step": 78 }, { "epoch": 0.7181818181818181, "grad_norm": 2.554502248764038, "learning_rate": 0.00038414634146341465, "loss": 1.9211, "step": 79 }, { "epoch": 0.7272727272727273, "grad_norm": 2.93910551071167, "learning_rate": 0.0003826219512195122, "loss": 1.7052, "step": 80 }, { "epoch": 0.7363636363636363, "grad_norm": 3.246049165725708, "learning_rate": 0.00038109756097560977, "loss": 1.9143, "step": 81 }, { "epoch": 0.7454545454545455, "grad_norm": 2.407006025314331, "learning_rate": 0.00037957317073170733, "loss": 1.8885, "step": 82 }, { "epoch": 0.7545454545454545, "grad_norm": 3.0212433338165283, "learning_rate": 0.0003780487804878049, "loss": 2.2017, "step": 83 }, { "epoch": 0.7636363636363637, "grad_norm": 2.7870118618011475, "learning_rate": 0.00037652439024390245, "loss": 1.8038, "step": 84 }, { "epoch": 0.7727272727272727, "grad_norm": 2.3417677879333496, "learning_rate": 0.000375, "loss": 1.6184, "step": 85 }, { "epoch": 0.7818181818181819, "grad_norm": 3.359086036682129, "learning_rate": 0.00037347560975609757, "loss": 2.3235, "step": 86 }, { "epoch": 0.7909090909090909, "grad_norm": 2.854968786239624, "learning_rate": 0.0003719512195121951, "loss": 1.9161, "step": 87 }, { "epoch": 0.8, "grad_norm": 3.0338873863220215, "learning_rate": 0.0003704268292682927, "loss": 2.5778, "step": 88 }, { "epoch": 0.8090909090909091, "grad_norm": 2.7215914726257324, "learning_rate": 0.00036890243902439025, "loss": 
2.2668, "step": 89 }, { "epoch": 0.8181818181818182, "grad_norm": 3.0825934410095215, "learning_rate": 0.0003673780487804878, "loss": 2.0312, "step": 90 }, { "epoch": 0.8272727272727273, "grad_norm": 4.025455951690674, "learning_rate": 0.00036585365853658537, "loss": 2.0015, "step": 91 }, { "epoch": 0.8363636363636363, "grad_norm": 2.394049882888794, "learning_rate": 0.0003643292682926829, "loss": 1.7331, "step": 92 }, { "epoch": 0.8454545454545455, "grad_norm": 3.181750535964966, "learning_rate": 0.0003628048780487805, "loss": 1.8799, "step": 93 }, { "epoch": 0.8545454545454545, "grad_norm": 3.0317776203155518, "learning_rate": 0.00036128048780487804, "loss": 1.7447, "step": 94 }, { "epoch": 0.8636363636363636, "grad_norm": 2.537506341934204, "learning_rate": 0.0003597560975609756, "loss": 1.5733, "step": 95 }, { "epoch": 0.8727272727272727, "grad_norm": 2.7143495082855225, "learning_rate": 0.00035823170731707316, "loss": 1.7666, "step": 96 }, { "epoch": 0.8818181818181818, "grad_norm": 2.9140336513519287, "learning_rate": 0.0003567073170731707, "loss": 1.5887, "step": 97 }, { "epoch": 0.8909090909090909, "grad_norm": 2.7197532653808594, "learning_rate": 0.0003551829268292683, "loss": 2.0022, "step": 98 }, { "epoch": 0.9, "grad_norm": 3.1423003673553467, "learning_rate": 0.00035365853658536584, "loss": 1.9328, "step": 99 }, { "epoch": 0.9090909090909091, "grad_norm": 2.541865110397339, "learning_rate": 0.0003521341463414634, "loss": 1.7566, "step": 100 }, { "epoch": 0.9181818181818182, "grad_norm": 2.9177896976470947, "learning_rate": 0.00035060975609756096, "loss": 1.7848, "step": 101 }, { "epoch": 0.9272727272727272, "grad_norm": 2.671201467514038, "learning_rate": 0.0003490853658536585, "loss": 2.0213, "step": 102 }, { "epoch": 0.9363636363636364, "grad_norm": 2.950990676879883, "learning_rate": 0.0003475609756097561, "loss": 1.8383, "step": 103 }, { "epoch": 0.9454545454545454, "grad_norm": 2.508028984069824, "learning_rate": 0.00034603658536585364, "loss": 
1.8774, "step": 104 }, { "epoch": 0.9545454545454546, "grad_norm": 2.4655253887176514, "learning_rate": 0.0003445121951219512, "loss": 1.7972, "step": 105 }, { "epoch": 0.9636363636363636, "grad_norm": 3.085402727127075, "learning_rate": 0.0003429878048780488, "loss": 1.8712, "step": 106 }, { "epoch": 0.9727272727272728, "grad_norm": 2.305389642715454, "learning_rate": 0.0003414634146341464, "loss": 1.48, "step": 107 }, { "epoch": 0.9818181818181818, "grad_norm": 2.7900662422180176, "learning_rate": 0.00033993902439024393, "loss": 1.6433, "step": 108 }, { "epoch": 0.990909090909091, "grad_norm": 2.7554140090942383, "learning_rate": 0.0003384146341463415, "loss": 2.2131, "step": 109 }, { "epoch": 1.0, "grad_norm": 2.8607048988342285, "learning_rate": 0.00033689024390243905, "loss": 2.062, "step": 110 }, { "epoch": 1.0, "eval_f1": 0.8911, "eval_gen_len": 49.7273, "eval_loss": 1.8471709489822388, "eval_precision": 0.8897, "eval_recall": 0.8927, "eval_rouge1": 0.4633, "eval_rouge2": 0.2177, "eval_rougeL": 0.3919, "eval_rougeLsum": 0.428, "eval_runtime": 46.8612, "eval_samples_per_second": 2.347, "eval_steps_per_second": 0.299, "step": 110 }, { "epoch": 1.009090909090909, "grad_norm": 2.5493690967559814, "learning_rate": 0.0003353658536585366, "loss": 1.9424, "step": 111 }, { "epoch": 1.018181818181818, "grad_norm": 2.030336618423462, "learning_rate": 0.00033384146341463417, "loss": 1.4907, "step": 112 }, { "epoch": 1.0272727272727273, "grad_norm": 2.3484795093536377, "learning_rate": 0.00033231707317073173, "loss": 1.8514, "step": 113 }, { "epoch": 1.0363636363636364, "grad_norm": 3.1272692680358887, "learning_rate": 0.0003307926829268293, "loss": 2.2966, "step": 114 }, { "epoch": 1.0454545454545454, "grad_norm": 2.4133095741271973, "learning_rate": 0.00032926829268292685, "loss": 1.5813, "step": 115 }, { "epoch": 1.0545454545454545, "grad_norm": 2.597964286804199, "learning_rate": 0.0003277439024390244, "loss": 1.9743, "step": 116 }, { "epoch": 1.0636363636363637, 
"grad_norm": 2.755969524383545, "learning_rate": 0.00032621951219512197, "loss": 1.4676, "step": 117 }, { "epoch": 1.0727272727272728, "grad_norm": 2.3715810775756836, "learning_rate": 0.00032469512195121953, "loss": 1.9406, "step": 118 }, { "epoch": 1.0818181818181818, "grad_norm": 2.3229820728302, "learning_rate": 0.0003231707317073171, "loss": 1.9787, "step": 119 }, { "epoch": 1.0909090909090908, "grad_norm": 2.724597215652466, "learning_rate": 0.00032164634146341465, "loss": 2.1574, "step": 120 }, { "epoch": 1.1, "grad_norm": 3.342278242111206, "learning_rate": 0.0003201219512195122, "loss": 1.8255, "step": 121 }, { "epoch": 1.1090909090909091, "grad_norm": 2.4172379970550537, "learning_rate": 0.00031859756097560977, "loss": 1.7576, "step": 122 }, { "epoch": 1.1181818181818182, "grad_norm": 3.158111095428467, "learning_rate": 0.00031707317073170733, "loss": 1.9855, "step": 123 }, { "epoch": 1.1272727272727272, "grad_norm": 2.9254257678985596, "learning_rate": 0.0003155487804878049, "loss": 2.0363, "step": 124 }, { "epoch": 1.1363636363636362, "grad_norm": 2.5448672771453857, "learning_rate": 0.00031402439024390245, "loss": 1.8373, "step": 125 }, { "epoch": 1.1454545454545455, "grad_norm": 2.693936347961426, "learning_rate": 0.0003125, "loss": 1.8654, "step": 126 }, { "epoch": 1.1545454545454545, "grad_norm": 2.5871312618255615, "learning_rate": 0.00031097560975609757, "loss": 1.7372, "step": 127 }, { "epoch": 1.1636363636363636, "grad_norm": 2.709867000579834, "learning_rate": 0.0003094512195121951, "loss": 1.8044, "step": 128 }, { "epoch": 1.1727272727272728, "grad_norm": 2.741077423095703, "learning_rate": 0.0003079268292682927, "loss": 1.9361, "step": 129 }, { "epoch": 1.1818181818181819, "grad_norm": 2.9570066928863525, "learning_rate": 0.00030640243902439024, "loss": 2.0666, "step": 130 }, { "epoch": 1.190909090909091, "grad_norm": 2.356640100479126, "learning_rate": 0.0003048780487804878, "loss": 1.7525, "step": 131 }, { "epoch": 1.2, "grad_norm": 
2.7299695014953613, "learning_rate": 0.00030335365853658536, "loss": 1.8064, "step": 132 }, { "epoch": 1.209090909090909, "grad_norm": 2.3065450191497803, "learning_rate": 0.0003018292682926829, "loss": 1.652, "step": 133 }, { "epoch": 1.2181818181818183, "grad_norm": 2.8539586067199707, "learning_rate": 0.0003003048780487805, "loss": 1.8285, "step": 134 }, { "epoch": 1.2272727272727273, "grad_norm": 2.843475580215454, "learning_rate": 0.00029878048780487804, "loss": 1.9938, "step": 135 }, { "epoch": 1.2363636363636363, "grad_norm": 2.547865390777588, "learning_rate": 0.0002972560975609756, "loss": 1.9472, "step": 136 }, { "epoch": 1.2454545454545454, "grad_norm": 2.1262078285217285, "learning_rate": 0.00029573170731707316, "loss": 1.3486, "step": 137 }, { "epoch": 1.2545454545454544, "grad_norm": 2.93005633354187, "learning_rate": 0.0002942073170731707, "loss": 1.5797, "step": 138 }, { "epoch": 1.2636363636363637, "grad_norm": 2.5727319717407227, "learning_rate": 0.0002926829268292683, "loss": 1.8976, "step": 139 }, { "epoch": 1.2727272727272727, "grad_norm": 3.29372501373291, "learning_rate": 0.00029115853658536584, "loss": 1.9751, "step": 140 }, { "epoch": 1.2818181818181817, "grad_norm": null, "learning_rate": 0.00029115853658536584, "loss": 1.9404, "step": 141 }, { "epoch": 1.290909090909091, "grad_norm": 2.868084669113159, "learning_rate": 0.0002896341463414634, "loss": 1.5832, "step": 142 }, { "epoch": 1.3, "grad_norm": 2.792365312576294, "learning_rate": 0.00028810975609756096, "loss": 2.0512, "step": 143 }, { "epoch": 1.309090909090909, "grad_norm": 2.333376169204712, "learning_rate": 0.0002865853658536585, "loss": 1.5019, "step": 144 }, { "epoch": 1.3181818181818181, "grad_norm": 3.5344386100769043, "learning_rate": 0.0002850609756097561, "loss": 2.0819, "step": 145 }, { "epoch": 1.3272727272727272, "grad_norm": 2.7761566638946533, "learning_rate": 0.00028353658536585364, "loss": 1.8773, "step": 146 }, { "epoch": 1.3363636363636364, "grad_norm": 
2.796036720275879, "learning_rate": 0.0002820121951219512, "loss": 1.8373, "step": 147 }, { "epoch": 1.3454545454545455, "grad_norm": 3.2006378173828125, "learning_rate": 0.0002804878048780488, "loss": 2.0195, "step": 148 }, { "epoch": 1.3545454545454545, "grad_norm": 3.6497931480407715, "learning_rate": 0.00027896341463414637, "loss": 1.7754, "step": 149 }, { "epoch": 1.3636363636363638, "grad_norm": 3.0451290607452393, "learning_rate": 0.00027743902439024393, "loss": 1.6708, "step": 150 }, { "epoch": 1.3727272727272728, "grad_norm": 3.2684431076049805, "learning_rate": 0.0002759146341463415, "loss": 2.0718, "step": 151 }, { "epoch": 1.3818181818181818, "grad_norm": 2.6152052879333496, "learning_rate": 0.00027439024390243905, "loss": 1.87, "step": 152 }, { "epoch": 1.3909090909090909, "grad_norm": 2.713304281234741, "learning_rate": 0.0002728658536585366, "loss": 1.707, "step": 153 }, { "epoch": 1.4, "grad_norm": 2.6585283279418945, "learning_rate": 0.00027134146341463417, "loss": 1.8551, "step": 154 }, { "epoch": 1.4090909090909092, "grad_norm": 2.903383255004883, "learning_rate": 0.00026981707317073173, "loss": 1.9101, "step": 155 }, { "epoch": 1.4181818181818182, "grad_norm": 2.6489484310150146, "learning_rate": 0.0002682926829268293, "loss": 1.8333, "step": 156 }, { "epoch": 1.4272727272727272, "grad_norm": 3.004567861557007, "learning_rate": 0.00026676829268292685, "loss": 1.4886, "step": 157 }, { "epoch": 1.4363636363636363, "grad_norm": 2.774327278137207, "learning_rate": 0.0002652439024390244, "loss": 1.8893, "step": 158 }, { "epoch": 1.4454545454545453, "grad_norm": 2.8652050495147705, "learning_rate": 0.00026371951219512197, "loss": 1.8702, "step": 159 }, { "epoch": 1.4545454545454546, "grad_norm": 2.2331173419952393, "learning_rate": 0.00026219512195121953, "loss": 1.9589, "step": 160 }, { "epoch": 1.4636363636363636, "grad_norm": 2.5273420810699463, "learning_rate": 0.0002606707317073171, "loss": 1.7515, "step": 161 }, { "epoch": 1.4727272727272727, 
"grad_norm": 3.0862534046173096, "learning_rate": 0.00025914634146341465, "loss": 1.4377, "step": 162 }, { "epoch": 1.481818181818182, "grad_norm": 3.167388677597046, "learning_rate": 0.0002576219512195122, "loss": 2.2095, "step": 163 }, { "epoch": 1.490909090909091, "grad_norm": 2.9668028354644775, "learning_rate": 0.00025609756097560977, "loss": 1.7428, "step": 164 }, { "epoch": 1.5, "grad_norm": 2.4375245571136475, "learning_rate": 0.0002545731707317073, "loss": 1.392, "step": 165 }, { "epoch": 1.509090909090909, "grad_norm": 2.71293568611145, "learning_rate": 0.0002530487804878049, "loss": 1.9706, "step": 166 }, { "epoch": 1.518181818181818, "grad_norm": 2.6570401191711426, "learning_rate": 0.00025152439024390245, "loss": 1.8777, "step": 167 }, { "epoch": 1.5272727272727273, "grad_norm": 2.379110813140869, "learning_rate": 0.00025, "loss": 1.4582, "step": 168 }, { "epoch": 1.5363636363636364, "grad_norm": 3.0288100242614746, "learning_rate": 0.00024847560975609756, "loss": 1.9216, "step": 169 }, { "epoch": 1.5454545454545454, "grad_norm": 2.574794054031372, "learning_rate": 0.0002469512195121951, "loss": 1.5392, "step": 170 }, { "epoch": 1.5545454545454547, "grad_norm": 2.8772997856140137, "learning_rate": 0.0002454268292682927, "loss": 1.8662, "step": 171 }, { "epoch": 1.5636363636363635, "grad_norm": 3.186906099319458, "learning_rate": 0.00024390243902439024, "loss": 1.7958, "step": 172 }, { "epoch": 1.5727272727272728, "grad_norm": 2.5595905780792236, "learning_rate": 0.0002423780487804878, "loss": 1.6044, "step": 173 }, { "epoch": 1.5818181818181818, "grad_norm": 2.952155828475952, "learning_rate": 0.00024085365853658536, "loss": 1.9509, "step": 174 }, { "epoch": 1.5909090909090908, "grad_norm": 2.6648874282836914, "learning_rate": 0.00023932926829268292, "loss": 1.8644, "step": 175 }, { "epoch": 1.6, "grad_norm": 2.649273157119751, "learning_rate": 0.00023780487804878048, "loss": 1.9846, "step": 176 }, { "epoch": 1.6090909090909091, "grad_norm": 
3.196133852005005, "learning_rate": 0.00023628048780487804, "loss": 1.689, "step": 177 }, { "epoch": 1.6181818181818182, "grad_norm": 2.5493838787078857, "learning_rate": 0.0002347560975609756, "loss": 1.6899, "step": 178 }, { "epoch": 1.6272727272727274, "grad_norm": 4.60580587387085, "learning_rate": 0.0002332317073170732, "loss": 1.8183, "step": 179 }, { "epoch": 1.6363636363636362, "grad_norm": 2.7253472805023193, "learning_rate": 0.00023170731707317075, "loss": 1.7829, "step": 180 }, { "epoch": 1.6454545454545455, "grad_norm": 2.9373483657836914, "learning_rate": 0.0002301829268292683, "loss": 1.7837, "step": 181 }, { "epoch": 1.6545454545454545, "grad_norm": 2.4897494316101074, "learning_rate": 0.00022865853658536587, "loss": 1.4675, "step": 182 }, { "epoch": 1.6636363636363636, "grad_norm": 2.3043053150177, "learning_rate": 0.00022713414634146343, "loss": 1.5805, "step": 183 }, { "epoch": 1.6727272727272728, "grad_norm": 3.0130205154418945, "learning_rate": 0.00022560975609756099, "loss": 2.1577, "step": 184 }, { "epoch": 1.6818181818181817, "grad_norm": 3.0861997604370117, "learning_rate": 0.00022408536585365855, "loss": 2.011, "step": 185 }, { "epoch": 1.690909090909091, "grad_norm": 2.6685359477996826, "learning_rate": 0.0002225609756097561, "loss": 1.9333, "step": 186 }, { "epoch": 1.7, "grad_norm": 2.309566020965576, "learning_rate": 0.00022103658536585367, "loss": 1.7602, "step": 187 }, { "epoch": 1.709090909090909, "grad_norm": 2.686500072479248, "learning_rate": 0.00021951219512195122, "loss": 1.9934, "step": 188 }, { "epoch": 1.7181818181818183, "grad_norm": 2.0740697383880615, "learning_rate": 0.00021798780487804878, "loss": 1.3903, "step": 189 }, { "epoch": 1.7272727272727273, "grad_norm": 2.9675910472869873, "learning_rate": 0.00021646341463414634, "loss": 1.8017, "step": 190 }, { "epoch": 1.7363636363636363, "grad_norm": 2.5559232234954834, "learning_rate": 0.0002149390243902439, "loss": 1.8526, "step": 191 }, { "epoch": 1.7454545454545456, 
"grad_norm": 3.0341475009918213, "learning_rate": 0.00021341463414634146, "loss": 1.9374, "step": 192 }, { "epoch": 1.7545454545454544, "grad_norm": 2.351853609085083, "learning_rate": 0.00021189024390243902, "loss": 1.695, "step": 193 }, { "epoch": 1.7636363636363637, "grad_norm": 2.8605730533599854, "learning_rate": 0.00021036585365853658, "loss": 2.1981, "step": 194 }, { "epoch": 1.7727272727272727, "grad_norm": 2.4615988731384277, "learning_rate": 0.00020884146341463414, "loss": 1.8249, "step": 195 }, { "epoch": 1.7818181818181817, "grad_norm": 2.9569573402404785, "learning_rate": 0.0002073170731707317, "loss": 2.0083, "step": 196 }, { "epoch": 1.790909090909091, "grad_norm": 2.4674253463745117, "learning_rate": 0.00020579268292682926, "loss": 1.4235, "step": 197 }, { "epoch": 1.8, "grad_norm": 2.693068027496338, "learning_rate": 0.00020426829268292682, "loss": 2.0025, "step": 198 }, { "epoch": 1.809090909090909, "grad_norm": 2.6479923725128174, "learning_rate": 0.0002027439024390244, "loss": 1.8531, "step": 199 }, { "epoch": 1.8181818181818183, "grad_norm": 2.5726828575134277, "learning_rate": 0.00020121951219512197, "loss": 1.9046, "step": 200 }, { "epoch": 1.8272727272727272, "grad_norm": 2.286144971847534, "learning_rate": 0.00019969512195121953, "loss": 1.8147, "step": 201 }, { "epoch": 1.8363636363636364, "grad_norm": 2.4627556800842285, "learning_rate": 0.0001981707317073171, "loss": 1.7151, "step": 202 }, { "epoch": 1.8454545454545455, "grad_norm": 2.3641176223754883, "learning_rate": 0.00019664634146341465, "loss": 1.6279, "step": 203 }, { "epoch": 1.8545454545454545, "grad_norm": 2.3488380908966064, "learning_rate": 0.0001951219512195122, "loss": 1.7368, "step": 204 }, { "epoch": 1.8636363636363638, "grad_norm": 2.626936435699463, "learning_rate": 0.00019359756097560977, "loss": 1.6347, "step": 205 }, { "epoch": 1.8727272727272726, "grad_norm": 2.7492589950561523, "learning_rate": 0.00019207317073170733, "loss": 1.8449, "step": 206 }, { "epoch": 
1.8818181818181818, "grad_norm": 2.2747907638549805, "learning_rate": 0.00019054878048780488, "loss": 1.669, "step": 207 }, { "epoch": 1.8909090909090909, "grad_norm": 2.540228843688965, "learning_rate": 0.00018902439024390244, "loss": 1.6892, "step": 208 }, { "epoch": 1.9, "grad_norm": 2.521294593811035, "learning_rate": 0.0001875, "loss": 1.7682, "step": 209 }, { "epoch": 1.9090909090909092, "grad_norm": 2.7691824436187744, "learning_rate": 0.00018597560975609756, "loss": 1.7694, "step": 210 }, { "epoch": 1.9181818181818182, "grad_norm": 2.7531917095184326, "learning_rate": 0.00018445121951219512, "loss": 1.7217, "step": 211 }, { "epoch": 1.9272727272727272, "grad_norm": 2.3607912063598633, "learning_rate": 0.00018292682926829268, "loss": 1.9776, "step": 212 }, { "epoch": 1.9363636363636365, "grad_norm": 2.737658739089966, "learning_rate": 0.00018140243902439024, "loss": 1.7351, "step": 213 }, { "epoch": 1.9454545454545453, "grad_norm": 2.491176128387451, "learning_rate": 0.0001798780487804878, "loss": 1.7922, "step": 214 }, { "epoch": 1.9545454545454546, "grad_norm": 2.4534223079681396, "learning_rate": 0.00017835365853658536, "loss": 1.8887, "step": 215 }, { "epoch": 1.9636363636363636, "grad_norm": 3.2689099311828613, "learning_rate": 0.00017682926829268292, "loss": 2.1455, "step": 216 }, { "epoch": 1.9727272727272727, "grad_norm": 2.444859743118286, "learning_rate": 0.00017530487804878048, "loss": 1.6893, "step": 217 }, { "epoch": 1.981818181818182, "grad_norm": 2.3944003582000732, "learning_rate": 0.00017378048780487804, "loss": 1.4158, "step": 218 }, { "epoch": 1.990909090909091, "grad_norm": 2.8656787872314453, "learning_rate": 0.0001722560975609756, "loss": 2.1827, "step": 219 }, { "epoch": 2.0, "grad_norm": 2.836941719055176, "learning_rate": 0.0001707317073170732, "loss": 1.7853, "step": 220 }, { "epoch": 2.0, "eval_f1": 0.8953, "eval_gen_len": 49.4273, "eval_loss": 1.8119523525238037, "eval_precision": 0.8945, "eval_recall": 0.8963, "eval_rouge1": 
0.4633, "eval_rouge2": 0.2203, "eval_rougeL": 0.3941, "eval_rougeLsum": 0.4285, "eval_runtime": 47.0298, "eval_samples_per_second": 2.339, "eval_steps_per_second": 0.298, "step": 220 }, { "epoch": 2.0090909090909093, "grad_norm": 2.421740770339966, "learning_rate": 0.00016920731707317075, "loss": 1.3819, "step": 221 }, { "epoch": 2.018181818181818, "grad_norm": 2.5827627182006836, "learning_rate": 0.0001676829268292683, "loss": 1.7222, "step": 222 }, { "epoch": 2.0272727272727273, "grad_norm": 2.4553208351135254, "learning_rate": 0.00016615853658536587, "loss": 1.7625, "step": 223 }, { "epoch": 2.036363636363636, "grad_norm": 2.6801366806030273, "learning_rate": 0.00016463414634146343, "loss": 1.6591, "step": 224 }, { "epoch": 2.0454545454545454, "grad_norm": 2.8583321571350098, "learning_rate": 0.00016310975609756098, "loss": 1.8691, "step": 225 }, { "epoch": 2.0545454545454547, "grad_norm": 2.8577945232391357, "learning_rate": 0.00016158536585365854, "loss": 2.2397, "step": 226 }, { "epoch": 2.0636363636363635, "grad_norm": 2.5665602684020996, "learning_rate": 0.0001600609756097561, "loss": 1.6606, "step": 227 }, { "epoch": 2.0727272727272728, "grad_norm": 2.4023563861846924, "learning_rate": 0.00015853658536585366, "loss": 1.6621, "step": 228 }, { "epoch": 2.081818181818182, "grad_norm": 2.426421880722046, "learning_rate": 0.00015701219512195122, "loss": 1.5207, "step": 229 }, { "epoch": 2.090909090909091, "grad_norm": 2.462782382965088, "learning_rate": 0.00015548780487804878, "loss": 1.6258, "step": 230 }, { "epoch": 2.1, "grad_norm": 2.5428097248077393, "learning_rate": 0.00015396341463414634, "loss": 1.4525, "step": 231 }, { "epoch": 2.109090909090909, "grad_norm": 2.179856538772583, "learning_rate": 0.0001524390243902439, "loss": 1.3824, "step": 232 }, { "epoch": 2.118181818181818, "grad_norm": 2.4684817790985107, "learning_rate": 0.00015091463414634146, "loss": 1.5785, "step": 233 }, { "epoch": 2.1272727272727274, "grad_norm": 2.5517914295196533, 
"learning_rate": 0.00014939024390243902, "loss": 1.5097, "step": 234 }, { "epoch": 2.1363636363636362, "grad_norm": 2.6141982078552246, "learning_rate": 0.00014786585365853658, "loss": 1.4524, "step": 235 }, { "epoch": 2.1454545454545455, "grad_norm": 2.820064067840576, "learning_rate": 0.00014634146341463414, "loss": 1.8698, "step": 236 }, { "epoch": 2.1545454545454543, "grad_norm": 2.5539379119873047, "learning_rate": 0.0001448170731707317, "loss": 1.5742, "step": 237 }, { "epoch": 2.1636363636363636, "grad_norm": 2.738567352294922, "learning_rate": 0.00014329268292682926, "loss": 1.6458, "step": 238 }, { "epoch": 2.172727272727273, "grad_norm": 2.583866834640503, "learning_rate": 0.00014176829268292682, "loss": 1.7928, "step": 239 }, { "epoch": 2.1818181818181817, "grad_norm": 2.7706844806671143, "learning_rate": 0.0001402439024390244, "loss": 2.1484, "step": 240 }, { "epoch": 2.190909090909091, "grad_norm": 2.333019495010376, "learning_rate": 0.00013871951219512197, "loss": 1.4385, "step": 241 }, { "epoch": 2.2, "grad_norm": 2.4395415782928467, "learning_rate": 0.00013719512195121953, "loss": 1.1418, "step": 242 }, { "epoch": 2.209090909090909, "grad_norm": 2.8756039142608643, "learning_rate": 0.00013567073170731709, "loss": 2.121, "step": 243 }, { "epoch": 2.2181818181818183, "grad_norm": 2.7827882766723633, "learning_rate": 0.00013414634146341464, "loss": 1.829, "step": 244 }, { "epoch": 2.227272727272727, "grad_norm": 2.5495877265930176, "learning_rate": 0.0001326219512195122, "loss": 1.8662, "step": 245 }, { "epoch": 2.2363636363636363, "grad_norm": 2.831456422805786, "learning_rate": 0.00013109756097560976, "loss": 1.9483, "step": 246 }, { "epoch": 2.2454545454545456, "grad_norm": 3.0142741203308105, "learning_rate": 0.00012957317073170732, "loss": 1.7868, "step": 247 }, { "epoch": 2.2545454545454544, "grad_norm": 2.723198652267456, "learning_rate": 0.00012804878048780488, "loss": 1.7103, "step": 248 }, { "epoch": 2.2636363636363637, "grad_norm": 
3.161470890045166, "learning_rate": 0.00012652439024390244, "loss": 1.8972, "step": 249 }, { "epoch": 2.2727272727272725, "grad_norm": 2.5970962047576904, "learning_rate": 0.000125, "loss": 2.1127, "step": 250 }, { "epoch": 2.2818181818181817, "grad_norm": 2.5795202255249023, "learning_rate": 0.00012347560975609756, "loss": 1.3795, "step": 251 }, { "epoch": 2.290909090909091, "grad_norm": 2.3511993885040283, "learning_rate": 0.00012195121951219512, "loss": 1.2534, "step": 252 }, { "epoch": 2.3, "grad_norm": 2.6542067527770996, "learning_rate": 0.00012042682926829268, "loss": 1.6894, "step": 253 }, { "epoch": 2.309090909090909, "grad_norm": 4.014543533325195, "learning_rate": 0.00011890243902439024, "loss": 1.9333, "step": 254 }, { "epoch": 2.3181818181818183, "grad_norm": 2.828244924545288, "learning_rate": 0.0001173780487804878, "loss": 1.7024, "step": 255 }, { "epoch": 2.327272727272727, "grad_norm": 2.9751951694488525, "learning_rate": 0.00011585365853658537, "loss": 1.7545, "step": 256 }, { "epoch": 2.3363636363636364, "grad_norm": 2.876709461212158, "learning_rate": 0.00011432926829268293, "loss": 2.1827, "step": 257 }, { "epoch": 2.3454545454545457, "grad_norm": 3.0717837810516357, "learning_rate": 0.00011280487804878049, "loss": 1.5924, "step": 258 }, { "epoch": 2.3545454545454545, "grad_norm": 2.5759644508361816, "learning_rate": 0.00011128048780487805, "loss": 1.8342, "step": 259 }, { "epoch": 2.3636363636363638, "grad_norm": 2.3989076614379883, "learning_rate": 0.00010975609756097561, "loss": 1.8479, "step": 260 }, { "epoch": 2.3727272727272726, "grad_norm": 2.9248263835906982, "learning_rate": 0.00010823170731707317, "loss": 1.5602, "step": 261 }, { "epoch": 2.381818181818182, "grad_norm": 2.715651512145996, "learning_rate": 0.00010670731707317073, "loss": 1.5377, "step": 262 }, { "epoch": 2.390909090909091, "grad_norm": 2.504502534866333, "learning_rate": 0.00010518292682926829, "loss": 1.2146, "step": 263 }, { "epoch": 2.4, "grad_norm": 
2.516601085662842, "learning_rate": 0.00010365853658536585, "loss": 1.7834, "step": 264 }, { "epoch": 2.409090909090909, "grad_norm": 2.362786293029785, "learning_rate": 0.00010213414634146341, "loss": 1.5664, "step": 265 }, { "epoch": 2.418181818181818, "grad_norm": 2.057528257369995, "learning_rate": 0.00010060975609756098, "loss": 1.5126, "step": 266 }, { "epoch": 2.4272727272727272, "grad_norm": 2.4843454360961914, "learning_rate": 9.908536585365854e-05, "loss": 1.883, "step": 267 }, { "epoch": 2.4363636363636365, "grad_norm": 2.3680319786071777, "learning_rate": 9.75609756097561e-05, "loss": 1.521, "step": 268 }, { "epoch": 2.4454545454545453, "grad_norm": 2.7291035652160645, "learning_rate": 9.603658536585366e-05, "loss": 1.7955, "step": 269 }, { "epoch": 2.4545454545454546, "grad_norm": 2.395080327987671, "learning_rate": 9.451219512195122e-05, "loss": 1.7271, "step": 270 }, { "epoch": 2.463636363636364, "grad_norm": 2.8394501209259033, "learning_rate": 9.298780487804878e-05, "loss": 1.5939, "step": 271 }, { "epoch": 2.4727272727272727, "grad_norm": 2.4888384342193604, "learning_rate": 9.146341463414634e-05, "loss": 1.282, "step": 272 }, { "epoch": 2.481818181818182, "grad_norm": 2.417750835418701, "learning_rate": 8.99390243902439e-05, "loss": 1.4742, "step": 273 }, { "epoch": 2.4909090909090907, "grad_norm": 2.7631969451904297, "learning_rate": 8.841463414634146e-05, "loss": 1.7823, "step": 274 }, { "epoch": 2.5, "grad_norm": 2.7598719596862793, "learning_rate": 8.689024390243902e-05, "loss": 1.7852, "step": 275 }, { "epoch": 2.509090909090909, "grad_norm": 2.4489023685455322, "learning_rate": 8.53658536585366e-05, "loss": 1.4942, "step": 276 }, { "epoch": 2.518181818181818, "grad_norm": 2.320030927658081, "learning_rate": 8.384146341463415e-05, "loss": 1.5197, "step": 277 }, { "epoch": 2.5272727272727273, "grad_norm": 2.592423677444458, "learning_rate": 8.231707317073171e-05, "loss": 1.7495, "step": 278 }, { "epoch": 2.536363636363636, "grad_norm": 
2.7762107849121094, "learning_rate": 8.079268292682927e-05, "loss": 1.9257, "step": 279 }, { "epoch": 2.5454545454545454, "grad_norm": 2.527858018875122, "learning_rate": 7.926829268292683e-05, "loss": 1.5523, "step": 280 }, { "epoch": 2.5545454545454547, "grad_norm": 3.0844714641571045, "learning_rate": 7.774390243902439e-05, "loss": 1.5701, "step": 281 }, { "epoch": 2.5636363636363635, "grad_norm": 2.9077203273773193, "learning_rate": 7.621951219512195e-05, "loss": 1.4133, "step": 282 }, { "epoch": 2.5727272727272728, "grad_norm": 3.1426498889923096, "learning_rate": 7.469512195121951e-05, "loss": 1.6216, "step": 283 }, { "epoch": 2.581818181818182, "grad_norm": 3.0162813663482666, "learning_rate": 7.317073170731707e-05, "loss": 1.6784, "step": 284 }, { "epoch": 2.590909090909091, "grad_norm": 2.502703905105591, "learning_rate": 7.164634146341463e-05, "loss": 1.6971, "step": 285 }, { "epoch": 2.6, "grad_norm": 2.4731063842773438, "learning_rate": 7.01219512195122e-05, "loss": 1.4637, "step": 286 }, { "epoch": 2.6090909090909093, "grad_norm": 2.648430109024048, "learning_rate": 6.859756097560976e-05, "loss": 1.5027, "step": 287 }, { "epoch": 2.618181818181818, "grad_norm": 3.18878173828125, "learning_rate": 6.707317073170732e-05, "loss": 1.8242, "step": 288 }, { "epoch": 2.6272727272727274, "grad_norm": 2.5465493202209473, "learning_rate": 6.554878048780488e-05, "loss": 1.4872, "step": 289 }, { "epoch": 2.6363636363636362, "grad_norm": 2.4700820446014404, "learning_rate": 6.402439024390244e-05, "loss": 1.7537, "step": 290 }, { "epoch": 2.6454545454545455, "grad_norm": 3.4680936336517334, "learning_rate": 6.25e-05, "loss": 1.8912, "step": 291 }, { "epoch": 2.6545454545454543, "grad_norm": 3.104785442352295, "learning_rate": 6.097560975609756e-05, "loss": 2.276, "step": 292 }, { "epoch": 2.6636363636363636, "grad_norm": 3.0287201404571533, "learning_rate": 5.945121951219512e-05, "loss": 1.6728, "step": 293 }, { "epoch": 2.672727272727273, "grad_norm": 
2.384228229522705, "learning_rate": 5.792682926829269e-05, "loss": 1.6343, "step": 294 }, { "epoch": 2.6818181818181817, "grad_norm": 2.866724967956543, "learning_rate": 5.6402439024390247e-05, "loss": 2.0956, "step": 295 }, { "epoch": 2.690909090909091, "grad_norm": 2.9918506145477295, "learning_rate": 5.4878048780487806e-05, "loss": 1.7989, "step": 296 }, { "epoch": 2.7, "grad_norm": 2.5096092224121094, "learning_rate": 5.3353658536585366e-05, "loss": 1.6828, "step": 297 }, { "epoch": 2.709090909090909, "grad_norm": 2.7829229831695557, "learning_rate": 5.1829268292682925e-05, "loss": 1.4211, "step": 298 }, { "epoch": 2.7181818181818183, "grad_norm": 2.250296115875244, "learning_rate": 5.030487804878049e-05, "loss": 1.4267, "step": 299 }, { "epoch": 2.7272727272727275, "grad_norm": 3.163660764694214, "learning_rate": 4.878048780487805e-05, "loss": 2.1689, "step": 300 }, { "epoch": 2.7363636363636363, "grad_norm": 2.386986255645752, "learning_rate": 4.725609756097561e-05, "loss": 1.4535, "step": 301 }, { "epoch": 2.7454545454545456, "grad_norm": 2.807040214538574, "learning_rate": 4.573170731707317e-05, "loss": 1.5864, "step": 302 }, { "epoch": 2.7545454545454544, "grad_norm": 3.6512951850891113, "learning_rate": 4.420731707317073e-05, "loss": 1.6136, "step": 303 }, { "epoch": 2.7636363636363637, "grad_norm": 2.888395071029663, "learning_rate": 4.26829268292683e-05, "loss": 1.5037, "step": 304 }, { "epoch": 2.7727272727272725, "grad_norm": 2.2506160736083984, "learning_rate": 4.1158536585365856e-05, "loss": 1.2207, "step": 305 }, { "epoch": 2.7818181818181817, "grad_norm": 2.5099334716796875, "learning_rate": 3.9634146341463416e-05, "loss": 1.6804, "step": 306 }, { "epoch": 2.790909090909091, "grad_norm": 2.87251615524292, "learning_rate": 3.8109756097560976e-05, "loss": 1.8993, "step": 307 }, { "epoch": 2.8, "grad_norm": 2.648142099380493, "learning_rate": 3.6585365853658535e-05, "loss": 1.5677, "step": 308 }, { "epoch": 2.809090909090909, "grad_norm": 
3.0312211513519287, "learning_rate": 3.50609756097561e-05, "loss": 2.1024, "step": 309 }, { "epoch": 2.8181818181818183, "grad_norm": 2.32504940032959, "learning_rate": 3.353658536585366e-05, "loss": 1.498, "step": 310 }, { "epoch": 2.827272727272727, "grad_norm": 2.7433340549468994, "learning_rate": 3.201219512195122e-05, "loss": 2.0186, "step": 311 }, { "epoch": 2.8363636363636364, "grad_norm": 2.5491738319396973, "learning_rate": 3.048780487804878e-05, "loss": 1.5459, "step": 312 }, { "epoch": 2.8454545454545457, "grad_norm": 3.0771000385284424, "learning_rate": 2.8963414634146343e-05, "loss": 1.8847, "step": 313 }, { "epoch": 2.8545454545454545, "grad_norm": 2.719658851623535, "learning_rate": 2.7439024390243903e-05, "loss": 1.6282, "step": 314 }, { "epoch": 2.8636363636363638, "grad_norm": 2.6213059425354004, "learning_rate": 2.5914634146341463e-05, "loss": 1.3235, "step": 315 }, { "epoch": 2.8727272727272726, "grad_norm": 2.4952800273895264, "learning_rate": 2.4390243902439026e-05, "loss": 1.6865, "step": 316 }, { "epoch": 2.881818181818182, "grad_norm": 2.896984577178955, "learning_rate": 2.2865853658536585e-05, "loss": 1.5933, "step": 317 }, { "epoch": 2.8909090909090907, "grad_norm": 2.54345965385437, "learning_rate": 2.134146341463415e-05, "loss": 1.9299, "step": 318 }, { "epoch": 2.9, "grad_norm": 2.8932416439056396, "learning_rate": 1.9817073170731708e-05, "loss": 2.0065, "step": 319 }, { "epoch": 2.909090909090909, "grad_norm": 2.6085596084594727, "learning_rate": 1.8292682926829268e-05, "loss": 1.3924, "step": 320 }, { "epoch": 2.918181818181818, "grad_norm": 2.9155259132385254, "learning_rate": 1.676829268292683e-05, "loss": 2.0032, "step": 321 }, { "epoch": 2.9272727272727272, "grad_norm": 2.5170652866363525, "learning_rate": 1.524390243902439e-05, "loss": 1.3955, "step": 322 }, { "epoch": 2.9363636363636365, "grad_norm": 2.5169925689697266, "learning_rate": 1.3719512195121952e-05, "loss": 1.5228, "step": 323 }, { "epoch": 2.9454545454545453, 
"grad_norm": 2.683560848236084, "learning_rate": 1.2195121951219513e-05, "loss": 1.6762, "step": 324 }, { "epoch": 2.9545454545454546, "grad_norm": 2.675593614578247, "learning_rate": 1.0670731707317074e-05, "loss": 1.6192, "step": 325 }, { "epoch": 2.963636363636364, "grad_norm": 2.860233783721924, "learning_rate": 9.146341463414634e-06, "loss": 1.9632, "step": 326 }, { "epoch": 2.9727272727272727, "grad_norm": 2.5503525733947754, "learning_rate": 7.621951219512195e-06, "loss": 1.4166, "step": 327 }, { "epoch": 2.981818181818182, "grad_norm": 2.5347251892089844, "learning_rate": 6.0975609756097564e-06, "loss": 1.4993, "step": 328 }, { "epoch": 2.990909090909091, "grad_norm": 2.628443479537964, "learning_rate": 4.573170731707317e-06, "loss": 1.4424, "step": 329 }, { "epoch": 3.0, "grad_norm": 2.5161614418029785, "learning_rate": 3.0487804878048782e-06, "loss": 1.5952, "step": 330 }, { "epoch": 3.0, "eval_f1": 0.8942, "eval_gen_len": 49.4091, "eval_loss": 1.7933717966079712, "eval_precision": 0.8941, "eval_recall": 0.8945, "eval_rouge1": 0.4708, "eval_rouge2": 0.2246, "eval_rougeL": 0.3984, "eval_rougeLsum": 0.4357, "eval_runtime": 47.9405, "eval_samples_per_second": 2.295, "eval_steps_per_second": 0.292, "step": 330 }, { "epoch": 3.0, "step": 330, "total_flos": 2506179136462848.0, "train_loss": 1.8787952170227513, "train_runtime": 633.4063, "train_samples_per_second": 4.163, "train_steps_per_second": 0.521 } ], "logging_steps": 1, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2506179136462848.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }