|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 5000, |
|
"global_step": 107877, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.634779384501298e-08, |
|
"loss": 14.0889, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.269558769002596e-08, |
|
"loss": 13.3753, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3904338153503894e-07, |
|
"loss": 12.3675, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8539117538005193e-07, |
|
"loss": 11.1815, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.317389692250649e-07, |
|
"loss": 9.3244, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.780867630700779e-07, |
|
"loss": 7.8289, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2443455691509084e-07, |
|
"loss": 6.5179, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.7078235076010385e-07, |
|
"loss": 5.6367, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.171301446051168e-07, |
|
"loss": 5.0302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.634779384501298e-07, |
|
"loss": 4.4714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.098257322951427e-07, |
|
"loss": 4.0915, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.561735261401558e-07, |
|
"loss": 3.7279, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.025213199851688e-07, |
|
"loss": 3.4629, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.488691138301817e-07, |
|
"loss": 3.2831, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.952169076751947e-07, |
|
"loss": 3.0667, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.415647015202077e-07, |
|
"loss": 2.9512, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.879124953652206e-07, |
|
"loss": 2.7697, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.342602892102336e-07, |
|
"loss": 2.6436, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.806080830552465e-07, |
|
"loss": 2.5721, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.269558769002596e-07, |
|
"loss": 2.547, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.733036707452726e-07, |
|
"loss": 2.4468, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0196514645902855e-06, |
|
"loss": 2.4144, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0659992584352986e-06, |
|
"loss": 2.3397, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1123470522803115e-06, |
|
"loss": 2.2857, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1586948461253246e-06, |
|
"loss": 2.2327, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2050426399703375e-06, |
|
"loss": 2.1722, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2513904338153504e-06, |
|
"loss": 2.1463, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2977382276603633e-06, |
|
"loss": 2.1144, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3440860215053765e-06, |
|
"loss": 2.0433, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3904338153503894e-06, |
|
"loss": 2.0107, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4367816091954023e-06, |
|
"loss": 2.0057, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4831294030404154e-06, |
|
"loss": 1.986, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5294771968854283e-06, |
|
"loss": 1.9158, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5758249907304412e-06, |
|
"loss": 1.9486, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6221727845754543e-06, |
|
"loss": 1.85, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6685205784204673e-06, |
|
"loss": 1.8479, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7148683722654802e-06, |
|
"loss": 1.8052, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.761216166110493e-06, |
|
"loss": 1.7931, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8075639599555062e-06, |
|
"loss": 1.7661, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.853911753800519e-06, |
|
"loss": 1.7307, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.900259547645532e-06, |
|
"loss": 1.6742, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.946607341490545e-06, |
|
"loss": 1.629, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9929551353355583e-06, |
|
"loss": 1.6147, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.039302929180571e-06, |
|
"loss": 1.6556, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.085650723025584e-06, |
|
"loss": 1.5819, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.131998516870597e-06, |
|
"loss": 1.6211, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.17834631071561e-06, |
|
"loss": 1.5386, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.224694104560623e-06, |
|
"loss": 1.5553, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.271041898405636e-06, |
|
"loss": 1.5572, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3173896922506492e-06, |
|
"loss": 1.5671, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 6.5196, |
|
"eval_gen_len": 18.9699, |
|
"eval_loss": 1.1690529584884644, |
|
"eval_runtime": 968.3873, |
|
"eval_samples_per_second": 2.061, |
|
"eval_steps_per_second": 1.031, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3637374860956624e-06, |
|
"loss": 1.4969, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.410085279940675e-06, |
|
"loss": 1.5547, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.456433073785688e-06, |
|
"loss": 1.5369, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.502780867630701e-06, |
|
"loss": 1.4743, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.549128661475714e-06, |
|
"loss": 1.5065, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5954764553207267e-06, |
|
"loss": 1.4637, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.64182424916574e-06, |
|
"loss": 1.4976, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.688172043010753e-06, |
|
"loss": 1.482, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.7345198368557656e-06, |
|
"loss": 1.4703, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7808676307007788e-06, |
|
"loss": 1.4279, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.827215424545792e-06, |
|
"loss": 1.4673, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8735632183908046e-06, |
|
"loss": 1.3763, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9199110122358177e-06, |
|
"loss": 1.4375, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.966258806080831e-06, |
|
"loss": 1.3887, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0126065999258435e-06, |
|
"loss": 1.3819, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0589543937708566e-06, |
|
"loss": 1.4471, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1053021876158698e-06, |
|
"loss": 1.3955, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1516499814608825e-06, |
|
"loss": 1.4317, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1979977753058956e-06, |
|
"loss": 1.3863, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.2443455691509087e-06, |
|
"loss": 1.3759, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2906933629959214e-06, |
|
"loss": 1.3599, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3370411568409345e-06, |
|
"loss": 1.4061, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.383388950685947e-06, |
|
"loss": 1.339, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4297367445309603e-06, |
|
"loss": 1.377, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4760845383759734e-06, |
|
"loss": 1.3454, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.522432332220986e-06, |
|
"loss": 1.3565, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5687801260659993e-06, |
|
"loss": 1.3381, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6151279199110124e-06, |
|
"loss": 1.3326, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.661475713756025e-06, |
|
"loss": 1.3442, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.707823507601038e-06, |
|
"loss": 1.3053, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7541713014460513e-06, |
|
"loss": 1.3071, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.800519095291064e-06, |
|
"loss": 1.3401, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8468668891360776e-06, |
|
"loss": 1.3061, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.89321468298109e-06, |
|
"loss": 1.3304, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.939562476826103e-06, |
|
"loss": 1.3354, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9859102706711165e-06, |
|
"loss": 1.3253, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 1.3006, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.078605858361142e-06, |
|
"loss": 1.2852, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1249536522061554e-06, |
|
"loss": 1.2775, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.171301446051168e-06, |
|
"loss": 1.2842, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.217649239896181e-06, |
|
"loss": 1.2935, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.263997033741194e-06, |
|
"loss": 1.2656, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.310344827586207e-06, |
|
"loss": 1.27, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.35669262143122e-06, |
|
"loss": 1.2983, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.403040415276233e-06, |
|
"loss": 1.2317, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.449388209121246e-06, |
|
"loss": 1.2491, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4957360029662596e-06, |
|
"loss": 1.2438, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.542083796811272e-06, |
|
"loss": 1.2608, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.588431590656286e-06, |
|
"loss": 1.2656, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6347793845012985e-06, |
|
"loss": 1.2277, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_bleu": 7.082, |
|
"eval_gen_len": 18.9724, |
|
"eval_loss": 1.0592412948608398, |
|
"eval_runtime": 961.2611, |
|
"eval_samples_per_second": 2.076, |
|
"eval_steps_per_second": 1.038, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.681127178346311e-06, |
|
"loss": 1.3006, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.727474972191325e-06, |
|
"loss": 1.2365, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7738227660363374e-06, |
|
"loss": 1.252, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.82017055988135e-06, |
|
"loss": 1.2351, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.866518353726364e-06, |
|
"loss": 1.2442, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.912866147571376e-06, |
|
"loss": 1.2158, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.959213941416389e-06, |
|
"loss": 1.2123, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.999999811534358e-06, |
|
"loss": 1.1988, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.999983582566248e-06, |
|
"loss": 1.2438, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.999941178008878e-06, |
|
"loss": 1.2145, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.999872598306237e-06, |
|
"loss": 1.1855, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.999777844176376e-06, |
|
"loss": 1.2148, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.999656916611398e-06, |
|
"loss": 1.2128, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.999509816877453e-06, |
|
"loss": 1.2222, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.999336546514719e-06, |
|
"loss": 1.1951, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9991371073373895e-06, |
|
"loss": 1.1656, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.998911501433653e-06, |
|
"loss": 1.1783, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9986597311656735e-06, |
|
"loss": 1.182, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.998381799169562e-06, |
|
"loss": 1.1869, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.99807770835535e-06, |
|
"loss": 1.2242, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.997747461906961e-06, |
|
"loss": 1.1884, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.997391063282177e-06, |
|
"loss": 1.2025, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.997008516212599e-06, |
|
"loss": 1.207, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.996599824703613e-06, |
|
"loss": 1.1898, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.996164993034341e-06, |
|
"loss": 1.1942, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.995704025757605e-06, |
|
"loss": 1.1647, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.995216927699872e-06, |
|
"loss": 1.1961, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.994703703961206e-06, |
|
"loss": 1.1728, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.994164359915219e-06, |
|
"loss": 1.1801, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993598901209003e-06, |
|
"loss": 1.1607, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.993007333763086e-06, |
|
"loss": 1.1674, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9923896637713575e-06, |
|
"loss": 1.1837, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.991745897701012e-06, |
|
"loss": 1.1709, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.991076042292475e-06, |
|
"loss": 1.1672, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.990380104559337e-06, |
|
"loss": 1.1649, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.989658091788277e-06, |
|
"loss": 1.1582, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.988910011538991e-06, |
|
"loss": 1.1485, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.988135871644105e-06, |
|
"loss": 1.1887, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.987335680209099e-06, |
|
"loss": 1.1656, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.986509445612223e-06, |
|
"loss": 1.1182, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.985657176504402e-06, |
|
"loss": 1.0969, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.984778881809156e-06, |
|
"loss": 1.1648, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.983874570722496e-06, |
|
"loss": 1.1689, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.982944252712834e-06, |
|
"loss": 1.1127, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.981987937520884e-06, |
|
"loss": 1.1529, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.981005635159558e-06, |
|
"loss": 1.1279, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.979997355913859e-06, |
|
"loss": 1.1099, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.978963110340778e-06, |
|
"loss": 1.1671, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9779029092691825e-06, |
|
"loss": 1.167, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.976816763799698e-06, |
|
"loss": 1.1316, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_bleu": 7.3283, |
|
"eval_gen_len": 18.9825, |
|
"eval_loss": 1.0111815929412842, |
|
"eval_runtime": 963.287, |
|
"eval_samples_per_second": 2.072, |
|
"eval_steps_per_second": 1.036, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.975704685304601e-06, |
|
"loss": 1.1448, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.97456668542769e-06, |
|
"loss": 1.1221, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.97340277608417e-06, |
|
"loss": 1.1549, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.972212969460528e-06, |
|
"loss": 1.1493, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9709972780144e-06, |
|
"loss": 1.1595, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.969755714474447e-06, |
|
"loss": 1.1123, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.968488291840215e-06, |
|
"loss": 1.123, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9671950233820075e-06, |
|
"loss": 1.1096, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.965875922640738e-06, |
|
"loss": 1.1296, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.964531003427792e-06, |
|
"loss": 1.1528, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9631602798248845e-06, |
|
"loss": 1.1315, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.961763766183908e-06, |
|
"loss": 1.0704, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.960341477126786e-06, |
|
"loss": 1.1107, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9588934275453165e-06, |
|
"loss": 1.1351, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.957419632601022e-06, |
|
"loss": 1.1132, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.955920107724982e-06, |
|
"loss": 1.0942, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.95439486861768e-06, |
|
"loss": 1.1466, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.952843931248834e-06, |
|
"loss": 1.1164, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.951267311857229e-06, |
|
"loss": 1.1336, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.949665026950551e-06, |
|
"loss": 1.0946, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.948037093305211e-06, |
|
"loss": 1.0768, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.94638352796617e-06, |
|
"loss": 1.1048, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.944704348246759e-06, |
|
"loss": 1.0964, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.942999571728503e-06, |
|
"loss": 1.115, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.941269216260929e-06, |
|
"loss": 1.0912, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.9395132999613874e-06, |
|
"loss": 1.1039, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.937731841214856e-06, |
|
"loss": 1.1001, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.935924858673751e-06, |
|
"loss": 1.1137, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.934092371257727e-06, |
|
"loss": 1.1154, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.932234398153488e-06, |
|
"loss": 1.1103, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.930350958814578e-06, |
|
"loss": 1.0984, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9284420729611785e-06, |
|
"loss": 1.1006, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.926507760579906e-06, |
|
"loss": 1.1028, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9245480419236015e-06, |
|
"loss": 1.0915, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.922562937511115e-06, |
|
"loss": 1.0716, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.920552468127093e-06, |
|
"loss": 1.0817, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.918516654821765e-06, |
|
"loss": 1.113, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.916455518910713e-06, |
|
"loss": 1.1086, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.9143690819746595e-06, |
|
"loss": 1.0853, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.912257365859234e-06, |
|
"loss": 1.1003, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.9101203926747465e-06, |
|
"loss": 1.074, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.907958184795958e-06, |
|
"loss": 1.069, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.905770764861842e-06, |
|
"loss": 1.0779, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.903558155775352e-06, |
|
"loss": 1.0746, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.901320380703179e-06, |
|
"loss": 1.0855, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.8990574630755085e-06, |
|
"loss": 1.0499, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.896769426585778e-06, |
|
"loss": 1.0826, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.8944562951904256e-06, |
|
"loss": 1.0936, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.892118093108641e-06, |
|
"loss": 1.0893, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.889754844822113e-06, |
|
"loss": 1.0833, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_bleu": 7.4462, |
|
"eval_gen_len": 18.977, |
|
"eval_loss": 0.972794771194458, |
|
"eval_runtime": 963.5918, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.036, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.887366575074769e-06, |
|
"loss": 1.1106, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.884953308872522e-06, |
|
"loss": 1.0961, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.882515071483003e-06, |
|
"loss": 1.0668, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.8800518884353e-06, |
|
"loss": 1.0548, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.8775637855196885e-06, |
|
"loss": 1.1031, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.875050788787367e-06, |
|
"loss": 1.0564, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.872512924550172e-06, |
|
"loss": 1.0837, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.869950219380317e-06, |
|
"loss": 1.0816, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.867362700110105e-06, |
|
"loss": 1.0352, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.864750393831651e-06, |
|
"loss": 1.068, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.8621133278965956e-06, |
|
"loss": 1.0525, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.859451529915825e-06, |
|
"loss": 1.0867, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.856765027759171e-06, |
|
"loss": 1.0544, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.8540538495551314e-06, |
|
"loss": 1.0805, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.851318023690567e-06, |
|
"loss": 1.0598, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.848557578810407e-06, |
|
"loss": 1.0418, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.845772543817351e-06, |
|
"loss": 1.0486, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.842962947871561e-06, |
|
"loss": 1.0618, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.840128820390364e-06, |
|
"loss": 1.0583, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.837270191047937e-06, |
|
"loss": 1.0703, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.834387089774999e-06, |
|
"loss": 1.039, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.8314795467585e-06, |
|
"loss": 1.0505, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.828547592441298e-06, |
|
"loss": 1.0539, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.8255912575218485e-06, |
|
"loss": 1.0375, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8226105729538786e-06, |
|
"loss": 1.0378, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8196055699460636e-06, |
|
"loss": 1.0304, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.816576279961699e-06, |
|
"loss": 1.065, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.813522734718373e-06, |
|
"loss": 1.0445, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.810444966187635e-06, |
|
"loss": 1.0327, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.807343006594658e-06, |
|
"loss": 1.0751, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.804216888417904e-06, |
|
"loss": 1.0444, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.801066644388781e-06, |
|
"loss": 1.005, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.797892307491303e-06, |
|
"loss": 1.0401, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.794693910961745e-06, |
|
"loss": 1.0566, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.7914714882882924e-06, |
|
"loss": 1.0478, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.788225073210691e-06, |
|
"loss": 1.0656, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.784954699719895e-06, |
|
"loss": 1.0244, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.7816604020577105e-06, |
|
"loss": 1.0288, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.778342214716438e-06, |
|
"loss": 1.0515, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.775000172438508e-06, |
|
"loss": 1.0391, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.771634310216122e-06, |
|
"loss": 1.0481, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.768244663290881e-06, |
|
"loss": 1.0502, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.76483126715342e-06, |
|
"loss": 1.0855, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.761394157543038e-06, |
|
"loss": 1.0305, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.757933370447317e-06, |
|
"loss": 1.0609, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.754448942101753e-06, |
|
"loss": 1.0152, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7509409089893695e-06, |
|
"loss": 1.0449, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7474093078403436e-06, |
|
"loss": 1.0152, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.743854175631614e-06, |
|
"loss": 1.0, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.740275549586496e-06, |
|
"loss": 1.0339, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 8.0126, |
|
"eval_gen_len": 18.982, |
|
"eval_loss": 0.9545806646347046, |
|
"eval_runtime": 963.2133, |
|
"eval_samples_per_second": 2.072, |
|
"eval_steps_per_second": 1.036, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.736673467174295e-06, |
|
"loss": 1.0262, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.733047966109911e-06, |
|
"loss": 1.054, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.729399084353444e-06, |
|
"loss": 0.9769, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.725726860109794e-06, |
|
"loss": 1.0007, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.7220313318282704e-06, |
|
"loss": 1.0438, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.718312538202179e-06, |
|
"loss": 1.0347, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.7145705181684195e-06, |
|
"loss": 1.04, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.710805310907083e-06, |
|
"loss": 1.0099, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.707016955841034e-06, |
|
"loss": 1.0447, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.703205492635504e-06, |
|
"loss": 0.9819, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.699370961197675e-06, |
|
"loss": 1.0066, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.695513401676256e-06, |
|
"loss": 1.007, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.691632854461071e-06, |
|
"loss": 1.037, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.68772936018263e-06, |
|
"loss": 1.0054, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.683802959711709e-06, |
|
"loss": 1.0078, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.6798536941589125e-06, |
|
"loss": 1.0187, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.675881604874257e-06, |
|
"loss": 1.0209, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.6718867334467245e-06, |
|
"loss": 0.9908, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.667869121703835e-06, |
|
"loss": 0.9868, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.6638288117112075e-06, |
|
"loss": 0.985, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.6597658457721175e-06, |
|
"loss": 1.0143, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.655680266427057e-06, |
|
"loss": 0.9698, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.651572116453281e-06, |
|
"loss": 1.0149, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.6474414388643755e-06, |
|
"loss": 1.0212, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.643288276909791e-06, |
|
"loss": 1.0126, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.639112674074396e-06, |
|
"loss": 1.0163, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.634914674078025e-06, |
|
"loss": 0.9814, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.630694320875016e-06, |
|
"loss": 1.039, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.626451658653752e-06, |
|
"loss": 1.0087, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.6221867318361975e-06, |
|
"loss": 0.9795, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.617899585077436e-06, |
|
"loss": 1.0499, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.613590263265198e-06, |
|
"loss": 1.0109, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.6092588115193945e-06, |
|
"loss": 1.0347, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.604905275191647e-06, |
|
"loss": 1.0123, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.600529699864803e-06, |
|
"loss": 1.0216, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.59613213135247e-06, |
|
"loss": 0.9955, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.59171261569853e-06, |
|
"loss": 0.9794, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.587271199176654e-06, |
|
"loss": 1.0098, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.58280792828983e-06, |
|
"loss": 1.0176, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.57832284976986e-06, |
|
"loss": 1.0104, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.5738160105768815e-06, |
|
"loss": 1.0023, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.569287457898874e-06, |
|
"loss": 1.0333, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.564737239151164e-06, |
|
"loss": 0.9876, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.560165401975925e-06, |
|
"loss": 0.9966, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.555571994241685e-06, |
|
"loss": 1.0267, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.550957064042821e-06, |
|
"loss": 1.0371, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.546320659699059e-06, |
|
"loss": 0.9998, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.541662829754963e-06, |
|
"loss": 1.0174, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.536983622979429e-06, |
|
"loss": 0.99, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.532283088365179e-06, |
|
"loss": 1.025, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_bleu": 7.7648, |
|
"eval_gen_len": 18.9805, |
|
"eval_loss": 0.9337242245674133, |
|
"eval_runtime": 963.8615, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.035, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.527561275128241e-06, |
|
"loss": 1.03, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.522818232707435e-06, |
|
"loss": 1.0015, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.5180540107638634e-06, |
|
"loss": 0.9622, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.513268659180377e-06, |
|
"loss": 1.0272, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.508462228061065e-06, |
|
"loss": 0.994, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.503634767730724e-06, |
|
"loss": 0.9826, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.498786328734336e-06, |
|
"loss": 0.9823, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.49391696183653e-06, |
|
"loss": 0.9975, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.489026718021061e-06, |
|
"loss": 0.9776, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.484115648490271e-06, |
|
"loss": 0.9798, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.4791838046645545e-06, |
|
"loss": 0.9634, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.474231238181817e-06, |
|
"loss": 1.0174, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.469258000896936e-06, |
|
"loss": 0.9762, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.464264144881221e-06, |
|
"loss": 1.0287, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.459249722421866e-06, |
|
"loss": 0.977, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.454214786021399e-06, |
|
"loss": 0.9885, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.449159388397138e-06, |
|
"loss": 1.0286, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.4440835824806364e-06, |
|
"loss": 0.9907, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.438987421417126e-06, |
|
"loss": 0.9827, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.433870958564965e-06, |
|
"loss": 0.9911, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.428734247495077e-06, |
|
"loss": 1.0034, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.423577341990392e-06, |
|
"loss": 0.9539, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.41840029604528e-06, |
|
"loss": 1.0161, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.413203163864988e-06, |
|
"loss": 1.0044, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.407985999865072e-06, |
|
"loss": 0.9767, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.4027488586708274e-06, |
|
"loss": 0.9824, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.397491795116719e-06, |
|
"loss": 0.9905, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.392214864245801e-06, |
|
"loss": 0.9632, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.386918121309147e-06, |
|
"loss": 0.9724, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.381601621765267e-06, |
|
"loss": 1.0322, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.376265421279532e-06, |
|
"loss": 1.0209, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3709095757235835e-06, |
|
"loss": 0.9452, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.365534141174756e-06, |
|
"loss": 0.9999, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.360139173915486e-06, |
|
"loss": 0.9786, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.3547247304327234e-06, |
|
"loss": 1.0074, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.34929086741734e-06, |
|
"loss": 0.9756, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.343837641763535e-06, |
|
"loss": 0.9844, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.338365110568242e-06, |
|
"loss": 1.0181, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.332873331130531e-06, |
|
"loss": 0.9706, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.3273623609509996e-06, |
|
"loss": 0.9749, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.321832257731189e-06, |
|
"loss": 0.969, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.316283079372959e-06, |
|
"loss": 1.0083, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.3107148839779e-06, |
|
"loss": 0.9969, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.305127729846711e-06, |
|
"loss": 0.9863, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.299521675478598e-06, |
|
"loss": 0.9646, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.293896779570656e-06, |
|
"loss": 0.9704, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.288253101017259e-06, |
|
"loss": 0.949, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.282590698909439e-06, |
|
"loss": 0.9667, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.276909632534269e-06, |
|
"loss": 0.9565, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.271209961374246e-06, |
|
"loss": 0.9733, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 7.9496, |
|
"eval_gen_len": 18.9815, |
|
"eval_loss": 0.9227670431137085, |
|
"eval_runtime": 961.6825, |
|
"eval_samples_per_second": 2.076, |
|
"eval_steps_per_second": 1.038, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.26549174510666e-06, |
|
"loss": 0.9857, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.259755043602978e-06, |
|
"loss": 0.9431, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.253999916928211e-06, |
|
"loss": 0.9731, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.248226425340288e-06, |
|
"loss": 0.9833, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.242434629289421e-06, |
|
"loss": 0.9582, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.236624589417482e-06, |
|
"loss": 1.002, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.230796366557354e-06, |
|
"loss": 0.9598, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.224950021732307e-06, |
|
"loss": 0.9576, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.21908561615535e-06, |
|
"loss": 1.0025, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.213203211228596e-06, |
|
"loss": 0.9746, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.2073028685426146e-06, |
|
"loss": 0.9419, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.20138464987579e-06, |
|
"loss": 0.9092, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.195448617193676e-06, |
|
"loss": 0.9345, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.189494832648339e-06, |
|
"loss": 0.9613, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.183523358577716e-06, |
|
"loss": 0.9596, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.177534257504961e-06, |
|
"loss": 0.9412, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.171527592137783e-06, |
|
"loss": 0.9418, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.1655034253678e-06, |
|
"loss": 0.9166, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.15946182026987e-06, |
|
"loss": 0.9483, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.153402840101438e-06, |
|
"loss": 0.9604, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.1473265483018735e-06, |
|
"loss": 0.928, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.141233008491797e-06, |
|
"loss": 0.953, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.1351222844724305e-06, |
|
"loss": 0.935, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.128994440224912e-06, |
|
"loss": 0.9269, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.122849539909637e-06, |
|
"loss": 0.9423, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.1166876478655835e-06, |
|
"loss": 0.9402, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.110508828609638e-06, |
|
"loss": 0.9472, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.1043131468359155e-06, |
|
"loss": 0.9457, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.098100667415095e-06, |
|
"loss": 0.8999, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.091871455393725e-06, |
|
"loss": 0.9388, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.0856255759935515e-06, |
|
"loss": 0.9416, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.079363094610836e-06, |
|
"loss": 0.98, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.0730840768156625e-06, |
|
"loss": 0.9424, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.066788588351261e-06, |
|
"loss": 0.9192, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.0604766951333105e-06, |
|
"loss": 0.9188, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.054148463249257e-06, |
|
"loss": 0.9636, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.0478039589576146e-06, |
|
"loss": 0.9716, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.041443248687273e-06, |
|
"loss": 0.955, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.035066399036807e-06, |
|
"loss": 0.9519, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.028673476773774e-06, |
|
"loss": 0.9069, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.022264548834016e-06, |
|
"loss": 0.9085, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.015839682320959e-06, |
|
"loss": 0.9313, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.0093989445049135e-06, |
|
"loss": 0.9648, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.002942402822364e-06, |
|
"loss": 0.9398, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.9964701248752665e-06, |
|
"loss": 0.9305, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.989982178430345e-06, |
|
"loss": 0.9475, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.983478631418372e-06, |
|
"loss": 0.9448, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.976959551933464e-06, |
|
"loss": 0.9754, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.970425008232369e-06, |
|
"loss": 0.9564, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.96387506873375e-06, |
|
"loss": 0.9035, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_bleu": 7.689, |
|
"eval_gen_len": 18.9795, |
|
"eval_loss": 0.9161636233329773, |
|
"eval_runtime": 963.6243, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.036, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.957309802017466e-06, |
|
"loss": 0.9898, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.95072927682386e-06, |
|
"loss": 0.8963, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.944133562053033e-06, |
|
"loss": 0.9179, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.937522726764128e-06, |
|
"loss": 0.9368, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.930896840174603e-06, |
|
"loss": 0.9281, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.924255971659506e-06, |
|
"loss": 0.9706, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.9176001907507546e-06, |
|
"loss": 0.9401, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.910929567136401e-06, |
|
"loss": 0.9185, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.904244170659904e-06, |
|
"loss": 0.9406, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.8975440713194016e-06, |
|
"loss": 0.923, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.890829339266973e-06, |
|
"loss": 0.9427, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.884100044807907e-06, |
|
"loss": 0.9471, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.877356258399967e-06, |
|
"loss": 0.9367, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.870598050652648e-06, |
|
"loss": 0.9063, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.863825492326444e-06, |
|
"loss": 0.9604, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.857038654332102e-06, |
|
"loss": 0.8888, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.850237607729882e-06, |
|
"loss": 0.9494, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.8434224237288134e-06, |
|
"loss": 0.8925, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.836593173685946e-06, |
|
"loss": 0.9473, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.829749929105609e-06, |
|
"loss": 0.9236, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.822892761638656e-06, |
|
"loss": 0.9083, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.816021743081717e-06, |
|
"loss": 0.9135, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.8091369453764504e-06, |
|
"loss": 0.9526, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.8022384406087824e-06, |
|
"loss": 0.9345, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.79532630100816e-06, |
|
"loss": 0.9176, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.7884005989467866e-06, |
|
"loss": 0.9467, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.781461406938874e-06, |
|
"loss": 0.9271, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.774508797639874e-06, |
|
"loss": 0.9588, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.7675428438457234e-06, |
|
"loss": 0.9201, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.760563618492079e-06, |
|
"loss": 0.9285, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.7535711946535552e-06, |
|
"loss": 0.9484, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.746565645542958e-06, |
|
"loss": 0.9253, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.739547044510521e-06, |
|
"loss": 0.9179, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.7325154650431317e-06, |
|
"loss": 0.9143, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.725470980763569e-06, |
|
"loss": 0.9382, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.718413665429729e-06, |
|
"loss": 0.9533, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.711343592933851e-06, |
|
"loss": 0.9214, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.704260837301746e-06, |
|
"loss": 0.8943, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.6971654726920243e-06, |
|
"loss": 0.9063, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.690057573395311e-06, |
|
"loss": 0.8965, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.6829372138334763e-06, |
|
"loss": 0.9241, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.6758044685588547e-06, |
|
"loss": 0.9644, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.668659412253458e-06, |
|
"loss": 0.9391, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.661502119728203e-06, |
|
"loss": 0.9273, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.6543326659221213e-06, |
|
"loss": 0.9354, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.6471511259015764e-06, |
|
"loss": 0.9162, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.6399575748594796e-06, |
|
"loss": 0.9281, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.6327520881145002e-06, |
|
"loss": 0.8999, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.6255347411102777e-06, |
|
"loss": 0.909, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.6183056094146333e-06, |
|
"loss": 0.9386, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_bleu": 7.6781, |
|
"eval_gen_len": 18.9825, |
|
"eval_loss": 0.9038894176483154, |
|
"eval_runtime": 963.1893, |
|
"eval_samples_per_second": 2.072, |
|
"eval_steps_per_second": 1.036, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.611064768718777e-06, |
|
"loss": 0.8965, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.603812294836515e-06, |
|
"loss": 0.9717, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.5965482637034567e-06, |
|
"loss": 0.8826, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.58927275137622e-06, |
|
"loss": 0.9187, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.581985834031635e-06, |
|
"loss": 0.888, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.5746875879659426e-06, |
|
"loss": 0.9339, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.5673780895940034e-06, |
|
"loss": 0.9291, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.56005741544849e-06, |
|
"loss": 0.9303, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5527256421790902e-06, |
|
"loss": 0.9123, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5453828465517e-06, |
|
"loss": 0.9037, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.538029105447628e-06, |
|
"loss": 0.9447, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.530664495862782e-06, |
|
"loss": 0.9011, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.523289094906865e-06, |
|
"loss": 0.8897, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.5159029798025717e-06, |
|
"loss": 0.9278, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.5085062278847765e-06, |
|
"loss": 0.9204, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.5010989165997227e-06, |
|
"loss": 0.9185, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.4936811235042158e-06, |
|
"loss": 0.9067, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.4862529262648076e-06, |
|
"loss": 0.9469, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.4788144026569846e-06, |
|
"loss": 0.942, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.4713656305643543e-06, |
|
"loss": 0.9493, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.4639066879778278e-06, |
|
"loss": 0.9285, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.4564376529948045e-06, |
|
"loss": 0.9242, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.4489586038183564e-06, |
|
"loss": 0.9045, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.4414696187564035e-06, |
|
"loss": 0.9233, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.4339707762209006e-06, |
|
"loss": 0.896, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.426462154727012e-06, |
|
"loss": 0.9072, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.418943832892291e-06, |
|
"loss": 0.9419, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.411415889435856e-06, |
|
"loss": 0.8977, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.403878403177567e-06, |
|
"loss": 0.8837, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.3963314530372e-06, |
|
"loss": 0.9148, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.388775118033621e-06, |
|
"loss": 0.9005, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.381209477283957e-06, |
|
"loss": 0.8907, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.3736346100027717e-06, |
|
"loss": 0.9064, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.3660505955012308e-06, |
|
"loss": 0.9026, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.3584575131862757e-06, |
|
"loss": 0.9501, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.3508554425597896e-06, |
|
"loss": 0.9016, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.3432444632177663e-06, |
|
"loss": 0.9159, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.335624654849477e-06, |
|
"loss": 0.8766, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.327996097236636e-06, |
|
"loss": 0.9125, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.320358870252563e-06, |
|
"loss": 0.8931, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.3127130538613506e-06, |
|
"loss": 0.9088, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.3050587281170245e-06, |
|
"loss": 0.9427, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.297395973162705e-06, |
|
"loss": 0.9042, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.2897248692297678e-06, |
|
"loss": 0.9094, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.2820454966370102e-06, |
|
"loss": 0.9384, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.2743579357897997e-06, |
|
"loss": 0.888, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.266662267179238e-06, |
|
"loss": 0.9208, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.25895857138132e-06, |
|
"loss": 0.9304, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2512469290560848e-06, |
|
"loss": 0.9346, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2435274209467765e-06, |
|
"loss": 0.9073, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 7.8607, |
|
"eval_gen_len": 18.9805, |
|
"eval_loss": 0.8985511064529419, |
|
"eval_runtime": 966.2977, |
|
"eval_samples_per_second": 2.066, |
|
"eval_steps_per_second": 1.033, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.235800127878995e-06, |
|
"loss": 0.8804, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.228065130759852e-06, |
|
"loss": 0.9301, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.220322510577121e-06, |
|
"loss": 0.9109, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.2125723483983935e-06, |
|
"loss": 0.9029, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.204814725370227e-06, |
|
"loss": 0.9089, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.1970497227172957e-06, |
|
"loss": 0.9184, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.1892774217415433e-06, |
|
"loss": 0.894, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.181497903821326e-06, |
|
"loss": 0.905, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.1737112504105655e-06, |
|
"loss": 0.889, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.165917543037894e-06, |
|
"loss": 0.8977, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.1581168633058002e-06, |
|
"loss": 0.9111, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.150309292889776e-06, |
|
"loss": 0.9073, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.14249491353746e-06, |
|
"loss": 0.9229, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.134673807067784e-06, |
|
"loss": 0.8961, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.1268460553701146e-06, |
|
"loss": 0.8976, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.1190117404033943e-06, |
|
"loss": 0.9186, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.111170944195286e-06, |
|
"loss": 0.8826, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.1033237488413144e-06, |
|
"loss": 0.9097, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.095470236504003e-06, |
|
"loss": 0.8805, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.0876104894120164e-06, |
|
"loss": 0.8718, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.0797445898593007e-06, |
|
"loss": 0.9029, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.07187262020422e-06, |
|
"loss": 0.9297, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.0639946628686913e-06, |
|
"loss": 0.8916, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.0561108003373275e-06, |
|
"loss": 0.9609, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.0482211151565693e-06, |
|
"loss": 0.8926, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.0403256899338236e-06, |
|
"loss": 0.8811, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.032424607336595e-06, |
|
"loss": 0.8748, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.0245179500916245e-06, |
|
"loss": 0.9036, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.016605800984021e-06, |
|
"loss": 0.9, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.0086882428563948e-06, |
|
"loss": 0.8919, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.0007653586079884e-06, |
|
"loss": 0.924, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.9928372311938134e-06, |
|
"loss": 0.9289, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.984903943623779e-06, |
|
"loss": 0.8922, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.9769655789618185e-06, |
|
"loss": 0.904, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.9690222203250286e-06, |
|
"loss": 0.9239, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.961073950882793e-06, |
|
"loss": 0.9248, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.9531208538559114e-06, |
|
"loss": 0.8867, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.945163012515732e-06, |
|
"loss": 0.8872, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.9372005101832767e-06, |
|
"loss": 0.9082, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.9292334302283683e-06, |
|
"loss": 0.8745, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.9212618560687604e-06, |
|
"loss": 0.8917, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.9132858711692607e-06, |
|
"loss": 0.8771, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.9053055590408603e-06, |
|
"loss": 0.8999, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.8973210032398567e-06, |
|
"loss": 0.8902, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.88933228736698e-06, |
|
"loss": 0.8912, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.881339495066518e-06, |
|
"loss": 0.8482, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.8733427100254383e-06, |
|
"loss": 0.9028, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.8653420159725166e-06, |
|
"loss": 0.8881, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.8573374966774546e-06, |
|
"loss": 0.8913, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.849329235950007e-06, |
|
"loss": 0.8928, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_bleu": 8.0666, |
|
"eval_gen_len": 18.981, |
|
"eval_loss": 0.8941593170166016, |
|
"eval_runtime": 968.3576, |
|
"eval_samples_per_second": 2.061, |
|
"eval_steps_per_second": 1.031, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.8413173176391006e-06, |
|
"loss": 0.8992, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.8333018256319617e-06, |
|
"loss": 0.8777, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.82528284385323e-06, |
|
"loss": 0.8709, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.817260456264086e-06, |
|
"loss": 0.9326, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.809234746861372e-06, |
|
"loss": 0.9006, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.801205799676709e-06, |
|
"loss": 0.929, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.7931736987756165e-06, |
|
"loss": 0.9161, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.7851385282566372e-06, |
|
"loss": 0.908, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.7771003722504534e-06, |
|
"loss": 0.9008, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.769059314919006e-06, |
|
"loss": 0.9195, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.7610154404546136e-06, |
|
"loss": 0.8826, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.752968833079089e-06, |
|
"loss": 0.858, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.744919577042863e-06, |
|
"loss": 0.9305, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.7368677566240976e-06, |
|
"loss": 0.9289, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.7288134561278017e-06, |
|
"loss": 0.9168, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.720756759884956e-06, |
|
"loss": 0.8958, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.7126977522516223e-06, |
|
"loss": 0.8924, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.7046365176080635e-06, |
|
"loss": 0.9235, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6965731403578614e-06, |
|
"loss": 0.9171, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6885077049270316e-06, |
|
"loss": 0.9159, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6804402957631364e-06, |
|
"loss": 0.8949, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6723709973344088e-06, |
|
"loss": 0.8796, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6642998941288573e-06, |
|
"loss": 0.8943, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.6562270706533917e-06, |
|
"loss": 0.8714, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.6481526114329313e-06, |
|
"loss": 0.8647, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.640076601009522e-06, |
|
"loss": 0.9151, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.631999123941452e-06, |
|
"loss": 0.8942, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.6239202648023666e-06, |
|
"loss": 0.8863, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.6158401081803784e-06, |
|
"loss": 0.9138, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.6077587386771896e-06, |
|
"loss": 0.9095, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5996762409071978e-06, |
|
"loss": 0.9049, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.591592699496616e-06, |
|
"loss": 0.8793, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.583508199082585e-06, |
|
"loss": 0.864, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.575422824312284e-06, |
|
"loss": 0.9173, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5673366598420487e-06, |
|
"loss": 0.8799, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5592497903364834e-06, |
|
"loss": 0.8751, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5511623004675743e-06, |
|
"loss": 0.8871, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5430742749138015e-06, |
|
"loss": 0.9121, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.534985798359257e-06, |
|
"loss": 0.8947, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5268969554927512e-06, |
|
"loss": 0.8911, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5188078310069326e-06, |
|
"loss": 0.8626, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5107185095973967e-06, |
|
"loss": 0.8943, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5026290759618026e-06, |
|
"loss": 0.9163, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.494539614798982e-06, |
|
"loss": 0.8599, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.486450210808057e-06, |
|
"loss": 0.8907, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4783609486875507e-06, |
|
"loss": 0.889, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4702719131345003e-06, |
|
"loss": 0.8872, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.462183188843569e-06, |
|
"loss": 0.8836, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4540948605061652e-06, |
|
"loss": 0.8837, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.446007012809548e-06, |
|
"loss": 0.884, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_bleu": 8.1679, |
|
"eval_gen_len": 18.9785, |
|
"eval_loss": 0.8873680830001831, |
|
"eval_runtime": 968.5261, |
|
"eval_samples_per_second": 2.061, |
|
"eval_steps_per_second": 1.03, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.437919730435946e-06, |
|
"loss": 0.8902, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4298330980616674e-06, |
|
"loss": 0.8499, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4217472003562144e-06, |
|
"loss": 0.8927, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.413662121981399e-06, |
|
"loss": 0.9325, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4055779475904536e-06, |
|
"loss": 0.8586, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.397494761827145e-06, |
|
"loss": 0.8788, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.3894126493248884e-06, |
|
"loss": 0.9161, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.3813316947058634e-06, |
|
"loss": 0.9058, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.373251982580124e-06, |
|
"loss": 0.8881, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.365173597544718e-06, |
|
"loss": 0.9027, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.3570966241827947e-06, |
|
"loss": 0.914, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.3490211470627254e-06, |
|
"loss": 0.8989, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.3409472507372134e-06, |
|
"loss": 0.8792, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3328750197424115e-06, |
|
"loss": 0.8651, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3248045385970357e-06, |
|
"loss": 0.9071, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.316735891801482e-06, |
|
"loss": 0.8676, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.308669163836938e-06, |
|
"loss": 0.9145, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.300604439164501e-06, |
|
"loss": 0.9072, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2925418022242955e-06, |
|
"loss": 0.8916, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2844813374345837e-06, |
|
"loss": 0.872, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2764231291908847e-06, |
|
"loss": 0.8983, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2683672618650945e-06, |
|
"loss": 0.8939, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2603138198045966e-06, |
|
"loss": 0.9226, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.2522628873313806e-06, |
|
"loss": 0.8937, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.2442145487411605e-06, |
|
"loss": 0.8331, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.2361688883024912e-06, |
|
"loss": 0.8817, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.228125990255889e-06, |
|
"loss": 0.875, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2200859388129447e-06, |
|
"loss": 0.9098, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2120488181554433e-06, |
|
"loss": 0.8639, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2040147124344864e-06, |
|
"loss": 0.89, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.195983705769607e-06, |
|
"loss": 0.8963, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1879558822478883e-06, |
|
"loss": 0.9029, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1799313259230894e-06, |
|
"loss": 0.8764, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1719101208147557e-06, |
|
"loss": 0.8964, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.163892350907349e-06, |
|
"loss": 0.9076, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1558781001493604e-06, |
|
"loss": 0.8936, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.147867452452435e-06, |
|
"loss": 0.8659, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1398604916904923e-06, |
|
"loss": 0.897, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.131857301698852e-06, |
|
"loss": 0.8884, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.123857966273348e-06, |
|
"loss": 0.8893, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.115862569169458e-06, |
|
"loss": 0.9308, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1078711941014242e-06, |
|
"loss": 0.8899, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.099883924741376e-06, |
|
"loss": 0.8964, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0919008447184562e-06, |
|
"loss": 0.9132, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.083922037617943e-06, |
|
"loss": 0.9075, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.075947586980376e-06, |
|
"loss": 0.8968, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.067977576300682e-06, |
|
"loss": 0.8683, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0600120890272976e-06, |
|
"loss": 0.8596, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0520512085613e-06, |
|
"loss": 0.8863, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0440950182555337e-06, |
|
"loss": 0.8786, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_bleu": 7.8516, |
|
"eval_gen_len": 18.9805, |
|
"eval_loss": 0.8830544948577881, |
|
"eval_runtime": 968.5663, |
|
"eval_samples_per_second": 2.061, |
|
"eval_steps_per_second": 1.03, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0361436014137315e-06, |
|
"loss": 0.8911, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.028197041289649e-06, |
|
"loss": 0.9172, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0202554210861906e-06, |
|
"loss": 0.8468, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0123188239545375e-06, |
|
"loss": 0.8445, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0043873329932774e-06, |
|
"loss": 0.8917, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.996461031247536e-06, |
|
"loss": 0.8759, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9885400017081075e-06, |
|
"loss": 0.9045, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9806243273105807e-06, |
|
"loss": 0.8706, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9727140909344767e-06, |
|
"loss": 0.8973, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9648093754023784e-06, |
|
"loss": 0.8648, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.956910263479066e-06, |
|
"loss": 0.8774, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9490168378706456e-06, |
|
"loss": 0.9013, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9411291812236855e-06, |
|
"loss": 0.9092, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9332473761243532e-06, |
|
"loss": 0.8628, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.925371505097548e-06, |
|
"loss": 0.8867, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9175016506060357e-06, |
|
"loss": 0.8694, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9096378950495915e-06, |
|
"loss": 0.8613, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9017803207641282e-06, |
|
"loss": 0.8966, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8939290100208425e-06, |
|
"loss": 0.8381, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8860840450253467e-06, |
|
"loss": 0.866, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8782455079168144e-06, |
|
"loss": 0.9077, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8704134807671138e-06, |
|
"loss": 0.908, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8625880455799562e-06, |
|
"loss": 0.8655, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8547692842900283e-06, |
|
"loss": 0.8985, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8469572787621426e-06, |
|
"loss": 0.8852, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8391521107903747e-06, |
|
"loss": 0.8918, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8313538620972094e-06, |
|
"loss": 0.8765, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8235626143326865e-06, |
|
"loss": 0.8762, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8157784490735404e-06, |
|
"loss": 0.9053, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8080014478223523e-06, |
|
"loss": 0.8811, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8002316920066932e-06, |
|
"loss": 0.8794, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.792469262978271e-06, |
|
"loss": 0.8885, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.7847142420120815e-06, |
|
"loss": 0.8312, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.7769667103055564e-06, |
|
"loss": 0.856, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.7692267489777104e-06, |
|
"loss": 0.8706, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.761494439068295e-06, |
|
"loss": 0.873, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.7537698615369504e-06, |
|
"loss": 0.8709, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.7460530972623537e-06, |
|
"loss": 0.8237, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.738344227041376e-06, |
|
"loss": 0.8571, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7306433315882372e-06, |
|
"loss": 0.8635, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7229504915336574e-06, |
|
"loss": 0.8814, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.715265787424013e-06, |
|
"loss": 0.8667, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7075892997204958e-06, |
|
"loss": 0.8739, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.6999211087982686e-06, |
|
"loss": 0.8479, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.6922612949456274e-06, |
|
"loss": 0.8878, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6846099383631537e-06, |
|
"loss": 0.8956, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6769671191628807e-06, |
|
"loss": 0.8428, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6693329173674521e-06, |
|
"loss": 0.8344, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.6617074129092857e-06, |
|
"loss": 0.8504, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.6540906856297336e-06, |
|
"loss": 0.8899, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_bleu": 7.9392, |
|
"eval_gen_len": 18.9785, |
|
"eval_loss": 0.8788951635360718, |
|
"eval_runtime": 967.0825, |
|
"eval_samples_per_second": 2.064, |
|
"eval_steps_per_second": 1.032, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.6464828152782508e-06, |
|
"loss": 0.8444, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.638883881511556e-06, |
|
"loss": 0.8604, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.6312939638927994e-06, |
|
"loss": 0.8953, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.623713141890728e-06, |
|
"loss": 0.8298, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.6161414948788575e-06, |
|
"loss": 0.8403, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.6085791021346365e-06, |
|
"loss": 0.8284, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.6010260428386205e-06, |
|
"loss": 0.8546, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5934823960736402e-06, |
|
"loss": 0.8742, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5859482408239718e-06, |
|
"loss": 0.8856, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5784236559745175e-06, |
|
"loss": 0.8231, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5709087203099687e-06, |
|
"loss": 0.8579, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5634035125139923e-06, |
|
"loss": 0.8614, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5559081111683977e-06, |
|
"loss": 0.8646, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5484225947523201e-06, |
|
"loss": 0.8503, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5409470416413943e-06, |
|
"loss": 0.8683, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5334815301069374e-06, |
|
"loss": 0.8306, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.526026138315128e-06, |
|
"loss": 0.8103, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5185809443261897e-06, |
|
"loss": 0.8605, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5111460260935695e-06, |
|
"loss": 0.833, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5037214614631234e-06, |
|
"loss": 0.9071, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.496307328172306e-06, |
|
"loss": 0.854, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.4889037038493488e-06, |
|
"loss": 0.8733, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.4815106660124517e-06, |
|
"loss": 0.8441, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.4741282920689736e-06, |
|
"loss": 0.874, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.4667566593146167e-06, |
|
"loss": 0.8514, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.45939584493262e-06, |
|
"loss": 0.8502, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.4520459259929527e-06, |
|
"loss": 0.8563, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.4447069794515e-06, |
|
"loss": 0.8676, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.437379082149271e-06, |
|
"loss": 0.8202, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.4300623108115793e-06, |
|
"loss": 0.8378, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.4227567420472487e-06, |
|
"loss": 0.8805, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.4154624523478095e-06, |
|
"loss": 0.8568, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.408179518086694e-06, |
|
"loss": 0.8617, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.4009080155184407e-06, |
|
"loss": 0.8427, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.393648020777899e-06, |
|
"loss": 0.8675, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.3863996098794213e-06, |
|
"loss": 0.8523, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.3791628587160768e-06, |
|
"loss": 0.8456, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.3719378430588553e-06, |
|
"loss": 0.8492, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.3647246385558682e-06, |
|
"loss": 0.8459, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.3575233207315635e-06, |
|
"loss": 0.8667, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.3503339649859315e-06, |
|
"loss": 0.8342, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.3431566465937163e-06, |
|
"loss": 0.8684, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3359914407036267e-06, |
|
"loss": 0.8315, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3288384223375487e-06, |
|
"loss": 0.8773, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3216976663897622e-06, |
|
"loss": 0.8355, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3145692476261587e-06, |
|
"loss": 0.8641, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.3074532406834505e-06, |
|
"loss": 0.8655, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.300349720068398e-06, |
|
"loss": 0.8502, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.2932587601570245e-06, |
|
"loss": 0.8529, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.286180435193839e-06, |
|
"loss": 0.8638, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_bleu": 8.1623, |
|
"eval_gen_len": 18.979, |
|
"eval_loss": 0.878086507320404, |
|
"eval_runtime": 964.1689, |
|
"eval_samples_per_second": 2.07, |
|
"eval_steps_per_second": 1.035, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.2791148192910586e-06, |
|
"loss": 0.8571, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.2720619864278338e-06, |
|
"loss": 0.8588, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.2650220104494714e-06, |
|
"loss": 0.8413, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.2579949650666648e-06, |
|
"loss": 0.8627, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.2509809238547165e-06, |
|
"loss": 0.8522, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.2439799602527741e-06, |
|
"loss": 0.8369, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.2369921475630586e-06, |
|
"loss": 0.8676, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.230017558950096e-06, |
|
"loss": 0.8744, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.2230562674399538e-06, |
|
"loss": 0.8681, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.2161083459194714e-06, |
|
"loss": 0.8504, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.2091738671355039e-06, |
|
"loss": 0.8568, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.2022529036941546e-06, |
|
"loss": 0.862, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.1953455280600188e-06, |
|
"loss": 0.8509, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.188451812555422e-06, |
|
"loss": 0.8718, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1815718293596653e-06, |
|
"loss": 0.877, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1747056505082658e-06, |
|
"loss": 0.8373, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1678533478922075e-06, |
|
"loss": 0.8603, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1610149932571847e-06, |
|
"loss": 0.8459, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1541906582028526e-06, |
|
"loss": 0.8692, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1473804141820783e-06, |
|
"loss": 0.8508, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1405843325001878e-06, |
|
"loss": 0.8799, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1338024843142265e-06, |
|
"loss": 0.8758, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1270349406322109e-06, |
|
"loss": 0.8531, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1202817723123807e-06, |
|
"loss": 0.8452, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.1135430500624675e-06, |
|
"loss": 0.9195, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.1068188444389444e-06, |
|
"loss": 0.8824, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.10010922584629e-06, |
|
"loss": 0.8379, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.0934142645362547e-06, |
|
"loss": 0.8654, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.0867340306071228e-06, |
|
"loss": 0.8671, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.0800685940029742e-06, |
|
"loss": 0.8641, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.073418024512964e-06, |
|
"loss": 0.8477, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.066782391770576e-06, |
|
"loss": 0.8477, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.0601617652529066e-06, |
|
"loss": 0.8356, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.0535562142799325e-06, |
|
"loss": 0.8803, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.0469658080137801e-06, |
|
"loss": 0.8187, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.0403906154580127e-06, |
|
"loss": 0.8548, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.0338307054568977e-06, |
|
"loss": 0.8409, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.027286146694689e-06, |
|
"loss": 0.8734, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.0207570076949099e-06, |
|
"loss": 0.858, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.0142433568196347e-06, |
|
"loss": 0.8373, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.007745262268768e-06, |
|
"loss": 0.8376, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.0012627920793424e-06, |
|
"loss": 0.8204, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 9.947960141247918e-07, |
|
"loss": 0.8247, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 9.883449961142504e-07, |
|
"loss": 0.8753, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 9.819098055918424e-07, |
|
"loss": 0.8703, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 9.7549050993597e-07, |
|
"loss": 0.823, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.690871763586137e-07, |
|
"loss": 0.8877, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.626998719046263e-07, |
|
"loss": 0.8174, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.563286634510293e-07, |
|
"loss": 0.8523, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.499736177063165e-07, |
|
"loss": 0.8293, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_bleu": 8.0989, |
|
"eval_gen_len": 18.98, |
|
"eval_loss": 0.8752478361129761, |
|
"eval_runtime": 964.0098, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.035, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 9.436348012097496e-07, |
|
"loss": 0.8633, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 9.373122803306672e-07, |
|
"loss": 0.8656, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 9.31006121267791e-07, |
|
"loss": 0.8289, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.247163900485232e-07, |
|
"loss": 0.8325, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.184431525282659e-07, |
|
"loss": 0.8779, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.121864743897266e-07, |
|
"loss": 0.8456, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.059464211422286e-07, |
|
"loss": 0.8431, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.9972305812103e-07, |
|
"loss": 0.8287, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.935164504866367e-07, |
|
"loss": 0.842, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.873266632241201e-07, |
|
"loss": 0.8748, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.811537611424383e-07, |
|
"loss": 0.8652, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.749978088737541e-07, |
|
"loss": 0.8327, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.688588708727621e-07, |
|
"loss": 0.8433, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.627370114160133e-07, |
|
"loss": 0.861, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.566322946012389e-07, |
|
"loss": 0.852, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.505447843466836e-07, |
|
"loss": 0.8334, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.444745443904337e-07, |
|
"loss": 0.8509, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.384216382897476e-07, |
|
"loss": 0.8272, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.323861294203964e-07, |
|
"loss": 0.7921, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.263680809759955e-07, |
|
"loss": 0.8153, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.203675559673441e-07, |
|
"loss": 0.8168, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.143846172217671e-07, |
|
"loss": 0.872, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.084193273824531e-07, |
|
"loss": 0.8519, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.024717489078032e-07, |
|
"loss": 0.8594, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.965419440707756e-07, |
|
"loss": 0.8445, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.90629974958232e-07, |
|
"loss": 0.8429, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.84735903470289e-07, |
|
"loss": 0.819, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.788597913196702e-07, |
|
"loss": 0.829, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.730017000310575e-07, |
|
"loss": 0.8163, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.671616909404508e-07, |
|
"loss": 0.8699, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.613398251945239e-07, |
|
"loss": 0.8303, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.555361637499833e-07, |
|
"loss": 0.8012, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.49750767372932e-07, |
|
"loss": 0.8178, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.439836966382303e-07, |
|
"loss": 0.847, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.382350119288647e-07, |
|
"loss": 0.8623, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.325047734353155e-07, |
|
"loss": 0.8079, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.267930411549234e-07, |
|
"loss": 0.8337, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.210998748912657e-07, |
|
"loss": 0.8399, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.154253342535248e-07, |
|
"loss": 0.851, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.097694786558693e-07, |
|
"loss": 0.8304, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.041323673168307e-07, |
|
"loss": 0.846, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.985140592586781e-07, |
|
"loss": 0.8835, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.929146133068102e-07, |
|
"loss": 0.8588, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.873340880891308e-07, |
|
"loss": 0.8275, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.817725420354365e-07, |
|
"loss": 0.8529, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.762300333768082e-07, |
|
"loss": 0.8705, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.707066201450003e-07, |
|
"loss": 0.8591, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.652023601718282e-07, |
|
"loss": 0.8235, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.597173110885732e-07, |
|
"loss": 0.8575, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.542515303253666e-07, |
|
"loss": 0.8625, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_bleu": 8.176, |
|
"eval_gen_len": 18.979, |
|
"eval_loss": 0.8743442296981812, |
|
"eval_runtime": 965.6565, |
|
"eval_samples_per_second": 2.067, |
|
"eval_steps_per_second": 1.033, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.488050751105979e-07, |
|
"loss": 0.8735, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.433780024703124e-07, |
|
"loss": 0.8721, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.379703692276104e-07, |
|
"loss": 0.8554, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.325822320020608e-07, |
|
"loss": 0.8569, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.272136472091008e-07, |
|
"loss": 0.8107, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.218646710594465e-07, |
|
"loss": 0.8671, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.165353595585069e-07, |
|
"loss": 0.8392, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.112257685057973e-07, |
|
"loss": 0.8353, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.059359534943501e-07, |
|
"loss": 0.8244, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.006659699101419e-07, |
|
"loss": 0.8328, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.954158729315032e-07, |
|
"loss": 0.8333, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.901857175285488e-07, |
|
"loss": 0.8651, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.849755584625985e-07, |
|
"loss": 0.8671, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.797854502856029e-07, |
|
"loss": 0.8453, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.746154473395752e-07, |
|
"loss": 0.8358, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.694656037560206e-07, |
|
"loss": 0.8291, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.643359734553693e-07, |
|
"loss": 0.8541, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.592266101464122e-07, |
|
"loss": 0.8466, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.541375673257394e-07, |
|
"loss": 0.8221, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.490688982771769e-07, |
|
"loss": 0.8347, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.440206560712352e-07, |
|
"loss": 0.8715, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.389928935645452e-07, |
|
"loss": 0.854, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.339856633993124e-07, |
|
"loss": 0.8747, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.28999018002761e-07, |
|
"loss": 0.8586, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.240330095865856e-07, |
|
"loss": 0.8247, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.190876901464067e-07, |
|
"loss": 0.8357, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.14163111461225e-07, |
|
"loss": 0.847, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.092593250928782e-07, |
|
"loss": 0.8241, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.043763823855036e-07, |
|
"loss": 0.8706, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.995143344649964e-07, |
|
"loss": 0.8625, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.946732322384795e-07, |
|
"loss": 0.8476, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.89853126393767e-07, |
|
"loss": 0.8344, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.850540673988346e-07, |
|
"loss": 0.8456, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.802761055012914e-07, |
|
"loss": 0.8175, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.755192907278536e-07, |
|
"loss": 0.848, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.7078367288381886e-07, |
|
"loss": 0.8592, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.660693015525486e-07, |
|
"loss": 0.82, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.613762260949456e-07, |
|
"loss": 0.8745, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.567044956489394e-07, |
|
"loss": 0.8827, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.520541591289701e-07, |
|
"loss": 0.862, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.4742526522547626e-07, |
|
"loss": 0.8684, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.428178624043866e-07, |
|
"loss": 0.8537, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.382319989066117e-07, |
|
"loss": 0.8451, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.336677227475383e-07, |
|
"loss": 0.8589, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.2912508171652765e-07, |
|
"loss": 0.8102, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.2460412337641504e-07, |
|
"loss": 0.8662, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.2010489506300933e-07, |
|
"loss": 0.8566, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.156274438846017e-07, |
|
"loss": 0.8663, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.11171816721469e-07, |
|
"loss": 0.8125, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.0673806022538425e-07, |
|
"loss": 0.8605, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_bleu": 8.0117, |
|
"eval_gen_len": 18.9805, |
|
"eval_loss": 0.8721033334732056, |
|
"eval_runtime": 963.8746, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.035, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.023262208191284e-07, |
|
"loss": 0.8296, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.9793634469600216e-07, |
|
"loss": 0.83, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.9356847781934575e-07, |
|
"loss": 0.8432, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.892226659220552e-07, |
|
"loss": 0.8396, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.8489895450610407e-07, |
|
"loss": 0.8742, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.8059738884206775e-07, |
|
"loss": 0.8124, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.7631801396864757e-07, |
|
"loss": 0.8238, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.7206087469220195e-07, |
|
"loss": 0.8443, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.6782601558627563e-07, |
|
"loss": 0.8567, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.6361348099113123e-07, |
|
"loss": 0.8406, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.5942331501329003e-07, |
|
"loss": 0.8667, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.552555615250658e-07, |
|
"loss": 0.8206, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.511102641641051e-07, |
|
"loss": 0.8545, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.469874663329342e-07, |
|
"loss": 0.8578, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.428872111985021e-07, |
|
"loss": 0.8555, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.388095416917267e-07, |
|
"loss": 0.8161, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.3475450050705125e-07, |
|
"loss": 0.8234, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.3072213010199053e-07, |
|
"loss": 0.8342, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.267124726966903e-07, |
|
"loss": 0.8458, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.2272557027348524e-07, |
|
"loss": 0.8573, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.187614645764564e-07, |
|
"loss": 0.8544, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.1482019711099735e-07, |
|
"loss": 0.8455, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.109018091433802e-07, |
|
"loss": 0.847, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.07006341700318e-07, |
|
"loss": 0.8277, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.031338355685418e-07, |
|
"loss": 0.8297, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.992843312943702e-07, |
|
"loss": 0.8648, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.954578691832835e-07, |
|
"loss": 0.8469, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.9165448929950685e-07, |
|
"loss": 0.8757, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.878742314655844e-07, |
|
"loss": 0.8526, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.8411713526196677e-07, |
|
"loss": 0.8369, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.80383240026596e-07, |
|
"loss": 0.8428, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.766725848544907e-07, |
|
"loss": 0.8428, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7298520859734054e-07, |
|
"loss": 0.8695, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6932114986309874e-07, |
|
"loss": 0.8534, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6568044701557494e-07, |
|
"loss": 0.8515, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6206313817403627e-07, |
|
"loss": 0.8642, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5846926121280843e-07, |
|
"loss": 0.8184, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5489885376087626e-07, |
|
"loss": 0.8173, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5135195320149355e-07, |
|
"loss": 0.85, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.478285966717889e-07, |
|
"loss": 0.858, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.4432882106237786e-07, |
|
"loss": 0.8104, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.408526630169772e-07, |
|
"loss": 0.8469, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.3740015893201906e-07, |
|
"loss": 0.8599, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.3397134495627278e-07, |
|
"loss": 0.874, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.305662569904646e-07, |
|
"loss": 0.8251, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.2718493068690229e-07, |
|
"loss": 0.86, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.2382740144910236e-07, |
|
"loss": 0.8766, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.2049370443141704e-07, |
|
"loss": 0.8541, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.171838745386695e-07, |
|
"loss": 0.8276, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.1389794642578649e-07, |
|
"loss": 0.8479, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_bleu": 8.1008, |
|
"eval_gen_len": 18.978, |
|
"eval_loss": 0.8710653781890869, |
|
"eval_runtime": 966.0685, |
|
"eval_samples_per_second": 2.066, |
|
"eval_steps_per_second": 1.033, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.1063595449743595e-07, |
|
"loss": 0.8244, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.073979329076664e-07, |
|
"loss": 0.8897, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.0418391555955042e-07, |
|
"loss": 0.8269, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.0099393610482688e-07, |
|
"loss": 0.8353, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.9782802794355239e-07, |
|
"loss": 0.8267, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.9468622422374962e-07, |
|
"loss": 0.8484, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.9156855784106004e-07, |
|
"loss": 0.8665, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.8847506143839983e-07, |
|
"loss": 0.8377, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.854057674056181e-07, |
|
"loss": 0.859, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.8236070787915754e-07, |
|
"loss": 0.8135, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.793399147417188e-07, |
|
"loss": 0.8111, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.7634341962192602e-07, |
|
"loss": 0.8239, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.7337125389399496e-07, |
|
"loss": 0.8728, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.7042344867740645e-07, |
|
"loss": 0.8582, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.6750003483657739e-07, |
|
"loss": 0.8271, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.646010429805417e-07, |
|
"loss": 0.8376, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.6172650346262642e-07, |
|
"loss": 0.867, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.5887644638013566e-07, |
|
"loss": 0.8924, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.5605090157403495e-07, |
|
"loss": 0.8466, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.5324989862863792e-07, |
|
"loss": 0.8462, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.5047346687129898e-07, |
|
"loss": 0.8679, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.4772163537210393e-07, |
|
"loss": 0.8395, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.4499443294356541e-07, |
|
"loss": 0.8514, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.422918881403243e-07, |
|
"loss": 0.8532, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.3961402925884744e-07, |
|
"loss": 0.8172, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.36960884337132e-07, |
|
"loss": 0.7917, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.3433248115441362e-07, |
|
"loss": 0.8305, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.3172884723087336e-07, |
|
"loss": 0.8496, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.2915000982735155e-07, |
|
"loss": 0.8685, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.2659599594506106e-07, |
|
"loss": 0.8706, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.2406683232530414e-07, |
|
"loss": 0.8015, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.2156254544919476e-07, |
|
"loss": 0.8639, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.1908316153737858e-07, |
|
"loss": 0.8624, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.1662870654975955e-07, |
|
"loss": 0.8271, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.1419920618522984e-07, |
|
"loss": 0.8196, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.1179468588139702e-07, |
|
"loss": 0.8528, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.0941517081432096e-07, |
|
"loss": 0.8642, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.0706068589824925e-07, |
|
"loss": 0.8515, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.0473125578535526e-07, |
|
"loss": 0.8398, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.0242690486548134e-07, |
|
"loss": 0.8797, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.0014765726588437e-07, |
|
"loss": 0.8537, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 9.789353685097953e-08, |
|
"loss": 0.812, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 9.566456722209432e-08, |
|
"loss": 0.8588, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 9.34607717172195e-08, |
|
"loss": 0.8394, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 9.12821734107644e-08, |
|
"loss": 0.8493, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.91287951133174e-08, |
|
"loss": 0.8376, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.700065937140401e-08, |
|
"loss": 0.832, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.489778846725417e-08, |
|
"loss": 0.8802, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.282020441856637e-08, |
|
"loss": 0.8084, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.076792897827757e-08, |
|
"loss": 0.8391, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_bleu": 8.2041, |
|
"eval_gen_len": 18.9795, |
|
"eval_loss": 0.8708174228668213, |
|
"eval_runtime": 963.8453, |
|
"eval_samples_per_second": 2.071, |
|
"eval_steps_per_second": 1.035, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.874098363433668e-08, |
|
"loss": 0.8315, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.673938960947924e-08, |
|
"loss": 0.8792, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.476316786100336e-08, |
|
"loss": 0.8167, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.281233908055269e-08, |
|
"loss": 0.8536, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.088692369389888e-08, |
|
"loss": 0.8216, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.89869418607264e-08, |
|
"loss": 0.8033, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.711241347442415e-08, |
|
"loss": 0.8184, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.526335816187474e-08, |
|
"loss": 0.847, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.34397952832505e-08, |
|
"loss": 0.8446, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.164174393181038e-08, |
|
"loss": 0.869, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.986922293369834e-08, |
|
"loss": 0.8355, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.812225084774969e-08, |
|
"loss": 0.8527, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.640084596529399e-08, |
|
"loss": 0.8716, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.4705026309964104e-08, |
|
"loss": 0.8185, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.3034809637508846e-08, |
|
"loss": 0.8322, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.139021343560452e-08, |
|
"loss": 0.81, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.977125492367452e-08, |
|
"loss": 0.8391, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.817795105270723e-08, |
|
"loss": 0.8142, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.661031850507924e-08, |
|
"loss": 0.8039, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.5068373694380775e-08, |
|
"loss": 0.828, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.355213276524356e-08, |
|
"loss": 0.8512, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.206161159317129e-08, |
|
"loss": 0.839, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.059682578437474e-08, |
|
"loss": 0.8535, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.915779067560743e-08, |
|
"loss": 0.8548, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.774452133400469e-08, |
|
"loss": 0.8437, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.635703255692735e-08, |
|
"loss": 0.8258, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.4995338871804954e-08, |
|
"loss": 0.8327, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.3659454535985015e-08, |
|
"loss": 0.8453, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.234939353658345e-08, |
|
"loss": 0.8719, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.1065169590337453e-08, |
|
"loss": 0.8516, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.98067961434631e-08, |
|
"loss": 0.8355, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.857428637151327e-08, |
|
"loss": 0.8453, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.7367653179240783e-08, |
|
"loss": 0.8211, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.6186909200462128e-08, |
|
"loss": 0.8567, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.503206679792647e-08, |
|
"loss": 0.8693, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.390313806318545e-08, |
|
"loss": 0.8263, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2800134816466647e-08, |
|
"loss": 0.8622, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.172306860654977e-08, |
|
"loss": 0.8541, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0671950710645928e-08, |
|
"loss": 0.8408, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9646792134279667e-08, |
|
"loss": 0.8284, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.864760361117296e-08, |
|
"loss": 0.8354, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7674395603134442e-08, |
|
"loss": 0.8895, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6727178299948133e-08, |
|
"loss": 0.8532, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5805961619267396e-08, |
|
"loss": 0.8279, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.491075520651142e-08, |
|
"loss": 0.8415, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4041568434764175e-08, |
|
"loss": 0.8381, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.3198410404675066e-08, |
|
"loss": 0.8357, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.2381289944366492e-08, |
|
"loss": 0.8405, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.1590215609337264e-08, |
|
"loss": 0.8565, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.0825195682377387e-08, |
|
"loss": 0.8649, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_bleu": 8.1488, |
|
"eval_gen_len": 18.9785, |
|
"eval_loss": 0.8710347414016724, |
|
"eval_runtime": 973.1045, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 1.026, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.0086238173478146e-08, |
|
"loss": 0.8551, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 9.373350819749382e-09, |
|
"loss": 0.8313, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.686541085339006e-09, |
|
"loss": 0.8401, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.025816161353895e-09, |
|
"loss": 0.8806, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.391182965785504e-09, |
|
"loss": 0.8271, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.782648143436321e-09, |
|
"loss": 0.8348, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.200218065851304e-09, |
|
"loss": 0.8564, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.6438988312504385e-09, |
|
"loss": 0.8111, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.113696264466006e-09, |
|
"loss": 0.856, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.6096159168798616e-09, |
|
"loss": 0.814, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.1316630663670864e-09, |
|
"loss": 0.887, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.6798427172390904e-09, |
|
"loss": 0.8207, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.254159600192819e-09, |
|
"loss": 0.8367, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.8546181722599597e-09, |
|
"loss": 0.881, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.48122261676087e-09, |
|
"loss": 0.8333, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.1339768432609988e-09, |
|
"loss": 0.8364, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.8128844875289764e-09, |
|
"loss": 0.8306, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.517948911499978e-09, |
|
"loss": 0.8722, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.2491732032385295e-09, |
|
"loss": 0.8287, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.0065601769088106e-09, |
|
"loss": 0.8401, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.901123727427351e-10, |
|
"loss": 0.8713, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.998320570149708e-10, |
|
"loss": 0.8514, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.357212220182372e-10, |
|
"loss": 0.8462, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.9778158604276684e-10, |
|
"loss": 0.8381, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.860145933585411e-10, |
|
"loss": 0.8647, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.0042141420030238e-10, |
|
"loss": 0.8551, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.1002944754509055e-11, |
|
"loss": 0.8085, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.759807150731214e-12, |
|
"loss": 0.8357, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 107877, |
|
"total_flos": 3.211633985499169e+18, |
|
"train_loss": 0.3135226284782316, |
|
"train_runtime": 125809.459, |
|
"train_samples_per_second": 3.43, |
|
"train_steps_per_second": 0.857 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 107877, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 2000, |
|
"total_flos": 3.211633985499169e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|